"""Caches are used for multiple things:

    - To speed up asset building. Filter operations every step
      of the way can be cached, so that individual parts of a
      build that haven't changed can be reused.

    - Bundle definitions are cached when a bundle is built so we
      can determine whether they have changed and whether a rebuild
      is required.

This data is not all stored in the same cache necessarily. The
classes in this module provide the "environment.cache" object, but
also serve in other places.
"""

import os
from os import path
import errno
import tempfile
import warnings
from webassets import six
from webassets.merge import BaseHunk
from webassets.filter import Filter, freezedicts
from webassets.utils import md5_constructor, pickle
import types


# Public names exported by a star-import of this module.
__all__ = ('FilesystemCache', 'MemoryCache', 'get_cache',)


def make_hashable(data):
    """Return a version of ``data`` that can be passed to hash().

    The conversion is delegated entirely to ``freezedicts``. Dicts are
    the main thing that needs support here; the other special types
    used as cache keys (Hunks, Filters) already provide a proper
    hash() method.

    ``MemoryCache`` feeds the result of this function to ``make_md5``
    to build its internal keys.

    See also ``make_md5``.
    """
    hashable = freezedicts(data)
    return hashable


def make_md5(*data):
    """Return the hex MD5 digest of everything passed in ``data``.

    Containers (tuples, lists, frozensets, dicts) are walked
    recursively; dicts are visited in sorted key order, each key
    followed by its value. ``Hunk`` objects contribute their actual
    content, and any other object exposing an ``id()`` method
    contributes whatever that method returns.

    This is deliberately conservative: a value whose type is not
    explicitly handled raises ``ValueError`` instead of being hashed in
    some generic way, because a cache silently not working for this
    very reason caused debugging headaches in the past.

    MD5 is faster than sha, and collisions are not much of a concern
    here - though enough of one not to rely on hash().
    """
    digest = md5_constructor()

    def feed(node):
        # The checks are tried in this order and the first match wins.
        if isinstance(node, (tuple, list, frozenset)):
            for member in node:
                feed(member)
            return
        if isinstance(node, dict):
            for name in sorted(node.keys()):
                feed(name)
                feed(node[name])
            return
        if isinstance(node, BaseHunk):
            # Hash the hunk's content, not the hunk object itself.
            digest.update(node.data().encode('utf-8'))
            return
        if isinstance(node, int):
            digest.update(str(node).encode('utf-8'))
            return
        if isinstance(node, six.text_type):
            digest.update(node.encode('utf-8'))
            return
        if isinstance(node, six.binary_type):
            digest.update(node)
            return
        if hasattr(node, "id"):
            # Objects with an ``id()`` method are represented by
            # whatever that method returns.
            feed(node.id())
            return
        if node is None:
            digest.update("None".encode('utf-8'))
            return
        if isinstance(node, types.FunctionType):
            # Plain functions are represented by their hash() value.
            digest.update(str(hash(node)).encode('utf-8'))
            return
        raise ValueError('Cannot MD5 type %s' % type(node))

    feed(data)
    return digest.hexdigest()


def safe_unpickle(string):
    """Unpickle the string, or return ``None`` if that fails.

    Only ordinary errors (``Exception`` subclasses, e.g. a truncated or
    garbled pickle) are turned into ``None``. ``KeyboardInterrupt`` and
    ``SystemExit`` are allowed to propagate so that an interrupted or
    exiting process is not mistaken for a corrupted cache entry.

    Note that a successfully unpickled ``None`` is indistinguishable
    from a failure.

    NOTE(security): unpickling can execute arbitrary code; only feed
    this data that comes from a trusted source.
    """
    try:
        return pickle.loads(string)
    except Exception:
        return None


class BaseCache(object):
    """Abstract base class.

    The cache key must be something that is supported by the Python hash()
    function. The cache value may be a string, or anything that can be pickled.

    Since the cache is used for multiple purposes, all webassets-internal code
    should always tag its keys with an id, like so:

        key = ("tag", actual_key)

    One cache instance can only be used safely with a single Environment.

    Subclasses must override both ``get`` and ``set``; the versions defined
    here only raise ``NotImplementedError``.
    """

    def get(self, key):
        """Should return the cache contents stored for ``key``, or ``None``
        if there is no usable entry (this is what the implementations in
        this module return on a miss).
        """
        raise NotImplementedError()

    def set(self, key, value):
        """Should store ``value`` under ``key``, replacing any previous
        entry for the same key.
        """
        raise NotImplementedError()


class MemoryCache(BaseCache):
    """Caches stuff in the process memory.

    WARNING: Do NOT use this in a production environment, where you
    are likely going to have multiple processes serving the same app!

    Keys are normalised with ``make_hashable`` and then reduced to an
    MD5 hex digest with ``make_md5``; that digest is what is stored in
    ``self.keys`` and used to index ``self.cache``.

    At most ``capacity`` entries are kept. Storing a key moves it to
    the newest position; reading a key does not change its position.
    """

    def __init__(self, capacity):
        # Maximum number of entries kept at any time.
        self.capacity = capacity
        # Key digests, ordered from oldest to most recently stored.
        self.keys = []
        # Maps key digest -> cached value.
        self.cache = {}

    def __eq__(self, other):
        """Return equality with the config values that instantiate
        this instance: compares equal to ``False``, to ``None`` and to
        itself.
        """
        return (False == other) or (None == other) or (self is other)

    def get(self, key):
        """Return the value stored for ``key``, or ``None`` on a miss."""
        digest = make_md5(make_hashable(key))
        return self.cache.get(digest)

    def set(self, key, value):
        """Store ``value`` for ``key`` and evict the oldest entries
        beyond ``capacity``.
        """
        digest = make_md5(make_hashable(key))
        self.cache[digest] = value

        # Move the digest to the newest position.
        if digest in self.keys:
            self.keys.remove(digest)
        self.keys.append(digest)

        # limit cache to the given capacity
        overflow = max(0, len(self.keys) - self.capacity)
        expired = self.keys[:overflow]
        self.keys = self.keys[overflow:]
        for stale in expired:
            del self.cache[stale]


class FilesystemCache(BaseCache):
    """Stores each cache entry as a pickled file inside ``directory``.

    The file name is the MD5 of the format version ``V`` combined with
    the cache key, so entries written with a different ``V`` are simply
    never found.

    ``new_file_mode``, if given, is applied with ``os.chmod`` to every
    cache file before it is moved into place.
    """

    V = 2   # We have changed the cache format once

    def __init__(self, directory, new_file_mode=None):
        self.directory = directory
        self.new_file_mode = new_file_mode

    def __eq__(self, other):
        """Return equality with the config values
        that instantiate this instance: compares equal to ``True``, to
        the configured directory and to itself.
        """
        return True == other or \
               self.directory == other or \
               id(self) == id(other)

    def get(self, key):
        """Return the value cached for ``key``, or ``None``.

        ``None`` is returned when no file exists for the key, and also
        when the file cannot be unpickled (a warning is issued in that
        case). Because a failed unpickle is signalled with ``None``, a
        stored ``None`` value triggers the same warning.

        Any ``IOError`` other than "file not found" while opening the
        file is re-raised.
        """
        filename = path.join(self.directory, '%s' % make_md5(self.V, key))
        try:
            f = open(filename, 'rb')
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            return None
        with f:
            result = f.read()

        # NOTE(security): the file content is unpickled, so the cache
        # directory must only be writable by trusted parties.
        unpickled = safe_unpickle(result)
        if unpickled is None:
            warnings.warn('Ignoring corrupted cache file %s' % filename)
        return unpickled

    def set(self, key, data):
        """Pickle ``data`` into the cache file for ``key``.

        The data is first written to a temporary file in the cache
        directory and then moved over the final name, so readers never
        see a partially written entry. If anything fails, the temporary
        file is removed and the error is re-raised.
        """
        md5 = '%s' % make_md5(self.V, key)
        filename = path.join(self.directory, md5)
        fd, temp_filename = tempfile.mkstemp(prefix='.' + md5,
                                             dir=self.directory)
        try:
            # Leaving the ``with`` block closes (and thereby flushes)
            # the file.
            with os.fdopen(fd, 'wb') as f:
                pickle.dump(data, f)
            # If a non default mode is specified, then chmod the file to
            # it before renaming it into place
            if self.new_file_mode is not None:
                os.chmod(temp_filename, self.new_file_mode)
            replace = getattr(os, 'replace', None)
            if replace is not None:
                # Overwrites an existing entry in a single step, which
                # avoids the window in which the entry is missing and
                # the failure when two writers both try to unlink it.
                replace(temp_filename, filename)
            else:
                # Fallback for interpreters without ``os.replace``:
                # remove an existing file first, since ``os.rename``
                # does not overwrite on every platform.
                if os.path.isfile(filename):
                    os.unlink(filename)
                os.rename(temp_filename, filename)
        except BaseException:
            # Do not leave the temporary file behind, whatever the
            # reason for the failure; then let the error propagate.
            os.unlink(temp_filename)
            raise


def get_cache(option, ctx):
    """Return a cache instance based on ``option``.

    - A false value (``None``, ``False``, ...) disables caching and
      ``None`` is returned.
    - A ``BaseCache`` instance is returned as-is.
    - A ``BaseCache`` subclass is instantiated without arguments.
    - ``True`` selects a ``FilesystemCache`` in the ``.webassets-cache``
      directory below ``ctx.directory``; that directory is created if
      necessary.
    - Anything else is used as the directory of a ``FilesystemCache``;
      it is not created here.

    ``ctx.cache_file_mode`` is passed on as the file mode for new cache
    files.
    """
    if not option:
        return None

    if isinstance(option, BaseCache):
        return option
    elif isinstance(option, type) and issubclass(option, BaseCache):
        return option()

    if option is True:
        directory = path.join(ctx.directory, '.webassets-cache')
        # Auto-create the default directory. Attempt the creation
        # instead of checking first, so that another process creating
        # it at the same time does not make this one fail; an error is
        # only fatal if the path still does not exist afterwards.
        try:
            os.makedirs(directory)
        except OSError:
            if not path.exists(directory):
                raise
    else:
        directory = option
    return FilesystemCache(directory, ctx.cache_file_mode)
