Source code for lib5c.util.lru_cache

"""
Module containing a backport of Python 3.3's least recently used cache
decorator.

The original module was downloaded from
http://code.activestate.com/recipes/578078/, then modified to support hashing of
counts dicts and annotationmaps.
"""

from collections import namedtuple
from functools import update_wrapper
from threading import RLock
import numbers

import numpy as np
from functools import reduce

_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])


class _HashedSeq(list):
    __slots__ = 'hashvalue'

    def __init__(self, tup, hash=hash):
        self[:] = tup
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue


def _make_counts_hashable(counts):
    return reduce(lambda x, y: (x << 1) ^ y, (hash(counts[k].tostring())
                                              for k in sorted(counts.keys())))


def _make_annotationmap_hashable(annotationmap):
    keys = list(sorted(annotationmap.keys()))
    if type(annotationmap[keys[0]]) == np.ndarray:
        return str(_make_counts_hashable(annotationmap))
    return ''.join(keys + [str(annotationmap[k]) for k in keys])


def _make_annotationmaps_hashable(annotationmaps):
    keys = list(sorted(annotationmaps.keys()))
    return ''.join(keys + [_make_annotationmap_hashable(annotationmaps[k])
                           for k in keys])


def _typecheck_counts(counts):
    return type(counts) == dict and type(list(counts.keys())[0]) == str and \
           type(list(counts.values())[0]) == np.ndarray


def _typecheck_annotationmaps(annotationmaps):
    return type(annotationmaps) == dict and \
           type(list(annotationmaps.keys())[0]) == str and \
           type(list(annotationmaps.values())[0]) == dict and \
           type(list(list(annotationmaps.values())[0].keys())[0]) == str and \
           (isinstance(list(list(annotationmaps.values())[0].values())[0][0],
                       numbers.Integral) or
            isinstance(list(list(annotationmaps.values())[0].values())[0],
                       np.ndarray))


def _make_key(args, kwds, typed, kwd_mark=(object(),),
              fasttypes={int, str, frozenset, type(None)}, sorted=sorted,
              tuple=tuple, type=type, len=len):
    """"Make a cache key from optionally typed positional and keyword
    arguments"""
    key = args
    if kwds:
        sorted_items = sorted(kwds.items())
        key += kwd_mark
        for item in sorted_items:
            key += item

    # new section added to handle hashing of custom types
    key_list = list(key)
    for i in range(len(key_list)):
        if _typecheck_counts(key_list[i]):
            key_list[i] = _make_counts_hashable(key_list[i])
        elif _typecheck_annotationmaps(key_list[i]):
            key_list[i] = _make_annotationmaps_hashable(key_list[i])
    key = tuple(key_list)

    if typed:
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for k, v in sorted_items)
    elif len(key) == 1 and type(key[0]) in fasttypes:
        return key[0]
    return _HashedSeq(key)


[docs]def lru_cache(maxsize=100, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """

    # Users should only access the lru_cache through its public API:
    #       cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    def decorating_function(user_function):

        cache = dict()
        stats = [0, 0]                  # make statistics updateable non-locally
        HITS, MISSES = 0, 1             # names for the stats fields
        make_key = _make_key
        cache_get = cache.get           # bound method to lookup key or return None
        _len = len                      # localize the global len() function
        lock = RLock()                  # because linkedlist updates aren't threadsafe
        root = []                       # root of the circular doubly linked list
        root[:] = [root, root, None, None]      # initialize by pointing to self
        nonlocal_root = [root]                  # make updateable non-locally
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields

        if maxsize == 0:

            def wrapper(*args, **kwds):
                # no caching, just do a statistics update after a successful call
                result = user_function(*args, **kwds)
                stats[MISSES] += 1
                return result

        elif maxsize is None:

            def wrapper(*args, **kwds):
                # simple caching without ordering or size limit
                key = make_key(args, kwds, typed)
                result = cache_get(key, root)   # root used here as a unique not-found sentinel
                if result is not root:
                    stats[HITS] += 1
                    return result
                result = user_function(*args, **kwds)
                cache[key] = result
                stats[MISSES] += 1
                return result

        else:

            def wrapper(*args, **kwds):
                # size limited caching that tracks accesses by recency
                key = make_key(args, kwds, typed) if kwds or typed else args
                with lock:
                    link = cache_get(key)
                    if link is not None:
                        # record recent use of the key by moving it to the front of the list
                        root, = nonlocal_root
                        link_prev, link_next, key, result = link
                        link_prev[NEXT] = link_next
                        link_next[PREV] = link_prev
                        last = root[PREV]
                        last[NEXT] = root[PREV] = link
                        link[PREV] = last
                        link[NEXT] = root
                        stats[HITS] += 1
                        return result
                result = user_function(*args, **kwds)
                with lock:
                    root, = nonlocal_root
                    if key in cache:
                        # getting here means that this same key was added to the
                        # cache while the lock was released.  since the link
                        # update is already done, we need only return the
                        # computed result and update the count of misses.
                        pass
                    elif _len(cache) >= maxsize:
                        # use the old root to store the new key and result
                        oldroot = root
                        oldroot[KEY] = key
                        oldroot[RESULT] = result
                        # empty the oldest link and make it the new root
                        root = nonlocal_root[0] = oldroot[NEXT]
                        oldkey = root[KEY]
                        oldvalue = root[RESULT]
                        root[KEY] = root[RESULT] = None
                        # now update the cache dictionary for the new links
                        del cache[oldkey]
                        cache[key] = oldroot
                    else:
                        # put result in a new link at the front of the list
                        last = root[PREV]
                        link = [last, root, key, result]
                        last[NEXT] = root[PREV] = cache[key] = link
                    stats[MISSES] += 1
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function