import logging
# from uuid import UUID
from weakref import WeakValueDictionary
from openpathsampling.netcdfplus.base import StorableNamedObject, StorableObject
from openpathsampling.netcdfplus.cache import MaxCache, Cache, NoCache, \
WeakLRUCache
from openpathsampling.netcdfplus.proxy import LoaderProxy
from future.utils import iteritems
import sys
if sys.version_info > (3, ):
long = int
unicode = str
logger = logging.getLogger(__name__)
init_log = logging.getLogger('openpathsampling.initialization')
class HashedList(dict):
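    """
    A bidirectional mapping between keys (usually UUIDs) and their
    insertion positions.

    Acts like a dict from key to integer position, while ``index(pos)``
    returns the key stored at ``pos``. A minimal sketch of the intended
    behaviour::

        hl = HashedList()
        hl.append('a')
        hl.extend(['b', 'c'])
        assert hl['a'] == 0        # key -> position
        assert hl.index(2) == 'c'  # position -> key
    """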
def __init__(self):
        super(HashedList, self).__init__()
self._list = []
def append(self, key):
dict.__setitem__(self, key, len(self))
self._list.append(key)
# noinspection PyCallByClass
def extend(self, t):
        n = len(self)
        dict.update(self, zip(t, range(n, n + len(t))))
self._list.extend(t)
def __setitem__(self, key, value):
dict.__setitem__(self, key, value)
self._list[value] = key
def __getitem__(self, key):
return dict.__getitem__(self, key)
def index(self, key):
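        # note: despite the name, this maps a storage position back to its
        # key (UUID), i.e. the reverse lookup of the dict itself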
return self._list[key]
def mark(self, key):
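        # the sentinel -2 flags a key as "assumed stored" without a real
        # position; save() treats any value other than -1 as already saved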
if key not in self:
dict.__setitem__(self, key, -2)
def unmark(self, key):
if key in self:
dict.__delitem__(self, key)
def clear(self):
dict.clear(self)
self._list = []
@property
def list(self):
return self._list
class ObjectStore(StorableNamedObject):
"""
    Base Class for storing complex objects in a netCDF4 file. It holds a
    reference to the store file.
Attributes
----------
content_class : :obj:`openpathsampling.netcdfplus.base.StorableObject`
a reference to the class type to be stored using this Storage. Must be
subclassed from :obj:`openpathsampling.netcdfplus.base.StorableObject`
    json : string
        a JSON-serialized string of the object, if it has already been
        computed
    cache : :py:class:`openpathsampling.netcdfplus.cache.Cache`
        a dictionary that holds references to all stored elements by index,
        or by string for named objects. This is only used for cached access
        if caching is not `False`. Cached elements are of type
        :obj:`openpathsampling.netcdfplus.base.StorableObject` or a subclass.
"""
_restore_non_initial_attr = False
allowed_types = [
'int', 'float', 'long', 'str', 'bool',
'numpy.float32', 'numpy.float64',
        'numpy.int8', 'numpy.int16', 'numpy.int32', 'numpy.int64',
        'numpy.uint8', 'numpy.uint16', 'numpy.uint32', 'numpy.uint64',
'index', 'length', 'uuid'
]
default_store_chunk_size = 256
_log_debug = False
class DictDelegator(object):
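        """
        A read-only dict view that prefixes every key with the store prefix.

        A sketch: for a store with ``prefix='sample'``, ``delegator['json']``
        resolves to ``dct['sample_json']``.
        """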
def __init__(self, store, dct):
self.prefix = store.prefix + '_'
self.dct = dct
def __getitem__(self, item):
return self.dct[self.prefix + item]
def __contains__(self, item):
return (self.prefix + item) in self.dct
def prefix_delegate(self, dct):
return ObjectStore.DictDelegator(self, dct)
default_cache = 10000
def __init__(self, content_class, json=True, nestable=False):
"""
Parameters
----------
content_class
        json : bool or str, `json` or `jsonobj`
            if `False`, the store will not create a json variable for
            serialization. If `True`, the store will use json pickling to
            store objects, and a single storable object will be serialized
            and not referenced. If a string is given, it is taken as the
            variable type of the json variable. Only two values are
            allowed: `jsonobj` (equivalent to `True`) or `json`, which will
            also directly reference given storable objects.
        nestable : bool
            if `True`, the content_class will be saved as nested dict
            objects instead of as pointers to saved objects. The saved
            complex object is then stored only once and not split into
            several objects that reference each other in a tree-like
            fashion
Notes
-----
        Usually you want caching, but limited. We recommend an LRUCache
        with a reasonable maximum number of objects, chosen according to
        the typical number of objects to cache and their size.
The class that takes care of storing data in a file is called a
`Storage`, so the netCDF+ subclassed `Storage` is a storage.
The classes that know how to load and save an object from the storage
are called `Store`, like ObjectStore, SampleStore, etc...
        The difference between `json` and `jsonobj` is subtle. Consider
        storing a complex object. Then there are two ways to do that.
        1. `json`: store a reference to the object (provided it is already
        stored).
        2. `jsonobj`: serialize the object and only use references for
        contained objects. All inner objects will always be stored using
        references.
        The only exception is using nestable. Consider objects that contain
        references to objects of the same type, e.g. operations in an
        equation (2*3 + 3). Each operation represents a value, but each
        operation also needs values to operate on. To save such an object
        you again have two options:
        1. `nestable=False`: store all objects individually and always
        reference the contained objects. For an equation this would mean
        storing several objects, `op1 = plus(op2, 3), op2 = times(2, 3)`.
        Since this is correct but not intuitive, you can also use
2. `nestable=True`. Store all the serialized objects nested into one
object (string). For our example this corresponds to
`plus(times(2,3), 3)`.
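
        Examples
        --------
        A minimal sketch; ``MyObject`` stands for a hypothetical
        :obj:`StorableObject` subclass and is used only for illustration::

            # serialize each object, referencing contained objects
            store = ObjectStore(MyObject, json=True)
            # store nested objects inline as one serialized object
            nested = ObjectStore(MyObject, json=True, nestable=True)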
"""
super(ObjectStore, self).__init__()
self._storage = None
self.content_class = content_class
self.prefix = None
self.cache = NoCache()
self._free = set()
self._cached_all = False
self.nestable = nestable
self._created = False
self.attribute_list = {}
self.cv = {}
# This will not be stored since its information is contained in the
# dimension names
self._dimension_prefix_store = None
self.variables = dict()
self.vars = dict()
self.units = dict()
self.index = None
self.proxy_index = WeakValueDictionary()
if json in [True, False, 'json', 'jsonobj']:
self.json = json
else:
raise ValueError(
'Valid settings for json are only True, False, `json` or '
'`jsonobj`.')
if self.content_class is not None \
and not issubclass(self.content_class, StorableObject):
raise ValueError(
'Content class "%s" must be subclassed from StorableObject.' %
self.content_class.__name__)
self.fallback_store = None
def is_created(self):
return self._created
def to_dict(self):
return {
'content_class': self.content_class,
'json': self.json,
'nestable': self.nestable
}
def register_fallback(self, store):
self.fallback_store = store
def register(self, storage, prefix):
"""
Associate the object store to a specific storage with a given prefix
Parameters
----------
storage : :class:`openpathsampling.netcdfplus.NetCDFPlus`
the storage to be associated with
prefix : str
            the name under which the variables of this store are found in
            the storage
"""
self._storage = storage
self.prefix = prefix
self.variables = self.prefix_delegate(self.storage.variables)
self.units = self.prefix_delegate(self.storage.units)
self.vars = self.prefix_delegate(self.storage.vars)
self.index = self.create_uuid_index()
def create_uuid_index(self):
return HashedList()
def restore(self):
self.load_indices()
def load_indices(self):
self.index.clear()
self.index.extend(self.vars['uuid'][:])
@property
def storage(self):
"""Return the associated storage object
Returns
-------
:class:`openpathsampling.netcdfplus.NetCDFPlus`
the referenced storage object
"""
if self._storage is None:
raise RuntimeError(
'A storage needs to be added to this store to be used! '
'Use .register() to do so.')
return self._storage
def __str__(self):
return repr(self)
def __repr__(self):
return 'store.%s[%s] : %s' % (
self.prefix,
self.content_class.__name__ if self.content_class is not None else
'None/ANY',
str(len(self)) + ' object(s)' if self._created else
'(not created)'
)
@property
def simplifier(self):
"""
Return the simplifier instance used to create JSON serialization
Returns
-------
:class:`openpathsampling.netcdfplus.dictify.StorableObjectJSON`
the simplifier object used in the associated storage
"""
return self.storage.simplifier
def set_caching(self, caching):
"""
Set the caching mode for this store
Parameters
----------
caching : :class:`openpathsampling.netcdfplus.Cache`
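
        Examples
        --------
        A sketch of the accepted values::

            store.set_caching(True)    # unlimited cache (MaxCache)
            store.set_caching(False)   # disable caching (NoCache)
            store.set_caching(10000)   # WeakLRUCache of up to 10000 objects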
"""
if caching is None:
caching = self.default_cache
if caching is True:
caching = MaxCache()
elif caching is False:
caching = NoCache()
elif type(caching) is int:
caching = WeakLRUCache(caching)
if isinstance(caching, Cache):
self.cache = caching.transfer(self.cache)
def idx(self, obj):
"""
Return the index in this store for a given object
Parameters
----------
obj : :class:`openpathsampling.netcdfplus.base.StorableObject`
the object that can be stored in this store for which its index is
to be returned
        Returns
        -------
        int
            the integer index of the given object
        Raises
        ------
        KeyError
            if the object has not been stored in this store yet
"""
return self.index[obj.__uuid__]
def __iter__(self):
"""
Add iteration over all elements in the storage
"""
        # we want to iterate in the order objects were saved!
for uuid in self.index._list:
yield self.load(uuid)
def __len__(self):
"""
Return the number of stored objects
Returns
-------
int
number of stored objects
"""
return len(self.storage.dimensions[self.prefix])
def write(self, variable, idx, obj, attribute=None):
if attribute is None:
attribute = variable
var = self.vars[variable]
val = getattr(obj, attribute)
var[int(idx)] = val
if var.var_type.startswith('lazy'):
proxy = var.store.proxy(val)
if isinstance(obj, LoaderProxy):
# for a loader proxy apply it to the real object
setattr(obj.__subject__, attribute, proxy)
else:
setattr(obj, attribute, proxy)
def proxy(self, item):
"""
        Return a proxy of an object for this store
        Parameters
        ----------
        item : :py:class:`openpathsampling.netcdfplus.base.StorableObject` or int
            the item or index that points to an object in this store and
            for which a proxy is requested
        Returns
        -------
        :class:`openpathsampling.netcdfplus.proxy.LoaderProxy`
            a proxy that will load the referenced object on first access
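
        Examples
        --------
        A sketch::

            lazy = store.proxy(obj)           # from the object itself
            lazy = store.proxy(obj.__uuid__)  # or directly from its UUID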
"""
if item is None:
return None
try:
idx = item.__uuid__
except AttributeError:
idx = item
# tt = type(item)
# if tt is int:
# idx = self.vars['uuid'][item]
# elif tt is long:
# idx = item
# elif tt in [str, unicode]:
# if item[0] == '-':
# return None
# idx = int(UUID(item))
# else:
#
return LoaderProxy.new(self, idx)
def __contains__(self, item):
if item.__uuid__ in self.index:
return True
if self.fallback_store is not None and item in self.fallback_store:
return True
if self.storage.fallback is not None and item in self.storage.fallback:
return True
return False
def __getitem__(self, item):
"""
        Enable numpy-style selection of objects in the store
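
        Examples
        --------
        Supported index types, as a sketch::

            store[0]        # single object by integer position
            store[-1]       # negative indices count from the end
            store[2:5]      # slices return lists of objects
            store[[0, 4]]   # lists of indices return lists of objects
            store[...]      # Ellipsis returns an iterator over the store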
"""
try:
if isinstance(item, (long, int)):
if item < 0:
item += len(self)
return self.load(item)
elif type(item) is str:
return self.load(item)
elif type(item) is slice:
return [self.load(idx)
for idx in range(*item.indices(len(self)))]
elif type(item) is list:
return [self.load(idx) for idx in item]
elif item is Ellipsis:
return iter(self)
except KeyError:
return None
def get(self, item):
try:
return self[item]
except KeyError:
return None
def _load(self, idx):
obj = self.vars['json'][idx]
return obj
def clear_cache(self):
"""Clear the cache and force reloading"""
self.cache.clear()
self._cached_all = False
def cache_all(self):
"""Load all samples as fast as possible into the cache"""
if not self._cached_all:
idxs = range(len(self))
jsons = self.variables['json'][:]
            for i, j in zip(idxs, jsons):
                self.add_single_to_cache(i, j)
self._cached_all = True
def _save(self, obj, idx):
self.vars['json'][idx] = obj
@property
def last(self):
"""
        Returns the last stored object. Useful to continue a run.
Returns
-------
:py:class:`openpathsampling.netcdfplus.base.StorableObject`
the last stored object in this store
"""
return self.load(len(self) - 1)
@property
def first(self):
"""
Returns the first stored object.
Returns
-------
:py:class:`openpathsampling.netcdfplus.base.StorableObject`
the actual first stored object
"""
return self.load(0)
def free(self):
"""
Return the number of the next free index for this store
Returns
-------
index : int
the number of the next free index in the storage.
Used to store a new object.
"""
# start at first free position in the storage
idx = len(self)
# # and skip also reserved potential stored ones
# while idx in self._free:
# idx += 1
return idx
# def reserve_idx(self, idx):
# """
# Locks an idx as used
#
# Parameters
# ----------
# idx : int
# the integer index to be reserved
# """
# self._free.add(idx)
#
# def release_idx(self, idx):
# """
# Releases a lock on an idx
#
# Parameters
# ----------
# idx : int
# the integer index to be released
# """
# self._free.discard(idx)
def initialize(self):
"""
Initialize the associated storage to allow for object storage. Mainly
creates an index dimension with the name of the object.
"""
# define dimensions used for the specific object
self.storage.create_dimension(self.prefix, 0)
if self.json:
jsontype = 'jsonobj'
if type(self.json) is str:
jsontype = self.json
self.create_variable(
"json",
jsontype,
description='A json serialized version of the object',
chunksizes=tuple([65536])
)
# TODO: Change to 16byte string
self.create_variable(
"uuid", 'uuid',
description='The uuid of the object',
chunksizes=tuple([65536])
)
self._created = True
# ==========================================================================
# INITIALISATION UTILITY FUNCTIONS
# ==========================================================================
def create_variable(
self,
var_name,
var_type,
dimensions=None,
chunksizes=None,
description=None,
simtk_unit=None,
maskable=False
):
"""
Create a new variable in the netCDF storage. This is just a helper
function to structure the code better.
Parameters
        ----------
var_name : str
The var_name of the variable to be created
var_type : str
The string representing the type of the data stored in the
variable. Allowed are strings of native python types in which
case the variables will be treated as python or a string of the
form 'numpy.type' which will refer to the numpy data types.
            Numpy is preferred since the API to netCDF uses numpy and thus
it is faster. Possible input strings are
`int`, `float`, `long`, `str`, `numpy.float32`, `numpy.float64`,
`numpy.int8`, `numpy.int16`, `numpy.int32`, `numpy.int64`, `json`,
`obj.<store>`, `lazyobj.<store>`
dimensions : str or tuple of str
A tuple representing the dimensions used for the netcdf variable.
If not specified then the default dimension of the storage is used.
If the last dimension is `'...'` then it is assumed that the
objects are of variable length. In netCDF this is usually
            referred to as a VLType. We will treat it just as another
dimension, but it can only be the last dimension.
description : str
A string describing the variable in a readable form.
chunksizes : tuple of int
            A tuple of ints, one per dimension. This specifies the block
            sizes in which a variable is stored. Usually, for object-related
            data, we want to store everything belonging to one object at
            once, so this is often (1, ..., ...)
simtk_unit : str
A string representing the units used for this variable. Can be
used with all var_types although it makes sense only for numeric
ones.
        maskable : bool, default: False
            if `True`, the values in this variable may exist only partially;
            positions that have not yet been written are filled with a
            fill_value, which is treated as unset. The created variable
            will interpret these values as `None` when returned
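
        Examples
        --------
        A minimal sketch; the variable name and description are purely
        illustrative::

            self.create_variable(
                'energy',
                'numpy.float32',
                description='a float value stored once per object'
            )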
"""
# add the main dimension to the var_type
if type(dimensions) is str:
dimensions = [dimensions]
if type(dimensions) is int:
if dimensions == 1:
dimensions = ['scalar']
else:
dimensions = [dimensions]
if dimensions is None:
dimensions = (self.prefix,)
else:
dimensions = tuple([self.prefix] + list(dimensions))
store_chunk_size = ObjectStore.default_store_chunk_size
if chunksizes is None and len(dimensions) == 1:
chunksizes = (store_chunk_size, )
elif chunksizes is not None and dimensions[-1] == '...' \
and len(dimensions) == len(chunksizes) + 2:
chunksizes = tuple([store_chunk_size] + list(chunksizes))
elif chunksizes is not None and dimensions[-1] != '...' \
and len(dimensions) == len(chunksizes) + 1:
chunksizes = tuple([store_chunk_size] + list(chunksizes))
if self.dimension_prefix:
dimensions = tuple(
[dimensions[0]] +
[
self.dimension_prefix + dim if type(dim) is str and
dim != '...' else dim for dim in dimensions[1:]
]
)
            # chunksizes may still be None if no branch above applied
            if chunksizes is not None:
                chunksizes = tuple(
                    [chunksizes[0]] +
                    [
                        self.dimension_prefix + chs
                        if type(chs) is str else chs for chs in chunksizes[1:]
                    ]
                )
self.storage.create_variable(
self.prefix + '_' + var_name,
var_type=var_type,
dimensions=dimensions,
chunksizes=chunksizes,
description=description,
simtk_unit=simtk_unit,
maskable=maskable
)
@property
def dimension_prefix(self):
if self._dimension_prefix_store is not None:
return self._dimension_prefix_store.prefix
else:
return ''
def set_dimension_prefix_store(self, prefix_store=None):
"""
        Select which store, if any, should be used to prefix dimension names
        If you want to create multiple instances of a store whose dimensions
        have different lengths, you need unique names for them. This way you
        can select a store whose prefix is prepended to the dimension names
        of this store
Parameters
----------
        prefix_store : :obj:`openpathsampling.netcdfplus.ObjectStore`
            the store whose prefix / name is used to prefix dimension
            names
"""
self._dimension_prefix_store = prefix_store
# ==========================================================================
# LOAD/SAVE DECORATORS FOR CACHE HANDLING
# ==========================================================================
def load(self, idx):
"""
Returns an object from the storage.
Parameters
----------
        idx : int or long
            the integer storage index or the UUID of the object to be
            loaded
Returns
-------
:py:class:`openpathsampling.netcdfplus.base.StorableObject`
the loaded object
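
        Examples
        --------
        A sketch of both addressing modes::

            obj = store.load(5)               # by storage position
            obj = store.load(other.__uuid__)  # by UUID (a very large int)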
"""
if isinstance(idx, (long, int)):
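            # UUIDs are very large integers, so values below the threshold
            # are plain storage positions and anything else is a UUID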
if idx < 1000000000:
n_idx = idx
elif idx in self.index:
n_idx = self.index[idx]
else:
if self.fallback_store is not None:
return self.fallback_store.load(idx)
elif self.storage.fallback is not None:
return self.storage.fallback.stores[self.name].load(idx)
else:
                    raise ValueError(
                        'UUID %s not found in storage or fallback' % idx)
else:
            raise ValueError(
                'indices need to be an integer: either a UUID as a (long) '
                'int or a plain storage index')
if n_idx < 0:
return None
# if it is in the cache, return it
try:
obj = self.cache[n_idx]
if self._log_debug:
logger.debug(
'Found IDX #' + str(idx) + ' in cache. Not loading!')
return obj
except KeyError:
pass
if self._log_debug:
logger.debug(
'Calling load object of type `%s` @ IDX #%d' %
(self.content_class.__name__, n_idx))
if n_idx >= len(self):
logger.warning(
                'Trying to load from IDX #%d > number of objects %d' %
(n_idx, len(self)))
return None
elif n_idx < 0:
logger.warning((
'Trying to load negative IDX #%d < 0. '
'This should never happen!!!') % n_idx)
raise RuntimeError(
'Loading of negative int should result in no object. '
'This should never happen!')
else:
obj = self._load(n_idx)
if self._log_debug:
logger.debug(
'Calling load object of type %s and IDX # %d ... DONE' %
(self.content_class.__name__, n_idx))
if obj is not None:
self._get_id(n_idx, obj)
# update cache there might have been a change due to naming
self.cache[n_idx] = obj
if self._log_debug:
logger.debug(
'Try loading UUID object of type %s and IDX # %d ... DONE' %
(self.content_class.__name__, n_idx))
if self._log_debug:
logger.debug(
'Finished load object of type %s and IDX # %d ... DONE' %
(self.content_class.__name__, n_idx))
return obj
@staticmethod
def reference(obj):
return obj.__uuid__
def remember(self, obj):
"""
        Tell a store that an object should be assumed to be stored
        This is useful if you do not want to store an object in a specific
        store, especially to make sure attributes are not stored multiple
        times
Parameters
----------
obj : :py:class:`openpathsampling.netcdfplus.base.StorableObject`
the object to be fake stored
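
        Examples
        --------
        A sketch::

            store.remember(obj)   # obj is now assumed to be stored
            store.forget(obj)     # revert, unless obj was actually stored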
"""
self.index.mark(obj.__uuid__)
def forget(self, obj):
"""
This will revert remembering non-stored objects.
Stored objects cannot be forgotten
Parameters
----------
obj : :py:class:`openpathsampling.netcdfplus.base.StorableObject`
the object to be forgotten
"""
self.index.unmark(obj.__uuid__)
def save(self, obj, idx=None):
"""
Saves an object to the storage.
Parameters
----------
obj : :class:`openpathsampling.netcdfplus.base.StorableObject`
the object to be stored
        idx : int or string or `None`
            the index to be used for storing. Using this is highly
            discouraged, since it changes an otherwise immutable object
            (at least in the storage). It is better to also store the new
            object and just ignore the previously stored one.
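
        Examples
        --------
        A sketch; the return value is the object's UUID reference::

            ref = store.save(obj)
            assert ref == obj.__uuid__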
"""
uuid = obj.__uuid__
if uuid in self.index:
# has been saved so quit and do nothing
if not self.index[uuid] == -1:
return self.reference(obj)
# numbers other than -1 are reserved for other things
if isinstance(obj, LoaderProxy):
if obj._store is self:
# is a proxy of a saved object so do nothing
return uuid
else:
# it is stored but not in this store so we try storing the
# full attribute which might be still in cache or memory
# if that is not the case it will be stored again. This can
# happen when you load from one store save to another. And load
# again after some time while the cache has been changed and try
# to save again the loaded object. We will not explicitly store
# a table that matches objects between different storages.
return self.save(obj.__subject__)
if self.fallback_store is not None and \
self.storage.exclude_from_fallback:
if obj in self.fallback_store:
return self.reference(obj)
elif self.storage.fallback is not None and \
self.storage.exclude_from_fallback:
if obj in self.storage.fallback:
return self.reference(obj)
if not isinstance(obj, self.content_class):
raise ValueError((
'This store can only store object of base type "%s". Given '
'obj is of type "%s". You might need to use another store.')
% (self.content_class, obj.__class__.__name__)
)
# n_idx = self.free()
n_idx = len(self.index)
# mark as saved so circular dependencies will not cause infinite loops
self.index.append(uuid)
# make sure in nested saving that an IDX is not used twice!
# self.reserve_idx(n_idx)
logger.debug('Saving ' + str(type(obj)) + ' using IDX #' + str(n_idx))
try:
self._save(obj, n_idx)
self._auto_complete(obj, n_idx)
self.cache[n_idx] = obj
        except BaseException:
# in case we did not succeed remove the mark as being saved
del self.index[uuid]
raise
# self.release_idx(n_idx)
self._set_id(n_idx, obj)
return self.reference(obj)
def __setitem__(self, key, value):
"""
Enable saving using __setitem__
"""
self.save(value, key)
# def load_single(self, idx):
# return self._load(idx)
#
# def load_range(self, start, end):
# return map(self._load, range(start, end))
def add_single_to_cache(self, idx, json):
"""
Add a single object to cache by json
Parameters
----------
idx : int
the index where the object was stored
        json : str
            the json string that represents a serialized version of the
            stored object
"""
if idx not in self.cache:
obj = self.simplifier.from_json(json)
self._get_id(idx, obj)
self.cache[idx] = obj
self.index[obj.__uuid__] = idx
return obj
# def uuid(self, uuid):
# """
# Return last object with a given uuid
#
# Parameters
# ----------
# uuid : str
# the uuid to be searched for
#
# Returns
# -------
# :py:class:`openpathsampling.netcdfplus.base.StorableObject`
# the last object with a given uuid. This is to mimic an immutable
# object. Once you (re-)save with the same uuid you replace the old
    # one and hence you need to load the last stored one.
#
# """
# return self.load(uuid)
def _set_id(self, idx, obj):
self.vars['uuid'][idx] = obj.__uuid__
def _get_id(self, idx, obj):
obj.__uuid__ = self.index.index(int(idx))
# CV SUPPORT
def _auto_complete(self, obj, pos):
for attribute, attribute_store in self.attribute_list.items():
if not attribute_store.allow_incomplete:
# value = attribute._cache_dict._get(obj)
# if value is None:
# # not in cache so compute it if possible
# if attribute._eval_dict:
# value = attribute._eval_dict([obj])[0]
                value = attribute(obj)
                if value is not None:
                    # allow_incomplete is False in this branch, so the value
                    # is written directly at the object's storage position
                    n_idx = pos
                    attribute_store.vars['value'][n_idx] = value
                    attribute_store.cache[n_idx] = value
def complete_attribute(self, attribute):
"""
Compute all missing values of a CV and store them
Parameters
----------
attribute : :obj:`openpathsampling.netcdfplus.PseudoAttribute`
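
        Examples
        --------
        A sketch::

            store.complete_attribute(cv)  # compute and store missing values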
"""
if attribute not in self.attribute_list:
return
attribute_store = self.attribute_list[attribute]
key_store = self.storage.attributes.key_store(attribute)
if attribute_store.allow_incomplete:
# for complete this does not make sense
# TODO: Make better looping over this to not have
# to load all the indices at once
# can be problematic for 10M+ stored attributes
indices = self.vars['uuid'][:]
for pos, idx in enumerate(indices):
if pos not in attribute_store.index:
                    # this value is not stored, so go ahead
proxy = LoaderProxy.new(key_store, idx)
# # get from cache first, this is fastest
# value = attribute._cache_dict._get(proxy)
#
# if value is None:
# # not in cache so compute it if possible
# if attribute._eval_dict:
# value = attribute._eval_dict([proxy])[0]
# else:
# value = None
value = attribute(proxy)
if value is not None:
n_idx = attribute_store.free()
attribute_store.vars['value'][n_idx] = value
attribute_store.vars['index'][n_idx] = pos
attribute_store.index[pos] = n_idx
attribute_store.cache[n_idx] = value
def sync_attribute(self, attribute):
"""
Store all cached values of a CV in the diskcache
Parameters
----------
attribute : :obj:`openpathsampling.CollectiveVariable`
"""
if attribute not in self.attribute_list:
return
attribute_store = self.attribute_list[attribute]
# for complete this does not make sense
if attribute_store.allow_incomplete:
# loop all objects in the fast CV cache
for obj, value in iteritems(attribute._cache_dict.cache):
if value is not None:
pos = self.pos(obj)
                    # if the object is not saved, there is nothing we can do
if pos is None:
continue
# if the value is stored, skip it
if pos in attribute_store.index:
continue
n_idx = attribute_store.free()
attribute_store.vars['value'][n_idx] = value
attribute_store.vars['index'][n_idx] = pos
attribute_store.index[pos] = n_idx
attribute_store.cache[n_idx] = value
@staticmethod
def _get_attribute_name(attribute_idx):
return 'attribute' + str(attribute_idx)
def pos(self, obj):
return self.index.get(obj.__uuid__)
def pos_uuid(self, uid):
return self.index.get(uid)
def add_attribute(
self, store_cls, attribute, template,
allow_incomplete=None, chunksize=None):
"""
Parameters
----------
store_cls : :obj:`openpathsampling.netcdfplus.ValueStore`
attribute : :obj:`openpathsampling.CollectiveVariable`
template : :obj:`openpathsampling.engines.Baseattribute`
chunksize : int
allow_incomplete : bool
        Returns
        -------
        :obj:`openpathsampling.netcdfplus.ValueStore`
            the value store created for this attribute
"""
if attribute in self.attribute_list:
return self.attribute_list[attribute]
key_store = self.storage.attributes.key_store(attribute)
if allow_incomplete is None:
allow_incomplete = attribute.diskcache_allow_incomplete
if chunksize is None:
chunksize = attribute.diskcache_chunksize
if template is None:
template = attribute.diskcache_template
if not allow_incomplete:
# in complete mode we force chunk size one to match it to attributes
# chunksize = self.default_store_chunk_size
chunksize = self.variables['uuid'].chunking()[0]
# determine value type and shape
params = self.storage.get_value_parameters(attribute(template))
shape = params['dimensions']
if shape is None:
chunksizes = None
else:
chunksizes = tuple(params['dimensions'])
# attribute_idx = self.storage.attributes.index[attribute.__uuid__]
value_store = store_cls(
attribute.key_class,
allow_incomplete=allow_incomplete,
chunksize=chunksize
)
store_name = self.name + '_' + attribute.name
self.storage.create_store(store_name, value_store, False)
if value_store.allow_incomplete:
# we are not using the .initialize function here since we
# only have one variable and only here know its shape
self.storage.create_dimension(value_store.prefix, 0)
if shape is not None:
shape = tuple(list(shape))
chunksizes = tuple([chunksize] + list(chunksizes))
else:
shape = tuple()
chunksizes = tuple([chunksize])
# create the variable
value_store.create_variable(
'value',
var_type=params['var_type'],
dimensions=shape,
chunksizes=chunksizes,
simtk_unit=params['simtk_unit'],
)
value_store.create_variable('index', 'index')
else:
# todo: seems to be a bug in NetCDF4. Need to set chunksize to 1
# see Issue https://github.com/Unidata/netcdf4-python/issues/566
# I assume this will still work as expected.
# chunksize = self.default_store_chunk_size
# chunksize = self.variables['uuid'].chunking()[0]
chunksize = 1
if shape is not None:
shape = tuple([self.name] + list(shape))
chunksizes = tuple([chunksize] + list(chunksizes))
else:
shape = tuple([self.name])
chunksizes = tuple([chunksize])
# create the variable
value_store.storage.create_variable(
store_name + '_value',
var_type=params['var_type'],
dimensions=shape,
chunksizes=chunksizes,
simtk_unit=params['simtk_unit'],
)
value_store.initialize()
        # register the value store for this attribute
self.attribute_list[attribute] = value_store
attribute_idx = self.storage.attributes.index[attribute.__uuid__]
self.storage.attributes.vars['cache'][attribute_idx] = value_store
# use the cache and function of the CV to fill the store when it is made
if not allow_incomplete:
indices = self.vars['uuid'][:]
for pos, idx in enumerate(indices):
proxy = LoaderProxy.new(key_store, idx)
# value = attribute._cache_dict._get(proxy)
#
# if value is None:
# # not in cache so compute it if possible
# if attribute._eval_dict:
# value = attribute._eval_dict([proxy])[0]
# else:
# value = None
value = attribute(proxy)
if value is not None:
value_store.vars['value'][pos] = value
value_store.cache[pos] = value
attribute.set_cache_store(value_store)
return value_store