Source code for openpathsampling.ensemble

"""
Created on 03.09.2014

@author: Jan-Hendrik Prinz, David W.H. Swenson
"""

import abc
import logging
import itertools

from openpathsampling.netcdfplus import StorableNamedObject
import openpathsampling as paths

from future.utils import with_metaclass


logger = logging.getLogger(__name__)
init_log = logging.getLogger('openpathsampling.initialization')


# TODO: Make Full and Empty be Singletons to avoid storing them several times!


def join_ensembles(ensemble_list):
    """Combine several ensembles into one using the set-theory union.

    Parameters
    ----------
    ensemble_list : list of :class:`.Ensemble`
        the ensembles to merge

    Returns
    -------
    :class:`.Ensemble`
        the union (``|``) of all given ensembles; ``None`` if the list is
        empty
    """
    joined = None
    for current in ensemble_list:
        # the first ensemble seeds the result, later ones are OR-ed onto it
        joined = current if joined is None else joined | current
    return joined


def _get_list_traj(trajectory):
    """Turn a Trajectory or a list into a plain list of (proxy) snapshots.

    Parameters
    ----------
    trajectory : :class:`.Trajectory` or :class:`.list`
        trajectory or list to convert into a list of snapshots

    Returns
    -------
    :class:`.list`
        list of (proxy) snapshots

    Note
    ----
    Due to possible UUID space restrictions, we don't want to slice
    Trajectories if we can help it (as this will generate a new Trajectory
    object with its own UUID). This convenience function maps either a
    Trajectory or a list onto a list of (proxy) snapshots, which can be
    sliced without generating a new UUID.

    For a more in-depth discussion, please see:
    https://github.com/openpathsampling/openpathsampling/pull/978
    """
    # prefer the proxy iterator when the object offers one
    make_iterator = getattr(trajectory, 'iter_proxies', trajectory.__iter__)
    return list(make_iterator())


# note: the cache is not storable, because that would just be silly!
class EnsembleCache(object):
    """Object used by ensembles to enable fast algorithms for basic functions.

    The contents stored in the `can_append`, `can_prepend`, `call`, and
    `check_reverse` dictionaries will depend on the ensemble. Only two of
    these dictionaries should be non-`None` at any time: either the pair
    `call` and `can_append`, or the pair `check_reverse` and `can_prepend`.

    This object also contains basic functions to manage the cache.

    Attributes
    ----------
    start_frame : :class:`openpathsampling.snapshot.Snapshot`
    prev_last_frame : :class:`openpathsampling.snapshot.Snapshot`
    direction : +1 or -1
    contents : dictionary
    """

    def __init__(self, direction=None):
        self.direction = direction
        self.contents = {}
        self.start_frame = None
        self.prev_last_frame = None
        self.prev_last_index = None
        self.last_length = None
        self.trusted = False
        self.debug_enabled = False

    def bad_direction_error(self):
        raise RuntimeError("EnsembleCache.direction = " +
                           str(self.direction) + " invalid.")  # nocover

    def _frame_indices(self):
        """Indices of the (start, newest, next-to-newest) frames.

        For a forward cache the trajectory grows at its end, for a backward
        cache it grows at its beginning. Any other direction is reported via
        :meth:`bad_direction_error`.
        """
        if self.direction > 0:
            return 0, -1, -2
        elif self.direction < 0:
            return -1, 0, 1
        self.bad_direction_error()

    def check(self, trajectory=None, reset=None):
        """Checks and resets (if necessary) the ensemble cache.

        The trajectory is considered trustworthy based on checking several
        factors, compared to the last time the cache was checked. For
        forward caches (direction > 0), these are

        * the first frame has not changed
        * the length is the same, or has changed by 1
        * if length unchanged, the final frame is the same; if length
          changed by 1, the penultimate frame is the old final frame

        Similar rules apply for backward caches (direction < 0), with
        obvious changes of "final" and "first" frames.

        If the trajectory is not trustworthy, we return True (should be
        reset).

        Parameters
        ----------
        trajectory : :class:`.Trajectory`
            the trajectory to test
        reset : bool or None
            force a value for reset. If None, the value is determined based
            on the test criteria.

        Returns
        -------
        bool :
            the value of reset
        """
        if self.debug_enabled:
            logger.debug("Checking cache....")
            logger.debug("start_frame " + str(id(self.start_frame)))
            logger.debug("prev_last " + str(id(self.prev_last_frame)))
            logger.debug("prev_last_idx " + str(self.prev_last_index))

        if trajectory is None:
            reset = True
        else:
            # internal functions may hand us a list instead of a Trajectory
            get_frame = getattr(trajectory, "get_as_proxy",
                                trajectory.__getitem__)
            if reset is None:
                n_frames = len(trajectory)
                first, newest, before_newest = self._frame_indices()
                if get_frame(first) != self.start_frame or n_frames == 1:
                    # changed start frame, or a one-frame trajectory (for
                    # which caching makes no difference): always reset
                    reset = True
                elif n_frames == self.last_length:
                    reset = (get_frame(newest) != self.prev_last_frame)
                elif n_frames == self.last_length + 1:
                    reset = (get_frame(before_newest)
                             != self.prev_last_frame)
                else:
                    reset = True

        self.trusted = not reset
        # TODO: this can be hit with trajectory is None?
        self.last_length = len(trajectory)

        if reset:
            self.debug_enabled = logger.isEnabledFor(logging.DEBUG)
            if self.debug_enabled:
                logger.debug("Resetting cache " + str(self))
            first, newest, _ = self._frame_indices()
            self.start_frame = get_frame(first)
            self.prev_last_frame = get_frame(newest)
            self.contents = {}

        # remember the newest frame for the next check
        _, newest, _ = self._frame_indices()
        self.prev_last_frame = get_frame(newest)
        if newest == 0:
            self.prev_last_index = 0
        else:
            self.prev_last_index = len(trajectory) - 1

        # by returning reset, we allow the functions that call this to
        # reset other things as well
        return reset


class Ensemble(with_metaclass(abc.ABCMeta, StorableNamedObject)):
    """
    Path ensemble object.

    An Ensemble represents a path ensemble, effectively a set of
    trajectories. Typical set operations are allowed, here: and, or, xor,
    -(without), ~ (inverse = all - x)

    Notes
    -----
    Maybe replace - by / to get better notation. So far it has not been used
    """

    def __init__(self):
        """
        A path volume defines a set of paths.
        """
        super(Ensemble, self).__init__()
        self._saved_str = None  # cached first time it is requested

    # Defining __eq__ drops the inherited __hash__, so restore it; see
    # https://docs.python.org/3/reference/datamodel.html#object.__hash__
    __hash__ = StorableNamedObject.__hash__

    def __eq__(self, other):
        # two ensembles are equal if their string descriptions match
        if self is other:
            return True
        return str(self) == str(other)

    def __ne__(self, other):
        return not self == other

    @abc.abstractmethod
    def __call__(self, trajectory, trusted=None, candidate=False):
        """
        Return `True` if the trajectory is part of the path ensemble.

        Parameters
        ----------
        trajectory: :class:`.Trajectory`
            The trajectory to be checked
        trusted : boolean
            For many ensembles, a faster algorithm can be used if we know
            some information about the trajectory with one fewer frames.
            The `trusted` flag tells the ensemble to use such an algorithm.
            This is usually used in combination with an
            :class:`.EnsembleCache` which makes short-cut calculations
            possible.
        candidate : boolean
            not used by this base implementation
        """
        return False

    def check_reverse(self, trajectory, trusted=False):
        """
        See __call__; same thing, but potentially in reverse frame order.

        The base implementation always performs the untrusted check.
        """
        return self(trajectory, trusted=False)

    def check(self, trajectory):
        """Alias for __call__"""
        return self(trajectory, trusted=False)

    def trajectory_summary(self, trajectory):
        """
        Return dict with info on how this ensemble "sees" the trajectory.

        Parameters
        ----------
        trajectory : `openpathsampling.Trajectory`
        """
        return {}

    def trajectory_summary_str(self, trajectory):
        """
        Returns a string with the results of the trajectory_summary function.

        Parameters
        ----------
        trajectory : `openpathsampling.Trajectory`
        """
        summ = self.trajectory_summary(trajectory)
        if summ == {}:
            return "No summary available"
        return str(summ)

    def can_append(self, trajectory, trusted=False):
        """
        Returns true, if the trajectory so far can still be in the ensemble
        if it is appended by a frame. To check, it assumes that the
        trajectory to length L-1 is okay. This is mainly for interactive
        usage, when a trajectory is generated.

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the actual trajectory to be tested
        trusted : bool
            If trusted=True, some ensembles can be computed more
            efficiently (e.g., by checking only one frame)

        Returns
        -------
        bool
            Returns true or false if using a forward step (extending the
            trajectory forward in time at its end) `trajectory` could
            still be in the ensemble and thus makes sense to continue a
            simulation
        """
        return True

    def can_prepend(self, trajectory, trusted=False):
        """
        Returns true, if the trajectory so far can still be in the ensemble
        if it is prepended by a frame. To check, it assumes that the
        trajectory from index 1 is okay. This is mainly for interactive
        usage, when a trajectory is generated using a backward move.

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the actual trajectory to be tested
        trusted : bool
            If trusted=True, some ensembles can be computed more
            efficiently (e.g., by checking only one frame)

        Returns
        -------
        bool
            Returns true or false if using a backward step (extending the
            trajectory backwards in time at its beginning) `trajectory`
            could still be in the ensemble and thus makes sense to
            continue a simulation
        """
        return True

    def strict_can_append(self, trajectory, trusted=False):
        """
        Returns true if the trajectory can be the beginning of a trajectory
        in the ensemble.

        Parameters
        ----------
        trajectory : :class:`.Trajectory`
            trajectory to test
        trusted : bool
            If trusted=True, some ensembles can be computed more
            efficiently (e.g., by checking only one frame)

        Returns
        -------
        bool
            True if and only if the given trajectory can be the beginning
            of a trajectory in the ensemble.
        """
        # default behavior is to be the same as can_append
        return self.can_append(trajectory, trusted)

    def strict_can_prepend(self, trajectory, trusted=False):
        """
        Returns true if the trajectory can be the end of a trajectory in
        the ensemble.

        Parameters
        ----------
        trajectory : :class:`.Trajectory`
            trajectory to test
        trusted : bool
            If trusted=True, some ensembles can be computed more
            efficiently (e.g., by checking only one frame)

        Returns
        -------
        bool
            True if and only if the given trajectory can be the end of a
            trajectory in the ensemble.
        """
        # default behavior is to be the same as can_prepend
        return self.can_prepend(trajectory, trusted)

    def iter_valid_slices(
        self,
        trajectory,
        max_length=None,
        min_length=1,
        overlap=1,
        reverse=False
    ):
        """
        Return an iterator over slices of subtrajectories matching the
        ensemble

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the actual trajectory to be split into ensemble parts
        max_length : int > 0, optional
            if set this determines the maximal size to be tested (is mainly
            used in the recursion)
        min_length : int > 0, optional
            if set this determines the minimal size to be tested
        overlap : int >= 0, optional
            determines the allowed overlap of all trajectories to be found.
            A value of x means that two sub-trajectories can share up to x
            frames at the beginning and x frames at the end. Default is 1
        reverse : bool
            if `True` this will start searching from the end of the
            trajectory. Otherwise (default) it will start at the beginning.

        Returns
        -------
        iterator of `slice`
            Yields index-slices for sub-trajectories in trajectory that
            are in the ensemble.
        """
        length = len(trajectory)

        if max_length is None:
            max_length = length
        max_length = min(length, max_length)
        min_length = max(1, min_length)

        # lazy formatting: str(trajectory) is only built if DEBUG is on
        logger.debug("Looking for subtrajectories in %s", trajectory)

        # length of the previously tested window; a window that grew by
        # exactly one frame may use the faster `trusted` algorithms
        old_tt_len = 0

        if not reverse:
            start = 0
            end = start + min_length

            while start <= length - min_length and end <= length:
                tt = trajectory[start:end]
                if len(tt) != old_tt_len + 1:
                    can_append_tt = self.strict_can_append(tt)
                else:
                    can_append_tt = self.strict_can_append(tt, trusted=True)
                old_tt_len = len(tt)
                if end < length and can_append_tt:
                    # window may still grow forward
                    end += 1
                    if end - start > max_length + 1:
                        start += 1
                        end = start + min_length
                else:
                    if end - start <= max_length and self(tt, trusted=False):
                        yield slice(start, end)
                        pad = min(overlap, end - start - 1)
                        start = end - pad
                        if end == length:
                            # This means we have reached the end and should
                            # stop. All other possible subtraj can only be
                            # contained in already existing ones
                            start = length
                    elif end - start >= min_length + 1 and \
                            self(tt[0:len(tt) - 1], trusted=False):
                        # the window without its last frame matches
                        yield slice(start, end - 1)
                        pad = min(overlap + 1, end - start - 2)
                        start = end - pad
                    else:
                        # TODO: for some ensembles, there are better ways to
                        # change start. For frame-by-frame ensembles
                        # (AllInX, AllOutX) we know that we can completely
                        # stop for all subtrajectories.
                        start += 1
                    end = start + min_length
        else:
            end = length
            start = end - min_length

            while start >= 0 and end >= min_length:
                tt = trajectory[start:end]
                if len(tt) != old_tt_len + 1:
                    can_prepend_tt = self.can_prepend(tt)
                else:
                    can_prepend_tt = self.can_prepend(tt, trusted=True)
                old_tt_len = len(tt)
                if start > 0 and can_prepend_tt:
                    # window may still grow backward
                    start -= 1
                    if end - start > max_length + 1:
                        end -= 1
                        start = end - min_length
                else:
                    if end - start <= max_length and self(tt, trusted=False):
                        yield slice(start, end)
                        pad = min(overlap, end - start - 1)
                        end = start + pad
                        if start == 0:
                            # This means we have reached the end and should
                            # stop. All other possible subtraj can only be
                            # contained in already existing ones
                            end = 0
                    elif end - start >= min_length + 1 and \
                            self(tt[1:len(tt)], trusted=False):
                        # the window without its first frame matches
                        yield slice(start + 1, end)
                        pad = min(overlap + 1, end - start - 2)
                        end = start + pad
                    else:
                        end -= 1
                    start = end - min_length

    def iter_extendable_slices(
        self,
        trajectory,
        max_length=None,
        min_length=1,
        overlap=1,
        reverse=False
    ):
        """
        Return an iterator over maximal slices of extendable subtrajectories

        In comparison to the iter_valid_slices this will return maximal
        subtrajectories that can potentially be extended into samples of
        the ensemble. Shorter subparts will also always work. The forward
        search uses strict_can_append, the backward search can_prepend. So
        for forward extendable ensembles you can cut at the end and for
        backward extendable ones you can cut at the beginning.

        Notes
        -----
        This feature is not yet fully tested and should be used with care!

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the actual trajectory to be split into ensemble parts
        max_length : int > 0, optional
            if set this determines the maximal size to be tested (is mainly
            used in the recursion)
        min_length : int > 0, optional
            if set this determines the minimal size to be tested
        overlap : int >= 0, optional
            determines the allowed overlap of all trajectories to be found.
            A value of x means that two sub-trajectories can share up to x
            frames at the beginning and x frames at the end. Default is 1
        reverse : bool
            if `True` this will start searching from the end of the
            trajectory. Otherwise (default) it will start at the beginning.

        Returns
        -------
        iterator of `slice`
            Yields index-slices for sub-trajectories in trajectory that
            can be extended.
        """
        length = len(trajectory)

        logger.info('`iter_extendable_slices` is experimental. Use it on your '
                    'own risk!')

        if max_length is None:
            max_length = length
        max_length = min(length, max_length)
        min_length = max(1, min_length)

        # lazy formatting: str(trajectory) is only built if DEBUG is on
        logger.debug("Looking for subtrajectories in %s", trajectory)

        # see iter_valid_slices: enables the `trusted` shortcut
        old_tt_len = 0

        if not reverse:
            start = 0
            end = start + min_length

            while start <= length - min_length and end <= length:
                tt = trajectory[start:end]
                if len(tt) != old_tt_len + 1:
                    can_append_tt = self.strict_can_append(tt)
                else:
                    can_append_tt = self.strict_can_append(tt, trusted=True)
                old_tt_len = len(tt)
                if end < length and can_append_tt:
                    end += 1
                    if end - start > max_length + 1:
                        start += 1
                        end = start + min_length
                else:
                    if end - start <= max_length + 1:
                        yield slice(start, end - 1)
                        pad = min(overlap, end - start - 1)
                        start = end - pad
                        if end == length:
                            # This means we have reached the end and should
                            # stop. All other possible subtraj can only be
                            # contained in already existing ones
                            start = length
                    else:
                        start += 1
                    end = start + min_length
        else:
            end = length
            start = end - min_length

            while start >= 0 and end >= min_length:
                tt = trajectory[start:end]
                if len(tt) != old_tt_len + 1:
                    can_prepend_tt = self.can_prepend(tt)
                else:
                    can_prepend_tt = self.can_prepend(tt, trusted=True)
                old_tt_len = len(tt)
                if start > 0 and can_prepend_tt:
                    start -= 1
                    if end - start > max_length + 1:
                        end -= 1
                        start = end - min_length
                else:
                    if end - start <= max_length + 1:
                        # NOTE(review): same slice form as in the forward
                        # branch; confirm this is intended when searching
                        # backward
                        yield slice(start, end - 1)
                        pad = min(overlap, end - start - 1)
                        end = start + pad
                        if start == 0:
                            # This means we have reached the end and should
                            # stop. All other possible subtraj can only be
                            # contained in already existing ones
                            end = 0
                    else:
                        end -= 1
                    start = end - min_length

    def find_first_subtrajectory(self, trajectory):
        """
        Return the first sub-trajectory that matches the ensemble

        Parameters
        ----------
        trajectory : :class:`openpathsampling.Trajectory`
            the trajectory in which to look for sub-trajectories

        Returns
        -------
        :class:`openpathsampling.Trajectory` or None
            the found sub-trajectory or None if no sub-trajectory was found
        """
        try:
            return trajectory[
                next(self.iter_valid_slices(trajectory))]
        except StopIteration:
            return None

    def find_last_subtrajectory(self, trajectory):
        """
        Return the last sub-trajectory that matches the ensemble

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the trajectory in which to look for sub-trajectories

        Returns
        -------
        :class:`openpathsampling.Trajectory` or None
            the found sub-trajectory or None if no sub-trajectory was found
        """
        try:
            return trajectory[
                next(self.iter_valid_slices(trajectory, reverse=True))]
        except StopIteration:
            return None

    def iter_split(
            self, trajectory, max_length=None, min_length=1, overlap=1,
            reverse=False):
        """Return iterator over subtrajectories satisfying the given ensemble.

        Parameters
        ----------
        trajectory : :py:class:`openpathsampling.trajectory.Trajectory`
            the actual trajectory to be split into ensemble parts
        max_length : int > 0
            if set this determines the maximal size to be tested (is mainly
            used in the recursion)
        min_length : int > 0
            if set this determines the minimal size to be tested
        overlap : int >= 0
            determines the allowed overlap of all trajectories to be found.
            A value of x means that two sub-trajectories can share up to x
            frames at the beginning and x frames at the end. Default is 1
        reverse : bool
            if `True` this will start searching from the end of the
            trajectory. Otherwise (default) it will start at the beginning.

        Returns
        -------
        iterator of :class:`openpathsampling.trajectory.Trajectory`
            Yields the sub-trajectories in trajectory that are in the
            ensemble.

        Notes
        -----
        This uses self.iter_valid_slices and returns the actual
        sub-trajectories
        """
        for part in self.iter_valid_slices(
                trajectory, max_length, min_length, overlap, reverse):
            yield trajectory[part]

    def split(
            self, trajectory, max_length=None, min_length=1, overlap=1,
            reverse=False, n_results=0):
        """Return list of subtrajectories satisfying the given ensemble.

        Parameters
        ----------
        trajectory : :py:class:`openpathsampling.trajectory.Trajectory`
            the actual trajectory to be split into ensemble parts
        max_length : int > 0
            if set this determines the maximal size to be tested (is mainly
            used in the recursion)
        min_length : int > 0
            if set this determines the minimal size to be tested
        overlap : int >= 0
            determines the allowed overlap of all trajectories to be found.
            A value of x means that two sub-trajectories can share up to x
            frames at the beginning and x frames at the end. Default is 1
        reverse : bool
            if `True` this will start searching from the end of the
            trajectory. Otherwise (default) it will start at the beginning.
        n_results : int
            if `0` this will return all results. If the integer is larger
            than zero it will stop after the given number of slices has
            been found

        Returns
        -------
        list of :class:`openpathsampling.trajectory.Trajectory`
            Returns a list of sub-trajectories in trajectory that are in
            the ensemble.

        Notes
        -----
        This uses self.iter_valid_slices and returns the actual
        sub-trajectories
        """
        indices = self.iter_valid_slices(trajectory, max_length, min_length,
                                         overlap, reverse)
        if n_results > 0:
            indices = itertools.islice(indices, n_results)
        return [trajectory[part] for part in indices]

    @property
    def extendable_sub_ensembles(self):
        """dict mapping a level name to a sub-ensemble; empty by default."""
        return {}

    def get_sample_from_trajectories(
        self,
        trajectories,
        replica=0,
        used_trajectories=None,
        reuse_strategy='avoid-symmetric'
    ):
        """
        Generate a sample in the ensemble by testing `trajectories`

        Parameters
        ----------
        trajectories : (list of) :class:`openpathsampling.trajectory.Trajectory`
            single trajectory or list of trajectories to be used to create
            a sample in this ensemble
        replica : int
            the replica id for the sample to be created
        used_trajectories : (list of) :class:`openpathsampling.trajectory.Trajectory`
            trajectories not taken into account in the first attempt.
            `None` (default) means that no trajectory has been used yet.
        reuse_strategy : str
            if it starts with `avoid` then in a second attempt the used
            trajectories are tried. If it ends with `symmetric` a
            trajectory also counts as used when its reversed version is in
            `used_trajectories`.

        Returns
        -------
        :class:`openpathsampling.Sample` or None
            the generated sample or None if no trajectory fits
        """
        trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

        # the default of None used to break the membership tests below
        used = [] if used_trajectories is None else used_trajectories
        symmetric = reuse_strategy.endswith('symmetric')

        used_and_possible = []
        for traj in trajectories:
            already_used = traj in used or (
                symmetric and traj.reversed in used)
            if already_used:
                used_and_possible.append(traj)
            elif self(traj):
                return paths.Sample(
                    trajectory=traj,
                    ensemble=self,
                    replica=replica
                )

        return self._handle_used_trajectories(
            used_trajectories,
            used_and_possible,
            reuse_strategy)

    def split_sample_from_trajectories(
            self, trajectories, replica=0, used_trajectories=None,
            reuse_strategy='avoid-symmetric', unique='shortest'):
        """
        Generate a sample in the ensemble by searching for sub-parts

        Parameters
        ----------
        trajectories : (list of) :class:`openpathsampling.trajectory.Trajectory`
            single trajectory or list of trajectories to be used to create
            a sample in this ensemble
        replica : int
            the replica id for the sample to be created
        used_trajectories : (list of) :class:`openpathsampling.trajectory.Trajectory`
            trajectories not taken into account in the first attempt.
            `None` (default) means that no trajectory has been used yet.
        reuse_strategy : str
            if it starts with `avoid` then in a second attempt the used
            trajectories are tried. If it ends with `symmetric` a part
            also counts as used when its reversed version is in
            `used_trajectories`.
        unique : str
            If `first` the first found subtrajectory is selected. If
            `shortest` (`longest`) the subparts are tried from shortest to
            longest (longest to shortest). If `median` the subparts of
            intermediate length are tried first.

        Returns
        -------
        :class:`openpathsampling.Sample` or None
            the generated sample or None if no sub-part fits
        """
        trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

        # the default of None used to break the membership tests below
        used = [] if used_trajectories is None else used_trajectories
        symmetric = reuse_strategy.endswith('symmetric')

        used_and_possible = []
        for traj in trajectories:
            parts = self._get_trajectory_parts_in_order(traj, unique)
            for part in parts:
                already_used = part in used or (
                    symmetric and part.reversed in used)
                if already_used:
                    used_and_possible.append(part)
                else:
                    # parts are in the ensemble by construction
                    return paths.Sample(
                        trajectory=part,
                        ensemble=self,
                        replica=replica
                    )

        return self._handle_used_trajectories(
            used_trajectories,
            used_and_possible,
            reuse_strategy)

    def extend_sample_from_trajectories(
            self, trajectories, engine, replica=0, unique='median',
            level='complex', on_error='retry', attempts=2):
        """
        Generate a sample in the ensemble by extending parts of `trajectories`

        This will take an initial trajectory, look for usable subparts and
        try to extend them into a valid sample. This works by taking
        information from an ensemble what are reasonable subparts, this is
        returned by the property `.extendable_sub_ensembles` which is only
        non-empty for complex ensembles like Minus or TIS ensemble. As an
        example the minus could extend from the segment ensemble or even a
        segment + parts completely in the inner ensemble. Of course the
        ensemble itself is always valid.

        If a list of trajectories is provided it will be attempted to find
        a valid trajectory using all the trajectory parts.

        Parameters
        ----------
        trajectories : (list of) :class:`openpathsampling.trajectory.Trajectory`
            single trajectory or list of trajectories to be used to create
            a sample in this ensemble
        engine : :class:`openpathsampling.dynamicsengine.DynamicsEngine`
            engine to use for MD extension
        replica : int
            the replica id for the sample to be created
        unique : str
            If `first` the first found subtrajectory is selected. If
            `shortest` (`longest`) the subparts are tried from shortest to
            longest (longest to shortest). If `median` (default) the
            subparts of intermediate length are tried first.
        level : str
            there are three levels you can choose and not all are
            implemented for an ensemble. For all ensembles you can use
            `native` which will simply try to extend the ensemble itself,
            the most simple one, which is always possible. Picking
            `complex` will use the largest (most complex) sub-ensemble that
            makes sense. Like in the case of a Minus move this is the
            segment ensemble. The other choice is `minimal` which chooses
            the minimal necessary subtrajectory extending makes sense from.
            For TIS or Minus Ensembles this will be crossing from the
            (initial) core to the outside. You should try `complex` first
            and then `minimal`. `complex` should be much faster.
        on_error : str
            if `retry` (default) then any engine error will trigger a retry
            and eventually no sample will be returned. `fail` will raise
            the exception. Typical things to happen are `MaxLengthError` or
            `NaNError`, but also initialisation error can happen. `fail`
            should only be used for debugging purposes since you will not
            get a preliminary sampleset as a result but an exception.
        attempts : int
            the number of attempts on a trajectory to extend

        Returns
        -------
        :class:`openpathsampling.Sample` or None
            the generated sample or None if nothing worked
        """
        logger.info("Starting extend_sample_from_trajectories with level "
                    + str(level))
        if level == 'native':
            sub_ensemble = self
        else:
            if not hasattr(self, 'extendable_sub_ensembles'):
                logger.info("Missing ensemble.extendable_sub_ensembles")
                return None

            sub_ensembles = self.extendable_sub_ensembles
            if level not in sub_ensembles:
                logger.info("Missing level: " + repr(level))
                return None

            sub_ensemble = sub_ensembles[level]

        trajectories = paths.Trajectory._to_list_of_trajectories(trajectories)

        for traj in trajectories:
            traj_parts = sub_ensemble._get_trajectory_parts_in_order(
                traj,
                unique)

            for orig in traj_parts:
                for attempt in range(attempts):
                    # every attempt starts from a fresh copy of the part
                    part = paths.Trajectory(orig)
                    logger.info((
                        'extend - attempt [%d] : extending from initial '
                        'length %d\n') % (
                            attempt + 1,
                            len(part)
                        ))
                    try:
                        if self.strict_can_append(part):
                            # seems we could extend forward
                            part = part[:-1] + \
                                engine.generate(
                                    part[-1],
                                    [paths.PrefixTrajectoryEnsemble(
                                        self,
                                        part
                                    ).strict_can_append],
                                    direction=+1
                                )

                        if self.strict_can_prepend(part):
                            # and extend backward
                            part = engine.generate(
                                part[0].reversed,
                                [paths.SuffixTrajectoryEnsemble(
                                    self,
                                    part
                                ).strict_can_prepend],
                                direction=-1
                            ).reversed + part[1:]

                        logger.info("Candidate trajectory: " + str(part))
                        if self(part):
                            # make sure we found a sample
                            return paths.Sample(
                                trajectory=part,
                                ensemble=self,
                                replica=replica
                            )
                    except paths.engines.EngineError as err:
                        if on_error == 'fail':
                            raise
                        # 'retry' (and any unknown value): go on with the
                        # next attempt, but leave a trace in the log
                        logger.info("extend - attempt [%d] failed: %s",
                                    attempt + 1, err)

        logger.info("Returning None because nothing worked")
        return None

    def _get_trajectory_parts_in_order(self, traj, unique='first'):
        """
        Return the subtrajectories of `traj` in this ensemble, ordered by
        the strategy `unique`.

        Parameters
        ----------
        traj : :class:`openpathsampling.trajectory.Trajectory`
            the trajectory to be split
        unique : str
            `first` returns a lazy iterator in order of appearance;
            `shortest` / `longest` return a list sorted by length;
            `median` returns a list starting with the parts of
            intermediate length; anything else gives an empty list.

        Returns
        -------
        list or iterator of :class:`openpathsampling.trajectory.Trajectory`
        """
        if unique == 'first':
            # this returns an iterator and can thus be faster
            parts = self.iter_split(traj)
        elif unique == 'shortest':
            parts = sorted(self.split(traj), key=len)
        elif unique == 'median':
            # resort the found trajectories so that the middle one is
            # first, then the one right to it, then the one before, etc
            # e.g. [0,1,2,3,4,5,6,7,8,9] is rearranged into
            # [5,4,6,3,7,2,8,1,9,0]
            ordered = sorted(self.split(traj), key=len)
            half = len(ordered) // 2
            parts = [
                p
                for pair in zip(ordered[half:], reversed(ordered[:half]))
                for p in pair
            ]
            if len(ordered) & 1:
                # zip dropped the longest part of an odd number of parts
                parts.append(ordered[-1])
        elif unique == 'longest':
            parts = sorted(self.split(traj), key=len, reverse=True)
        else:
            parts = []

        # the lazy iterator of the `first` strategy has no length and is
        # therefore not summarized
        if isinstance(parts, list) and parts:
            lens = sorted(len(p) for p in parts)
            logger.info(
                ('splitting - found %d slices of lengths '
                 '[%d, ..., %d, ..., %d] '
                 'ordered by `%s`\n'),
                len(parts), lens[0], lens[len(lens) // 2], lens[-1], unique)

        return parts

    def _handle_used_trajectories(
            self, used_trajectories, used_and_possible, reuse_strategy):
        """
        Second attempt of the sample generation: reuse a used trajectory.

        Parameters
        ----------
        used_trajectories : list of :class:`openpathsampling.trajectory.Trajectory` or None
            trajectories that have been used before. A reused trajectory
            is moved to the end of this list (in place).
        used_and_possible : list of :class:`openpathsampling.trajectory.Trajectory`
            candidates that were skipped because they were already used
        reuse_strategy : str
            nothing is reused unless this starts with `avoid`; if it ends
            with `symmetric` the reversed trajectories are matched as well

        Returns
        -------
        :class:`openpathsampling.Sample` or None
            a sample of the first reusable trajectory in the ensemble
        """
        if not reuse_strategy.startswith('avoid') \
                or used_trajectories is None:
            return None

        symmetric = reuse_strategy.endswith('symmetric')
        for part in used_trajectories:
            reusable = part in used_and_possible or (
                symmetric and part.reversed in used_and_possible)
            if reusable and self(part):
                # move the used one to the back of the list to
                # not reuse it directly (safe: we return right away)
                used_trajectories.remove(part)
                used_trajectories.append(part)
                return paths.Sample(
                    trajectory=part,
                    ensemble=self
                )

        return None

    def __str__(self):
        # the description is built once and then cached
        if self._saved_str is None:
            self._saved_str = self._str()
        return self._saved_str

    def _str(self):
        """
        Returns a complete mathematical expression that defines the current
        ensemble in a readable form.

        Notes
        -----
        This should be cleaned up a little
        """
        return 'Ensemble'

    def __or__(self, other):
        if self is other:
            return self
        elif type(other) is EmptyEnsemble:
            return self
        elif type(other) is FullEnsemble:
            return other
        else:
            return UnionEnsemble(self, other)

    def __and__(self, other):
        if self is other:
            return self
        elif type(other) is EmptyEnsemble:
            return other
        elif type(other) is FullEnsemble:
            return self
        else:
            return IntersectionEnsemble(self, other)

    # NOTE: __xor__, __sub__ and __invert__ are intentionally not defined
    # here; the corresponding set operations (symmetric difference,
    # relative complement, negation) are not correct for all ensembles.

    @staticmethod
    def _indent(s):
        """Prefix every line of the string `s` with the indent string."""
        # NOTE(review): indent width copied verbatim; confirm it against
        # the upstream file in case whitespace was lost in transit
        return '\n'.join([' ' + line for line in s.split('\n')])


class EmptyEnsemble(Ensemble):
    """
    The path ensemble that does not contain any trajectory.
    """

    def __init__(self):
        super(EmptyEnsemble, self).__init__()

    def _str(self):
        return 'empty'

    # -- membership tests: nothing is ever part of the empty set ----------

    def __call__(self, trajectory, trusted=None, candidate=False):
        return False

    def can_append(self, trajectory, trusted=False):
        return False

    def can_prepend(self, trajectory, trusted=False):
        return False

    # -- set algebra with the empty set -----------------------------------

    def __or__(self, other):
        # empty | X == X
        return other

    def __xor__(self, other):
        # empty ^ X == X
        return other

    def __and__(self, other):
        # empty & X == empty
        return self

    def __sub__(self, other):
        # empty - X == empty
        return EmptyEnsemble()

    def __invert__(self):
        # ~empty == all
        return FullEnsemble()


class FullEnsemble(Ensemble):
    """
    The path ensemble that contains every possible trajectory.
    """

    def __init__(self):
        super(FullEnsemble, self).__init__()

    def _str(self):
        return 'all'

    # -- membership tests: everything is part of the full set -------------

    def __call__(self, trajectory, trusted=None, candidate=False):
        return True

    def can_append(self, trajectory, trusted=False):
        return True

    def can_prepend(self, trajectory, trusted=False):
        return True

    # -- set algebra with the full set ------------------------------------

    def _everything_but(self, other):
        """Return the ensemble of all trajectories that are not in `other`."""
        other_type = type(other)
        if other_type is EmptyEnsemble:
            return self
        if other_type is FullEnsemble:
            return EmptyEnsemble()
        return NegatedEnsemble(other)

    def __sub__(self, other):
        # all - X == not X
        return self._everything_but(other)

    def __xor__(self, other):
        # all ^ X == not X
        return self._everything_but(other)

    def __and__(self, other):
        # all & X == X
        return other

    def __or__(self, other):
        # all | X == all
        return self

    def __invert__(self):
        # ~all == empty
        return EmptyEnsemble()


class NegatedEnsemble(Ensemble):
    """
    Negates an Ensemble and simulates a `not` statement

    Membership is the logical negation of the wrapped ensemble's
    membership. The append/prepend checks always answer ``True``.
    """
    # TODO: this whole concept is false and this should be removed

    def __init__(self, ensemble):
        super(NegatedEnsemble, self).__init__()
        self.ensemble = ensemble

    def __call__(self, trajectory, trusted=None, candidate=False):
        wrapped_result = self.ensemble(trajectory, trusted, candidate)
        return not wrapped_result

    def can_prepend(self, trajectory, trusted=False):
        # We cannot guess the result here so keep on running forever
        return True

    def can_append(self, trajectory, trusted=False):
        # We cannot guess the result here so keep on running forever
        return True

    def _str(self):
        wrapped_description = str(self.ensemble)
        return 'not ' + wrapped_description
class EnsembleCombination(Ensemble):
    """
    Logical combination of two ensembles

    Attributes
    ----------
    ensemble1, ensemble2 : :class:`.Ensemble`
        the two ensembles being combined
    fnc : callable
        two-argument function combining the boolean results of the two
        ensembles
    sfnc : str
        format string (fields ``{0}`` and ``{1}``) used to build the string
        representation
    debug : bool
        whether debug logging was enabled for this module's logger when the
        instance was created; checked instead of the logger on every call
    """

    def __init__(self, ensemble1, ensemble2, fnc, str_fnc):
        super(EnsembleCombination, self).__init__()
        self.ensemble1 = ensemble1
        self.ensemble2 = ensemble2
        self.fnc = fnc
        self.sfnc = str_fnc
        self.debug = logger.isEnabledFor(logging.DEBUG)

    def to_dict(self):
        # only the two sub-ensembles are included; the combination
        # functions are not part of the dictionary
        return {'ensemble1': self.ensemble1, 'ensemble2': self.ensemble2}

    def _generalized_short_circuit(self, combo, f1, f2, trajectory, trusted,
                                   fname=""):
        """
        Handles short-circuit logic, all in one place for code simplicity.
        Short-circuit logic skips the second part of the combination if the
        result doesn't depend on it.

        ``f2`` is evaluated at most once, and only when the result depends
        on it. This holds with debug logging on as well, so that turning
        on logging never triggers extra evaluations of ensemble 2.

        Note
        ----
        If you want to enable debug logging for this, it either needs to be
        enabled when the class is instantiated or set with the .debug
        instance variable. This is to improve performance since this method
        is called very frequently.

        Parameters
        ----------
        combo :
            the combination function
        f1 :
            ensemble1's function. Takes trajectory, returns bool. Examples
            include `__call__`, `can_append`, etc.
        f2 :
            ensemble2's function. As with f1, but for ensemble 2.
        trajectory : :class:`.Trajectory`
            input trajectory
        trusted : bool
            the `trusted` flag to send to f1 and f2
        fname : string
            name of the functions f1 and f2. Only used in debug output.

        Returns
        -------
        the value of ``combo`` applied to the results of ``f1`` and ``f2``
        """
        debug = self.debug
        if debug:  # pragma: no cover
            logger.debug("Combination is " + self.__class__.__name__)

        a = f1(trajectory, trusted)
        if debug:  # pragma: no cover
            logger.debug("Combination." + fname + ": " +
                         self.ensemble1.__class__.__name__ +
                         " is " + str(a))

        # if both possible values of the second operand give the same
        # answer, the second ensemble does not need to be evaluated
        res_true = combo(a, True)
        res_false = combo(a, False)
        if res_false == res_true:
            if debug:  # pragma: no cover
                logger.debug("Combination." + fname + ": " +
                             self.ensemble2.__class__.__name__ +
                             " is not evaluated (short-circuit)")
                logger.debug("Combination should return " + str(res_true))
            return res_true

        b = f2(trajectory, trusted)
        result = combo(a, b)
        if debug:  # pragma: no cover
            logger.debug("Combination." + fname + ": " +
                         self.ensemble2.__class__.__name__ +
                         " is " + str(b))
            logger.debug("Combination should return " + str(result))
        return result

    def __call__(self, trajectory, trusted=None, candidate=False):
        return self._generalized_short_circuit(
            combo=self.fnc,
            f1=self.ensemble1,
            f2=self.ensemble2,
            trajectory=trajectory,
            trusted=trusted,
            fname="__call__"
        )

    def can_append(self, trajectory, trusted=False):
        return self._generalized_short_circuit(
            combo=self.fnc,
            f1=self.ensemble1.can_append,
            f2=self.ensemble2.can_append,
            trajectory=trajectory,
            trusted=trusted,
            fname="can_append"
        )

    def can_prepend(self, trajectory, trusted=False):
        return self._generalized_short_circuit(
            combo=self.fnc,
            f1=self.ensemble1.can_prepend,
            f2=self.ensemble2.can_prepend,
            trajectory=trajectory,
            trusted=trusted,
            fname="can_prepend"
        )

    def strict_can_append(self, trajectory, trusted=False):
        return self._generalized_short_circuit(
            combo=self.fnc,
            f1=self.ensemble1.strict_can_append,
            f2=self.ensemble2.strict_can_append,
            trajectory=trajectory,
            trusted=trusted,
            fname="strict_can_append"
        )

    def strict_can_prepend(self, trajectory, trusted=False):
        return self._generalized_short_circuit(
            combo=self.fnc,
            f1=self.ensemble1.strict_can_prepend,
            f2=self.ensemble2.strict_can_prepend,
            trajectory=trajectory,
            trusted=trusted,
            fname="strict_can_prepend"
        )

    def _str(self):
        # each operand is wrapped in parentheses on their own lines, with
        # the operand's own description indented inside
        return self.sfnc.format(
            '(\n' + Ensemble._indent(str(self.ensemble1)) + '\n)',
            '(\n' + Ensemble._indent(str(self.ensemble2)) + '\n)')
class UnionEnsemble(EnsembleCombination):
    """Combination of two ensembles joined with a logical ``or``."""

    def __init__(self, ensemble1, ensemble2):
        def _either(a, b):
            return a or b

        super(UnionEnsemble, self).__init__(
            ensemble1,
            ensemble2,
            fnc=_either,
            str_fnc='{0}\nor\n{1}'
        )
class IntersectionEnsemble(EnsembleCombination):
    """Combination of two ensembles joined with a logical ``and``."""

    def __init__(self, ensemble1, ensemble2):
        def _both(a, b):
            return a and b

        super(IntersectionEnsemble, self).__init__(
            ensemble1,
            ensemble2,
            fnc=_both,
            str_fnc='{0}\nand\n{1}'
        )
# class SymmetricDifferenceEnsemble(EnsembleCombination): # # TODO: this is not yet supported. Should be removed. ~DWHS # # should just be a shortcut for (ens1 | ens2) & ~(ens1 & ens2) # # should probably not even be a class. Just have `ensemble.__xor__` # # return (ens1 | ens2) & ~(ens1 & ens2) # def __init__(self, ensemble1, ensemble2): # super(SymmetricDifferenceEnsemble, self).__init__( # ensemble1, # ensemble2, # fnc=lambda a, b: a ^ b, # str_fnc='{0}\nxor\n{1}') # class RelativeComplementEnsemble(EnsembleCombination): # # TODO: this is not yet supported. Should be removed. ~DWHS # # should be a shortcut for ens1 & ~ens2 # # should probably not even be a class. Just have `ensemble.__sub__` # # return ens1 & ~ens2 # def __init__(self, ensemble1, ensemble2): # super(RelativeComplementEnsemble, self).__init__( # ensemble1, # ensemble2, # fnc=lambda a, b: a and not b, # str_fnc='{0}\nand not\n{1}')
class SequentialEnsemble(Ensemble):
    """Ensemble which satisfies several subensembles in sequence.

    Attributes
    ----------
    ensembles : tuple of Ensemble
        The ensembles, in time-order of when they should occur in the
        trajectory.
    min_overlap : int or tuple of int
        The minimum number of frames that overlap between two ensembles in
        the sequence. A positive number n indicates that at least n frames
        must be in both ensembles at the transition between them. A negative
        number -n indicates that at least n frames are in neither ensemble
        at the transition between them. If given as a list, the list should
        be of length len(ensembles)-1, with one value for each transition.
        If given as an integer, that value will be used for all transitions.
    max_overlap : int or list of int
        The maximum number of frames that overlap between two ensembles in
        the sequence. A positive number n indicates that no more than n
        frames can be in both ensembles at the transition between them. A
        negative number -n indicates no more than n frames are in neither
        ensemble at the transition between them. If given as a list, the
        list should be of length len(ensembles)-1, with one value for each
        transition. If given as an integer, that value will be used for all
        transitions.
    greedy : bool
        Stored on the instance; not read by any method of this class.

    Notes
    -----
        TODO: Overlap features not implemented because ohmygod this was hard
        enough already.
    """

    def __init__(self, ensembles, min_overlap=0, max_overlap=0, greedy=False):
        # make tuples of the min/max overlaps
        super(SequentialEnsemble, self).__init__()
        if type(min_overlap) is int:
            min_overlap = (min_overlap,) * (len(ensembles) - 1)
        if type(max_overlap) is int:
            max_overlap = (max_overlap,) * (len(ensembles) - 1)

        self.ensembles = ensembles
        self.min_overlap = min_overlap
        self.max_overlap = max_overlap
        self.greedy = greedy

        # one cache per query type; +1 caches are built with direction +1
        # (append-type queries), -1 caches with direction -1 (prepend-type)
        self._use_cache = True  # cache can be turned off
        self._cache_can_append = EnsembleCache(+1)
        self._cache_strict_can_append = EnsembleCache(+1)
        self._cache_call = EnsembleCache(+1)
        self._cache_can_prepend = EnsembleCache(-1)
        self._cache_strict_can_prepend = EnsembleCache(-1)
        self._cache_check_reverse = EnsembleCache(-1)

        # empty trajectory, used to ask a subensemble whether it accepts a
        # zero-length subtrajectory
        self._zero_traj = paths.Trajectory([])

        # sanity checks
        if len(self.min_overlap) != len(self.max_overlap):
            raise ValueError("len(min_overlap) != len(max_overlap)")
        if len(self.min_overlap) != len(self.ensembles) - 1:
            raise ValueError(
                "Number of overlaps doesn't match number of transitions")
        for i in range(len(self.min_overlap)):
            if min_overlap[i] > max_overlap[i]:
                raise ValueError("min_overlap greater than max_overlap!")

    @staticmethod
    def update_cache(cache, ens_num, ens_from, subtraj_from):
        """Updates the given cache.

        Any of `ens_num`, `ens_from`, `subtraj_from` may be the string
        ``"keep"``, in which case the value currently stored in the cache
        is retained.

        Parameters
        ----------
        cache : `EnsembleCache`
            the cache to be updated
        ens_num : integer
            current value of `ens_num` in the sequential ensemble
        ens_from : integer
            current "start" ensemble index. For forward-direction caches,
            this is ens_first. For reverse-direction caches, this is
            ens_final. The "initial" (in the appropriate direction) frame is
            assigned to this ensemble
        subtraj_from : integer
            index of the "start" frame of the subtrajectory in this
            subensemble. For forward-direction caches, this is the first
            frame of the subtrajectory. For reverse-direction caches, this
            is the final frame of the subtrajectory.
        """
        if ens_num == "keep":
            ens_num = cache.contents['ens_num']
        if ens_from == "keep":
            ens_from = cache.contents['ens_from']
        if subtraj_from == "keep":
            subtraj_from = cache.contents['subtraj_from']
        cache.contents['ens_num'] = ens_num
        cache.contents['ens_from'] = ens_from
        cache.contents['subtraj_from'] = subtraj_from
        logger.debug("Setting cache | ens_num " + str(ens_num) +
                     " | ens_from " + str(ens_from) +
                     " | subtraj_from " + str(subtraj_from))
        logger.debug("Cache is Trusted: " + str(cache.trusted))

    @staticmethod
    def assign_frames(cache, ens_num, subtraj_first=None, subtraj_final=None):
        """Record which frames were assigned to subensemble `ens_num`.

        The assignment is stored as ``slice(subtraj_first, subtraj_final)``
        under ``cache.contents['assignments'][ens_num]``. Passing
        ``ens_num=None`` resets the assignments to an empty dict.
        """
        if ens_num is None:
            cache.contents['assignments'] = {}
        else:
            cache.contents['assignments'][ens_num] = \
                slice(subtraj_first, subtraj_final)
        logger.debug("Cache assignments: " +
                     str(cache.contents['assignments']))

    def transition_frames(self, trajectory, trusted=None):
        """Return the end-frame index found for each subensemble in turn.

        Starting with the first ensemble at frame 0, each subensemble is
        given the subtrajectory found by `_find_subtraj_final`, and the end
        index of that subtrajectory is appended to the returned list. The
        `trusted` argument is not used by this method.

        Returns
        -------
        list of int
            end index of each subtrajectory that was assigned; may be
            shorter than ``len(self.ensembles)`` if assignment stopped
            early
        """
        # it is easiest to understand this decision tree as a simplified
        # version of the can_append decision tree; see that for detailed
        # comments
        # self._check_cache(trajectory, function="call")
        ens_num = 0
        subtraj_first = 0
        traj_final = len(trajectory)
        final_ens = len(self.ensembles) - 1
        transitions = []
        while True:
            if ens_num <= final_ens:
                subtraj_final = self._find_subtraj_final(trajectory,
                                                         subtraj_first,
                                                         ens_num)
            else:
                # ran out of ensembles
                return transitions
            if subtraj_final - subtraj_first > 0:
                # subtraj = trajectory[slice(subtraj_first, subtraj_final)]
                if ens_num == final_ens:
                    if subtraj_final == traj_final:
                        # success
                        transitions.append(subtraj_final)
                        return transitions
                    else:
                        # fails because we have more frames to assign
                        transitions.append(subtraj_final)
                        return transitions
                else:
                    ens_num += 1
                    transitions.append(subtraj_final)
                    subtraj_first = subtraj_final
            else:
                # no frames for this ensemble: only continue if the
                # ensemble accepts a zero-length trajectory
                if ens_num <= final_ens and \
                        self.ensembles[ens_num](self._zero_traj):
                    ens_num += 1
                    transitions.append(subtraj_final)
                    subtraj_first = subtraj_final
                else:
                    return transitions

    def __call__(self, trajectory, trusted=None, candidate=False):
        """Return True if `trajectory` splits into the ensemble sequence.

        Requires one transition per subensemble, the last transition at the
        end of the trajectory, and each subtrajectory to satisfy its
        subensemble.
        """
        logger.debug("Looking for transitions in trajectory " +
                     str(trajectory))
        transitions = self.transition_frames(trajectory, trusted)
        logger.debug("Found transitions: " + str(transitions))
        # fail if we don't have the right number of transitions, or if the
        # last transition is not at the end of the trajectory
        # print transitions
        if len(transitions) != len(self.ensembles):
            # print "Returns false b/c not enough ensembles"
            return False
        elif transitions[-1] != len(trajectory):
            # print "Returns false b/c not all frames assigned"
            return False

        subtraj_first = 0
        subtraj_i = 0
        # Make a list before slicing
        ltraj = _get_list_traj(trajectory)
        while subtraj_i < len(self.ensembles):
            subtraj_final = transitions[subtraj_i]
            subtraj = ltraj[slice(subtraj_first, subtraj_final)]
            if not self.ensembles[subtraj_i](subtraj):
                # print "Returns false b/c ensemble", subtraj_i," fails"
                return False
            subtraj_i += 1
            subtraj_first = subtraj_final
        return True

    def _find_subtraj_final(self, traj, subtraj_first, ens_num,
                            last_checked=None):
        """
        Find the longest subtrajectory of trajectory which starts at
        subtraj_first and satisfies self.ensembles[ens_num].can_append

        If `last_checked` is given, the search starts from
        ``max(last_checked, subtraj_first)`` instead of `subtraj_first`.

        Returns
        -------
        int
            Frame of traj which is the final frame for a subtraj starting at
            subtraj_first and satisfying self.ensembles.can_append[ens_num]
        """
        if last_checked is None:
            subtraj_final = subtraj_first
        else:
            subtraj_final = max(last_checked, subtraj_first)
        traj_final = len(traj)
        ens = self.ensembles[ens_num]
        # Make a list before slicing
        ltraj = _get_list_traj(traj)
        subtraj = ltraj[slice(subtraj_first, subtraj_final + 1)]
        # if we're in the ensemble or could eventually be in the ensemble,
        # we keep building the subtrajectory

        # TODO: this doesn't actually reflect the cleanest behavior: should
        # be the proper hybrid definition where we can append until/unless
        # we overshoot
        logger.debug("*Traj slice " + str(subtraj_first) + " " +
                     str(subtraj_final + 1) + " / " + str(traj_final))
        # logger.debug("Ensemble " + str(ens.__class__.__name__))# + str(ens))
        # logger.debug("Can-app " + str(ens.can_append(subtraj, trusted=True)))
        # logger.debug("Call    " + str(ens(subtraj, trusted=True)))
        # TODO: the weird while condition is handling the OVERSHOOTING
        while ((ens.can_append(subtraj, trusted=True) or
                ens(subtraj, trusted=True)) and subtraj_final < traj_final):
            subtraj_final += 1
            subtraj = ltraj[slice(subtraj_first, subtraj_final + 1)]
            logger.debug(" Traj slice " + str(subtraj_first) + " " +
                         str(subtraj_final + 1) + " / " + str(traj_final))
        return subtraj_final

    def _find_subtraj_first(self, traj, subtraj_final, ens_num,
                            last_checked=None):
        """
        Backward counterpart of `_find_subtraj_final`: extend a
        subtrajectory that ends at `subtraj_final` toward earlier frames
        while self.ensembles[ens_num] can prepend or passes check_reverse.

        If `last_checked` is given, the search starts from
        ``min(last_checked, subtraj_final - 1)``.

        Returns
        -------
        int
            index of the first frame of the subtrajectory
        """
        if last_checked is None:
            subtraj_first = subtraj_final - 1
        else:
            subtraj_first = min(last_checked, subtraj_final - 1)
        traj_first = 0
        ens = self.ensembles[ens_num]
        # Make a list before slicing
        ltraj = _get_list_traj(traj)
        subtraj = ltraj[slice(subtraj_first, subtraj_final)]
        logger.debug("*Traj slice " + str(subtraj_first) + " " +
                     str(subtraj_final) + " / " + str(len(traj)))
        # logger.debug("Ensemble " + str(ens.__class__.__name__))# + str(ens))
        # logger.debug("Can-app " + str(ens.can_prepend(subtraj, trusted=True)))
        # logger.debug("Call    " + str(ens(subtraj, trusted=True)))
        # TODO: the weird while condition is handling the OVERSHOOTING
        while ((ens.can_prepend(subtraj, trusted=True) or
                ens.check_reverse(subtraj, trusted=True)
               ) and subtraj_first >= traj_first):
            subtraj_first -= 1
            subtraj = ltraj[slice(subtraj_first, subtraj_final)]
            logger.debug(" Traj slice " + str(subtraj_first + 1) + " " +
                         str(subtraj_final) + " / " + str(len(traj)))
        # the loop steps one frame past the last accepted start
        return subtraj_first + 1

    def _generic_can_append(self, trajectory, trusted, strict):
        """Shared implementation of `can_append` and `strict_can_append`.

        Parameters
        ----------
        trajectory : :class:`.Trajectory`
            trajectory to test
        trusted : bool
            if true, the selected cache is marked as trusted
        strict : bool
            if True, use the strict cache and return False as soon as the
            sequence starting with the first ensemble fails, instead of
            retrying with sequences that start at a later ensemble

        Returns
        -------
        bool
        """
        # treat this like we're implementing a regular expression parser ...
        # .*ensemble.+ ; but we have to do this for all possible matches
        # There are three tests we consider:
        # 1. subtraj_final - subtraj_first > 0: Do we obtain a subtrajectory?
        # 2. subtraj_final == traj_final: Have we assigned all the frames?
        # 3. ens_num == final_ens: are we looking at the last ensemble
        # Various combinations of these result in three possible outcomes:
        # (a) return True (we can append)
        # (b) return False (we can't append)
        # (c) loop around to test another subtrajectory (we can't tell)
        # Returning false can only happen if all ensembles have been tested
        # self._check_cache(trajectory, function="can_append")
        cache = self._cache_can_append
        if strict:
            cache = self._cache_strict_can_append
        if trusted:
            cache.trusted = True
        subtraj_first = 0
        ens_num = 0
        ens_first = 0

        if self._use_cache:
            _ = cache.check(trajectory)
            if cache.contents == {}:
                # empty cache: start from scratch
                self.update_cache(cache, 0, 0, 0)
                self.assign_frames(cache, None)
            else:
                # resume from the state stored by the previous call
                subtraj_first = cache.contents['subtraj_from']
                ens_num = cache.contents['ens_num']
                ens_first = cache.contents['ens_from']

        traj_final = len(trajectory)
        final_ens = len(self.ensembles) - 1
        # Make a list before slicing
        ltraj = _get_list_traj(trajectory)
        # print traj_final, final_ens
        # logging startup
        if cache.debug_enabled:  # pragma: no cover
            logger.debug(
                "Beginning can_append with subtraj_first=" +
                str(subtraj_first) + "; ens_first=" + str(ens_first) +
                "; ens_num=" + str(ens_num) + "; strict=" + str(strict)
            )
            logger.debug(
                "Can-append sees a trusted cache: " + str(cache.trusted)
            )
            if cache.trusted:
                logger.debug("Cache contents: " + str(cache.contents))
                logger.debug("cache.prev_last_frame: " +
                             str(trajectory.index(cache.prev_last_frame)))
            for i in range(len(self.ensembles)):
                ens = self.ensembles[i]
                logger.debug("Ensemble " + str(i) +
                             " : " + ens.__class__.__name__)

        while True:  # main loop, with various exits
            if self._use_cache and cache.trusted:
                # TODO: trajectory.index is expensive... how to speed up?
                # offset = 1
                offset = 0
                # if cache.last_length == len(trajectory):
                #     offset += 1
                try:
                    last_checked_index = cache.prev_last_index - offset
                except:  # on any exception
                    # TODO: ideally, this won't be covered by tests, and can
                    # eventually be removed (along with try/except)
                    last_checked_index = \
                        trajectory.index(cache.prev_last_frame) - offset
                #last_checked = trajectory.index(cache.prev_last_frame) - offset
                last_checked = last_checked_index
            else:
                last_checked_index = None
                last_checked = None
            if cache.debug_enabled:
                logger.debug("last_checked = " + str(last_checked))
            subtraj_final = self._find_subtraj_final(
                trajectory, subtraj_first, ens_num, last_checked
            )
            cache.last_length = subtraj_final
            if cache.debug_enabled:
                logger.debug(
                    "Subtraj for ens " + str(ens_num) + " : " +
                    "(" + str(subtraj_first) + "," + str(subtraj_final) + ")"
                )
            if subtraj_final - subtraj_first > 0:
                subtraj = ltraj[slice(subtraj_first, subtraj_final)]
                if ens_num == final_ens:
                    if subtraj_final == traj_final:
                        # we're in the last ensemble and the whole
                        # trajectory is assigned: can we append?
                        ens = self.ensembles[ens_num]
                        if cache.debug_enabled:
                            logger.debug("Returning can_append for " +
                                         str(ens.__class__.__name__))
                        self.update_cache(cache, ens_num, ens_first,
                                          subtraj_first)
                        return ens.can_append(subtraj, trusted=True)
                    else:
                        logger.debug(
                            "Returning false due to incomplete assigns: " +
                            str(subtraj_final) + "!=" + str(traj_final)
                        )
                        return False  # in final ensemble, not all assigned
                else:
                    # subtraj existed, but not yet final ensemble
                    # so we start with the next ensemble
                    end_traj = (subtraj_final == traj_final)
                    ensemble = self.ensembles[ens_num]
                    if not end_traj and not ensemble(subtraj,
                                                     trusted=cache.trusted):
                        logger.debug(
                            "Couldn't assign frames " + str(subtraj_first) +
                            " through " + str(subtraj_final) +
                            " to ensemble " + str(ens_num) + ": No match"
                        )
                    else:
                        logger.debug(
                            "Assigning frames " + str(subtraj_first) +
                            " through " + str(subtraj_final) +
                            " to ensemble " + str(ens_num)
                        )
                        self.assign_frames(cache, ens_num, subtraj_first,
                                           subtraj_final)
                        self.update_cache(cache, ens_num, ens_first,
                                          subtraj_first)
                    ens_num += 1
                    subtraj_first = subtraj_final
                    logger.debug("Moving to the next ensemble " +
                                 str(ens_num))
            else:  # no subtrajectory found
                if subtraj_final == traj_final:
                    # all frames assigned, but not all ensembles finished;
                    # next frame might satisfy next ensemble
                    if self._use_cache:
                        prev_slice = cache.contents['assignments'][ens_num - 1]
                        prev_subtraj = ltraj[prev_slice]
                        prev_ens = self.ensembles[ens_num - 1]
                        if prev_ens.can_append(prev_subtraj, trusted=True):
                            # the previous ensemble could still grow, so
                            # step back to it and keep its cached start
                            logger.debug(
                                "Premature promotion: returning to ensemble " +
                                str(ens_num - 1)
                            )
                            ens_num -= 1
                            subtraj_first = "keep"
                    self.update_cache(cache, ens_num, ens_first,
                                      subtraj_first)
                    logger.debug(
                        "All frames assigned, more ensembles to go: "
                        "returning True")
                    return True
                elif self.ensembles[ens_num](self._zero_traj):
                    logger.debug(
                        "Moving on because of allowed zero-length ensemble")
                    ens_num += 1
                    subtraj_first = subtraj_final
                    self.update_cache(cache, ens_num, ens_first,
                                      subtraj_first)
                else:
                    # not all frames assigned, couldn't find a sequence
                    # start over with sequences that begin with the next
                    # ensemble
                    if ens_first == final_ens:
                        logger.debug(
                            "Started with the last ensemble, got nothin'")
                        return False
                    elif strict is False:
                        logger.debug(
                            "Reassigning all frames, starting with ensemble " +
                            str(ens_first)
                        )
                        ens_first += 1
                        ens_num = ens_first
                        subtraj_first = 0
                        self.update_cache(cache, ens_num, ens_first,
                                          subtraj_first)
                    else:
                        logger.debug(
                            "First ensemble fails and strict -- return false"
                        )
                        return False

    def can_append(self, trajectory, trusted=False):
        """Non-strict append check; see `_generic_can_append`."""
        return self._generic_can_append(trajectory, trusted, strict=False)

    def strict_can_append(self, trajectory, trusted=False):
        """Strict append check; see `_generic_can_append`."""
        return self._generic_can_append(trajectory, trusted, strict=True)

    def _generic_can_prepend(self, trajectory, trusted, strict):
        """Shared implementation of `can_prepend` and `strict_can_prepend`.

        Works from the last ensemble and the end of the trajectory toward
        the first ensemble and frame 0. Frame positions written to the
        cache are offsets from the end of the trajectory
        (``index - len(trajectory)``, with 0 replaced by ``None``).

        Parameters
        ----------
        trajectory : :class:`.Trajectory`
            trajectory to test
        trusted : bool
            if true, the selected cache is marked as trusted
        strict : bool
            if True, use the strict cache and return False as soon as the
            sequence ending with the last ensemble fails, instead of
            retrying with sequences that end at an earlier ensemble

        Returns
        -------
        bool
        """
        # based on .can_append(); see notes there for algorithm details
        cache = self._cache_can_prepend
        if strict:
            cache = self._cache_strict_can_prepend
        if trusted:
            cache.trusted = True
        traj_first = 0
        first_ens = 0
        subtraj_final = len(trajectory)
        ens_final = len(self.ensembles) - 1
        ens_num = ens_final
        # Make list before slicing
        ltraj = _get_list_traj(trajectory)

        if self._use_cache:
            _ = cache.check(trajectory)
            if cache.contents == {}:
                self.update_cache(cache, ens_num, first_ens, subtraj_final)
                self.assign_frames(cache, None)
            else:
                logger.debug(
                    "len(traj)=" + str(len(trajectory)) +
                    "cache_from=" + str(cache.contents['subtraj_from'])
                )
                subtraj_from = cache.contents['subtraj_from']
                if subtraj_from is None:
                    subtraj_from = 0
                # convert the cached end-relative offset to an index
                subtraj_final = len(trajectory) + subtraj_from
                ens_num = cache.contents['ens_num']
                ens_final = cache.contents['ens_from']

        # logging startup
        if logger.isEnabledFor(logging.DEBUG):  # pragma: no cover
            logger.debug(
                "Beginning can_prepend with ens_num:" + str(ens_num) +
                " ens_final:" + str(ens_final) + " subtraj_final " +
                str(subtraj_final) + "; strict=" + str(strict)
            )
            if cache.trusted:
                logger.debug("Cache contents: " + str(cache.contents))
                logger.debug("cache.prev_start_frame: " +
                             str(trajectory.index(cache.start_frame)))
            for i in range(len(self.ensembles)):
                logger.debug(
                    "Ensemble " + str(i) +
                    " : " + self.ensembles[i].__class__.__name__
                )

        while True:
            if self._use_cache and cache.trusted:
                # offset = 1
                offset = 0
                try:
                    last_checked_index = cache.prev_last_index + offset
                except:  # on any exception
                    # TODO: ideally, this won't be covered by tests, and can
                    # eventually be removed (along with try/except)
                    last_checked_index = \
                        trajectory.index(cache.prev_last_frame) + offset
                # NOTE(review): unlike _generic_can_append, last_checked is
                # recomputed with trajectory.index here and
                # last_checked_index is not used -- confirm this is intended
                last_checked = trajectory.index(cache.prev_last_frame) + offset
            else:
                last_checked_index = None
                last_checked = None
            subtraj_first = self._find_subtraj_first(
                trajectory, subtraj_final, ens_num, last_checked)
            cache.last_length = len(trajectory) - subtraj_first
            # end-relative position of subtraj_final (None means "the end")
            assign_final = subtraj_final - len(trajectory)
            if assign_final == 0:
                assign_final = None
            logger.debug(
                str(ens_num) + " : " +
                "(" + str(subtraj_first) + "," + str(subtraj_final) + ")"
            )
            if subtraj_final - subtraj_first > 0:
                subtraj = ltraj[slice(subtraj_first, subtraj_final)]
                if ens_num == first_ens:
                    if subtraj_first == traj_first:
                        logger.debug("Returning can_prepend")
                        self.update_cache(cache, ens_num, ens_final,
                                          assign_final)
                        return self.ensembles[ens_num].can_prepend(
                            subtraj, trusted=True)
                    else:
                        logger.debug(
                            "Returning false due to incomplete assigns: " +
                            str(subtraj_first) + "!=" + str(traj_first)
                        )
                        return False
                else:
                    if subtraj_first != traj_first and \
                            not self.ensembles[ens_num](
                                subtraj, trusted=True):
                        logger.debug(
                            "Couldn't assign frames " + str(subtraj_first) +
                            " through " + str(subtraj_final) +
                            " to ensemble " + str(ens_num) + ": No match"
                        )
                    else:
                        logger.debug(
                            "Assigning frames " + str(subtraj_first) +
                            " through " + str(subtraj_final) +
                            " to ensemble " + str(ens_num)
                        )
                        assign_first = subtraj_first - len(trajectory)
                        self.assign_frames(cache, ens_num, assign_first,
                                           assign_final)
                        self.update_cache(cache, ens_num, ens_final,
                                          assign_final)
                    ens_num -= 1
                    subtraj_final = subtraj_first
                    logger.debug("Moving to the next ensemble " +
                                 str(ens_num))
            else:
                if subtraj_first == traj_first:
                    if self._use_cache:
                        prev_slice = cache.contents['assignments'][ens_num + 1]
                        logger.debug("prev_slice " + str(prev_slice))
                        prev_subtraj = ltraj[prev_slice]
                        logger.debug("prev_subtraj " + str(prev_subtraj))
                        logger.debug("traj " + str(trajectory))
                        prev_ens = self.ensembles[ens_num + 1]
                        if prev_ens.can_prepend(prev_subtraj, trusted=True):
                            # the later ensemble could still grow backward,
                            # so step back to it and keep its cached end
                            logger.debug(
                                "Premature promotion: returning to ensemble " +
                                str(ens_num + 1)
                            )
                            ens_num += 1
                            assign_final = "keep"
                        logger.debug("(first, final)" +
                                     str((subtraj_first, subtraj_final)))
                    self.update_cache(cache, ens_num, ens_final,
                                      assign_final)
                    logger.debug(
                        "All frames assigned, more ensembles to go: "
                        "returning True")
                    return True
                elif self.ensembles[ens_num](self._zero_traj):
                    logger.debug(
                        "Moving on because of allowed zero-length ensemble")
                    ens_num -= 1
                    subtraj_final = subtraj_first
                    self.update_cache(cache, ens_num, ens_final,
                                      subtraj_final)
                else:
                    if ens_final == first_ens:
                        logger.debug(
                            "Started with the last ensemble, got nothin'")
                        return False
                    elif strict is False:
                        logger.debug(
                            "Reassigning all frames, starting with ensemble " +
                            str(ens_final)
                        )
                        ens_final -= 1
                        ens_num = ens_final
                        subtraj_final = len(trajectory)
                        self.update_cache(cache, ens_num, ens_final,
                                          subtraj_final)
                    else:
                        logger.debug(
                            "First ensemble fails and strict -- return false"
                        )
                        return False

    def can_prepend(self, trajectory, trusted=False):
        """Non-strict prepend check; see `_generic_can_prepend`."""
        return self._generic_can_prepend(trajectory, trusted, strict=False)

    def strict_can_prepend(self, trajectory, trusted=False):
        """Strict prepend check; see `_generic_can_prepend`."""
        return self._generic_can_prepend(trajectory, trusted, strict=True)

    def _str(self):
        # bracketed, comma-separated list of the subensemble descriptions
        head = "[\n"
        tail = "\n]"
        sequence_str = ",\n".join([str(ens) for ens in self.ensembles])
        return head + sequence_str + tail
class LengthEnsemble(Ensemble):
    """
    The ensemble of trajectories of a given length
    """

    def __init__(self, length):
        """
        A path ensemble that describes path of a specific length

        Parameters
        ----------
        length : int or slice
            The specific length (int) or the range of allowed trajectory
            lengths (slice). For a slice, ``start`` is the inclusive
            minimum (``None`` means no minimum) and ``stop`` is the
            exclusive maximum (``None`` means no maximum).
        """
        # TODO: remove support for slice?
        super(LengthEnsemble, self).__init__()
        self.length = length

    def _is_range(self):
        """True if the allowed length was given as a slice."""
        # anything that is not a slice is treated as a single integer
        # length, which also covers integer-like types other than `int`
        return isinstance(self.length, slice)

    def __call__(self, trajectory, trusted=None, candidate=False):
        """Return True if the trajectory has an allowed length."""
        length = len(trajectory)
        if not self._is_range():
            return length == self.length

        start = self.length.start
        stop = self.length.stop
        # an open lower bound must not be compared against: ordering an
        # int with None raises TypeError on Python 3
        if start is not None and length < start:
            return False
        return stop is None or length < stop

    def can_append(self, trajectory, trusted=False):
        """Return True if one more frame would not exceed the length."""
        length = len(trajectory)
        if not self._is_range():
            return_value = (length < self.length)
            logger.debug("LengthEnsemble.can_append: Segment length " +
                         str(length) + " < " + str(self.length) + " : " +
                         str(return_value))
            return return_value

        stop = self.length.stop
        return stop is None or length < stop - 1

    def can_prepend(self, trajectory, trusted=False):
        # only the length matters, so this is the same test as can_append
        return self.can_append(trajectory)

    def _str(self):
        if not self._is_range():
            return 'len(x) = {0}'.format(self.length)

        start = self.length.start
        if start is None:
            start = 0
        stop = self.length.stop
        if stop is None:
            stop = 'infty'
        else:
            # the slice's stop is exclusive; report the inclusive maximum
            stop = str(self.length.stop - 1)
        return 'len(x) in [{0}, {1}]'.format(start, stop)
class VolumeEnsemble(Ensemble):
    """
    Path ensembles based on the Volume object
    """

    def __init__(self, volume, trusted=True):
        # TODO: does `trusted` actually mean anything or do anything as a
        # property? it is about the condition of trusting the trajectory
        # when we run it, so it relevant in functions. I don't think we need
        # it here. ~DWHS
        super(VolumeEnsemble, self).__init__()
        self.volume = volume
        self.trusted = trusted

        # caching can be switched off through this flag
        self._use_cache = True

        # caches created with direction +1
        self._cache_can_append = EnsembleCache(+1)
        self._cache_call = EnsembleCache(+1)

        # caches created with direction -1
        self._cache_can_prepend = EnsembleCache(-1)
        self._cache_check_reverse = EnsembleCache(-1)

    @property
    def _volume(self):
        """
        The volume that is used in the specification.
        """
        return self.volume
class AllInXEnsemble(VolumeEnsemble):
    """
    Ensemble of trajectories with all frames in the given volume
    """

    def _trusted_call(self, trajectory, cache):
        """
        Generalized version of the call when trusted.

        This uses a cache, which has the result for the previous trajectory
        (`trajectory[:-1]` if forward, `trajectory[1:]` if backward) in the
        `cache.contents['previous']`.

        Parameters
        ----------
        trajectory : paths.Trajectory
            input trajectory to test
        cache : paths.EnsembleCache
            ensemble cache for this function

        Returns
        -------
        bool :
            result of __call__
        """
        # index of the newest frame: 1 -> -1; -1 -> 0
        frame_num = -(cache.direction + 1) // 2

        if cache.check(trajectory):
            # the cache was reset, so rebuild the "previous" entry
            if len(trajectory) >= 2:
                # NOTE: is it possible that we'd reset a cache more than
                # once in a single trajectory? that could mean that this
                # starts to scale quadratically. I can't think of a case
                # where this is a practical concern (short-circuit logic
                # means the recache should only happen once per trajectory
                # for All*XEnsembles, and the call should only happen once
                # per trajectory for Part*XEnsembles.) In any case, the fix
                # would be to implement a more complicated cache.reset,
                # which checks whether the previous traj was a subtraj of
                # this one (other than one frame less). ~~~DWHS
                if frame_num == -1:
                    rebuilt = self(trajectory[:-1], trusted=False)
                elif frame_num == 0:
                    rebuilt = self(trajectory[1:], trusted=False)
                else:  # pragma: no cover
                    raise RuntimeError("Bad value for frame_num: " +
                                       str(frame_num))
                cache.contents['previous'] = rebuilt
            else:
                cache.contents['previous'] = None

        earlier = cache.contents['previous']
        if earlier is not None and not earlier:
            # the shorter trajectory already failed, so this one fails too
            return False

        # no previous trajectory, or it passed: only the newest frame needs
        # checking. This sometimes gets a list instead of a full Trajectory
        fetch = getattr(trajectory, "get_as_proxy", trajectory.__getitem__)
        newest = fetch(frame_num)
        cache.contents['previous'] = self._volume(newest)
        return cache.contents['previous']

    def can_append(self, trajectory, trusted=False):
        if len(trajectory) == 0:
            return True
        if trusted and self._use_cache:
            return self._trusted_call(trajectory, self._cache_can_append)
        return self(trajectory)

    def can_prepend(self, trajectory, trusted=False):
        if len(trajectory) == 0:
            return True
        if trusted and self._use_cache:
            return self._trusted_call(trajectory, self._cache_can_prepend)
        return self(trajectory)

    def __call__(self, trajectory, trusted=None, candidate=False):
        if len(trajectory) == 0:
            return False
        # TODO: We might be able to speed this up based on can_append
        # being the same as call for this ensemble. Something like check
        # the can_append cache instead of/as well as the call cache. May
        # still have problems with overshooting -- but this might provide a
        # speed-up in sequential ensemble's checking phase. ~~~DWHS
        if trusted and self._use_cache:
            return self._trusted_call(trajectory, self._cache_call)

        logger.debug("Untrusted VolumeEnsemble " + repr(self))
        # logger.debug("Trajectory " + repr(trajectory))
        # This can sometimes get a list instead of a Trajectory
        # Make sure this is a proxy list
        return all(self._volume(snapshot)
                   for snapshot in _get_list_traj(trajectory))

    def check_reverse(self, trajectory, trusted=False):
        # order in this one only matters if it is trusted
        if not (trusted and self._use_cache):
            # print "Rev UnTrusted"
            return self(trajectory)  # in this case, order wouldn't matter
        # print "Rev Trusted"
        return self._trusted_call(trajectory, self._cache_check_reverse)
        # frame = trajectory.get_as_proxy(0)
        # return self._volume(frame)

    def __invert__(self):
        return PartOutXEnsemble(self.volume, self.trusted)

    def _str(self):
        return 'x[t] in {0} for all t'.format(self._volume)
class AllOutXEnsemble(AllInXEnsemble):
    """
    Ensemble of trajectories with all frames outside the given volume
    """

    def __init__(self, volume, trusted=True):
        super(AllOutXEnsemble, self).__init__(volume, trusted)
        # invert the volume once here; `_volume` returns this stored object
        self._cached_volume = ~self.volume

    @property
    def _volume(self):
        # the volume actually tested is the inverse of the given one
        return self._cached_volume

    def __invert__(self):
        return PartInXEnsemble(self.volume, self.trusted)

    def _str(self):
        tested_volume = self._volume
        return 'x[t] in {0} for all t'.format(tested_volume)
class PartInXEnsemble(VolumeEnsemble):
    """
    Ensemble of trajectories with at least one frame inside the volume.
    """

    def _str(self):
        template = 'exists t such that x[t] in {0}'
        return template.format(self._volume)

    def __call__(self, trajectory, trusted=None, candidate=False):
        """
        Check whether any frame of the trajectory is inside the volume.

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            The trajectory to be checked
        trusted : bool or None
            accepted for signature compatibility; not used by this check
        candidate : bool
            accepted for signature compatibility; not used by this check

        Returns
        -------
        bool
            True if at least one frame satisfies the volume, False otherwise
            (in particular for a trajectory without frames)
        """
        # convert first so that a Trajectory is not sliced or copied
        frames = _get_list_traj(trajectory)
        return any(self._volume(frame) for frame in frames)

    def __invert__(self):
        # negation of "some frame inside" is "every frame outside"
        return AllOutXEnsemble(self.volume, self.trusted)
class PartOutXEnsemble(PartInXEnsemble):
    """
    Ensemble of trajectories with at least one frame outside the volume.

    The frame test is carried out against the complement of ``volume``,
    which is computed once at construction and exposed through ``_volume``.
    """

    def __init__(self, volume, trusted=True):
        super(PartOutXEnsemble, self).__init__(volume, trusted)
        # build the complement a single time instead of on every lookup
        complement = ~self.volume
        self._cached_volume = complement

    @property
    def _volume(self):
        # the volume frames are tested against: the stored complement
        return self._cached_volume

    def _str(self):
        template = 'exists t such that x[t] in {0}'
        return template.format(self._volume)

    def __invert__(self):
        # negation of "some frame outside" is "every frame inside"
        return AllInXEnsemble(self.volume, self.trusted)

    def __call__(self, trajectory, trusted=None, candidate=False):
        # Work on a list of (proxy) snapshots rather than the Trajectory
        frames = _get_list_traj(trajectory)
        return any(self._volume(frame) for frame in frames)
class WrappedEnsemble(Ensemble):
    """
    Wraps an ensemble to alter it or the way it sees a trajectory.

    Every check is forwarded to ``_new_ensemble`` after the trajectory has
    been passed through ``_alter``. Subclasses customize behavior by
    overriding ``_alter`` and/or replacing ``_new_ensemble``.
    """

    def __init__(self, ensemble):
        super(WrappedEnsemble, self).__init__()
        self.ensemble = ensemble
        # _new_ensemble is the ensemble actually evaluated; it may also be
        # provided as a property by subclasses that need more flexibility
        self._new_ensemble = self.ensemble
        self.trusted = None

        self._cache_can_append = EnsembleCache(+1)
        self._cache_strict_can_append = EnsembleCache(+1)
        self._cache_call = EnsembleCache(+1)
        # the prepend caches are forward (+1) as well: the frames they are
        # given come from a forward-growing trajectory, which only gets
        # turned around afterwards
        self._cache_can_prepend = EnsembleCache(+1)
        self._cache_strict_can_prepend = EnsembleCache(+1)

    def __call__(self, trajectory, trusted=None, candidate=False):
        # note that `candidate` is not forwarded to the wrapped ensemble
        target = self._new_ensemble
        return target(self._alter(trajectory), trusted)

    def _alter(self, trajectory):
        # identity by default; subclasses return a modified trajectory
        return trajectory

    def can_append(self, trajectory, trusted=None):
        check = self._new_ensemble.can_append
        return check(self._alter(trajectory), trusted)

    def can_prepend(self, trajectory, trusted=None):
        check = self._new_ensemble.can_prepend
        return check(self._alter(trajectory), trusted)

    def strict_can_append(self, trajectory, trusted=None):
        check = self._new_ensemble.strict_can_append
        return check(self._alter(trajectory), trusted)

    def strict_can_prepend(self, trajectory, trusted=None):
        check = self._new_ensemble.strict_can_prepend
        return check(self._alter(trajectory), trusted)

    def _str(self):
        return str(self._new_ensemble)
class SlicedTrajectoryEnsemble(WrappedEnsemble):
    """
    Alters trajectories given as arguments by taking Python slices.

    Parameters
    ----------
    ensemble : :class:`.Ensemble`
        the ensemble that is applied to the sliced trajectory
    region : int or slice
        the part of the trajectory to keep. An integer ``i`` is converted
        into a one-frame slice so that slicing still yields a sequence of
        frames; any other object is stored unchanged and used directly as
        the index in ``trajectory[region]``.
    """

    def __init__(self, ensemble, region):
        super(SlicedTrajectoryEnsemble, self).__init__(ensemble)
        # isinstance (rather than an exact type comparison) so that
        # subclasses of int are also turned into one-frame slices
        if isinstance(region, int):
            # -1 needs an open-ended stop: slice(-1, 0) would select nothing
            stop = None if region == -1 else region + 1
            self.region = slice(region, stop)
        else:
            self.region = region

    def _alter(self, trajectory):
        return trajectory[self.region]

    def _str(self):
        # TODO: someday may add different string support for slices with
        # only one frame
        start = "" if self.region.start is None else str(self.region.start)
        stop = "" if self.region.stop is None else str(self.region.stop)
        step = "" if self.region.step is None else " every " + str(
            self.region.step)
        return ("(" + str(self.ensemble) +
                " in {" + start + ":" + stop + "}" + step + ")")
class SuffixTrajectoryEnsemble(WrappedEnsemble):
    """
    Ensemble which tests a given trajectory with a fixed suffix attached.

    The trajectory handed to the wrapped ensemble is the reversed input
    trajectory followed by ``add_trajectory``. Used in backward shooting.
    """

    def __init__(self, ensemble, add_trajectory):
        super(SuffixTrajectoryEnsemble, self).__init__(ensemble)
        self.add_trajectory = add_trajectory
        # working copy (of proxies) that _alter grows at the front
        proxies = add_trajectory.as_proxies()
        self._cached_trajectory = paths.Trajectory(proxies)

    def _alter(self, trajectory):
        logger.debug("Starting Suffix._alter")
        # the result is named "reset" upstream: a true value means the
        # cached trajectory cannot be reused and has to be rebuilt
        must_rebuild = self._cache_can_prepend.check(trajectory)
        if must_rebuild:
            self._cached_trajectory = trajectory.reversed + self.add_trajectory
        else:
            logger.debug("SuffixTrajectory was not reset")
            # the newest frame of the input becomes the first frame of the
            # combined trajectory, unless it is already there
            newest = trajectory.get_as_proxy(-1)
            if self._cached_trajectory.get_as_proxy(0) != newest:
                self._cached_trajectory.insert(0, newest)
        return self._cached_trajectory

    def can_append(self, trajectory, trusted=None):
        raise RuntimeError("SuffixTrajectoryEnsemble.can_append is nonsense.")

    def strict_can_append(self, trajectory, trusted=None):
        # WrappedEnsemble forwards this to the wrapped ensemble; here it
        # has to raise the same error as can_append instead
        return self.can_append(trajectory, trusted)
class PrefixTrajectoryEnsemble(WrappedEnsemble):
    """
    Ensemble which tests a given trajectory with a fixed prefix attached.

    The trajectory handed to the wrapped ensemble is ``add_trajectory``
    followed by the input trajectory. Used in forward shooting.
    """

    def __init__(self, ensemble, add_trajectory):
        super(PrefixTrajectoryEnsemble, self).__init__(ensemble)
        self.add_trajectory = add_trajectory
        # working copy (of proxies) that _alter grows at the end
        proxies = add_trajectory.as_proxies()
        self._cached_trajectory = paths.Trajectory(proxies)

    def _alter(self, trajectory):
        logger.debug("Starting _alter")
        # the result is named "reset" upstream: a true value means the
        # cached trajectory cannot be reused and has to be rebuilt
        must_rebuild = self._cache_can_append.check(trajectory)
        if must_rebuild:
            logger.debug("doing it oldstyle")
            self._cached_trajectory = self.add_trajectory + trajectory
        else:
            # only the newest frame has to be added, unless it is already
            # the last frame of the combined trajectory
            newest = trajectory.get_as_proxy(-1)
            if self._cached_trajectory.get_as_proxy(-1) != newest:
                self._cached_trajectory.append(newest)
        return self._cached_trajectory

    def can_prepend(self, trajectory, trusted=None):
        raise RuntimeError("PrefixTrajectoryEnsemble.can_prepend is nonsense.")

    def strict_can_prepend(self, trajectory, trusted=None):
        # WrappedEnsemble forwards this to the wrapped ensemble; here it
        # has to raise the same error as can_prepend instead
        return self.can_prepend(trajectory, trusted)
class ReversedTrajectoryEnsemble(WrappedEnsemble):
    """
    Ensemble based on reversing the trajectory.

    The wrapped ensemble is evaluated on the reversed version of whatever
    trajectory is passed in.
    """

    def _alter(self, trajectory):
        # Use the `reversed` property (as SuffixTrajectoryEnsemble does),
        # which yields a reversed trajectory as a value. For a list-like
        # trajectory, `reverse()` would reverse the caller's object in
        # place and return None, so nothing usable would reach the wrapped
        # ensemble.
        return trajectory.reversed
class AppendedNameEnsemble(WrappedEnsemble):
    """
    Add string to ensemble name: allows multiple copies of an ensemble.

    The string form is that of the wrapped ensemble, followed by a space
    and ``label``.
    """

    def __init__(self, ensemble, label):
        # the label is assigned before the parent initializer runs
        self.label = label
        super(AppendedNameEnsemble, self).__init__(ensemble)

    def _str(self):
        pieces = [str(self.ensemble), self.label]
        return " ".join(pieces)
class OptionalEnsemble(WrappedEnsemble):
    """
    An ensemble which is optional for SequentialEnsembles.

    Evaluated as the union of ``LengthEnsemble(0)`` and the given ensemble.
    """

    def __init__(self, ensemble):
        super(OptionalEnsemble, self).__init__(ensemble)
        # the LengthEnsemble(0) alternative is what makes this optional
        zero_length = LengthEnsemble(0)
        self._new_ensemble = zero_length | self.ensemble

    def _str(self):
        return "".join(["{", str(self.ensemble), "} (OPTIONAL)"])
class SingleFrameEnsemble(WrappedEnsemble):
    """
    Convenience ensemble to `and` a LengthEnsemble(1) with a given ensemble.

    Frequently used for SequentialEnsembles.

    Attributes
    ----------
    ensemble : :class:`openpathsampling.ensemble.Ensemble`
        the ensemble which should be represented in the single frame

    Notes
    -----
    Nothing prevents wrapping an ensemble that needs more than one frame to
    be satisfied (e.g., a SequentialEnsemble with more than one
    subensemble). Such a SingleFrameEnsemble can be created, but no path
    will ever satisfy it. Since not every possible mistake can be caught,
    no check is attempted here.
    """

    def __init__(self, ensemble):
        super(SingleFrameEnsemble, self).__init__(ensemble)
        # intersect with the length restriction to exactly one frame
        one_frame = LengthEnsemble(1)
        self._new_ensemble = one_frame & self.ensemble

    def _str(self):
        return "".join(["{", str(self.ensemble), "} (SINGLE FRAME)"])
class MinusInterfaceEnsemble(WrappedEnsemble):
    """
    This creates an ensemble for the minus interface.

    The specific implementation allows us to use the multiple-segment minus
    ensemble described by Swenson and Bolhuis. The minus interface was
    originally developed by van Erp. For more details, see the section
    "Anatomy of a PathMover: the Minus Move" in the OpenPathSampling
    Documentation.

    Parameters
    ----------
    state_vol : :class:`.Volume`
        The Volume which defines the state for this minus interface
    innermost_vols : :class:`.Volume` or list of :class:`.Volume`
        The Volume(s) defining the innermost interface(s) with which this
        minus interface does its replica exchange. A single volume is
        wrapped into a one-element list. The attribute ``innermost_vol`` is
        the intersection of all of them.
    n_l : integer (greater than one)
        The number of segments crossing innermost_vol for this interface.
        Values below 2 raise a ValueError.
    forbidden : :class:`.Volume` or list of :class:`.Volume` or None
        Volume(s) which no frame of the trajectory may be in: the
        sequential ensemble is intersected with an all-frames-outside
        ensemble of the joined forbidden volumes. ``None`` (default) uses a
        single ``EmptyVolume``.
    greedy : bool
        Stored as the ``greedy`` attribute; it is not otherwise used in the
        code of this class.

    References
    ----------
    T.S. van Erp. Phys. Rev. Lett.

    D.W.H. Swenson and P.G. Bolhuis. J. Chem. Phys. 141, 044101 (2014).
    doi:10.1063/1.4890037
    """

    # don't store unnecessary stuff we recreate at initialization
    # TODO: Check with David if it makes sense to store these and allow
    # them being used in __init__ instead of the self-made ones

    def __init__(self, state_vol, innermost_vols, n_l=2, forbidden=None,
                 greedy=False):
        if n_l < 2:
            raise ValueError("The number of segments n_l must be at least 2")

        self.state_vol = state_vol
        # accept either a single volume or an iterable of volumes
        try:
            innermost_vols = list(innermost_vols)
        except TypeError:
            innermost_vols = [innermost_vols]

        # same regularization for the forbidden volumes
        if forbidden is None:
            forbidden = [paths.EmptyVolume()]
        else:
            try:
                forbidden = list(forbidden)
            except TypeError:
                forbidden = [forbidden]

        self.forbidden = forbidden
        forbidden_volume = paths.join_volumes(forbidden)
        forbidden_ensemble = paths.AllOutXEnsemble(forbidden_volume)

        self.innermost_vols = innermost_vols
        # intersection of all innermost volumes, starting from everything
        self.innermost_vol = paths.FullVolume()
        for vol in self.innermost_vols:
            self.innermost_vol = self.innermost_vol & vol
        self.greedy = greedy

        in_A = AllInXEnsemble(state_vol)
        out_A = AllOutXEnsemble(state_vol)
        in_X = AllInXEnsemble(self.innermost_vol)
        # leave_X only appears in commented-out alternatives below
        leave_X = PartOutXEnsemble(self.innermost_vol)
        # interstitial = out_A & in_X
        interstitial = self.innermost_vol - state_vol
        in_interstitial = AllInXEnsemble(interstitial)
        # one state-to-state TIS ensemble per innermost interface
        segment_ensembles = [paths.TISEnsemble(state_vol, state_vol, inner)
                             for inner in self.innermost_vols]

        self._segment_ensemble = join_ensembles(segment_ensembles)

        # interstitial = AllInXEnsemble(self.innermost_vol - state_vol)
        start = [
            SingleFrameEnsemble(in_A),
            OptionalEnsemble(in_interstitial),
        ]
        loop = [
            out_A,  # & leave_X,  # redundant b/c next stop for previous
            in_X  # & hitA # redundant due to stop req for previous outA
        ]
        end = [
            out_A,  # & leave_X,
            OptionalEnsemble(in_interstitial),
            SingleFrameEnsemble(in_A)
        ]
        # the loop part is repeated (n_l - 1) times between start and end
        sequence = start + loop * (n_l - 1) + end

        ensemble = paths.SequentialEnsemble(sequence) & forbidden_ensemble

        self.n_l = n_l

        super(MinusInterfaceEnsemble, self).__init__(ensemble)

    def to_dict(self):
        """Return the parent's dictionary extended by minus-specific entries.

        Returns
        -------
        dict
            result of the parent ``to_dict`` with the keys ``state_vol``,
            ``innermost_vols``, ``innermost_vol``, ``_segment_ensemble``,
            ``forbidden`` and ``n_l`` set from the attributes of the same
            names
        """
        dct = super(MinusInterfaceEnsemble, self).to_dict()
        dct['state_vol'] = self.state_vol
        dct['innermost_vols'] = self.innermost_vols
        dct['innermost_vol'] = self.innermost_vol
        dct['_segment_ensemble'] = self._segment_ensemble
        dct['forbidden'] = self.forbidden
        dct['n_l'] = self.n_l
        return dct

    @property
    def extendable_sub_ensembles(self):
        """dict of str to ensemble: sub-ensembles under 'complex', 'minimal'.

        'complex' is the joined segment (TIS) ensemble built at
        initialization; 'minimal' is a two-frame ensemble with one frame in
        the state followed by one frame outside of it.
        """
        # A-X-A and the one from TISEnsemble
        state_vol = self.state_vol
        sub_ensembles = {}
        in_A = AllInXEnsemble(state_vol)
        out_A = AllOutXEnsemble(state_vol)

        # this code is for potential
        # in_X = AllInXEnsemble(self.innermost_vol)
        # leave_X = PartOutXEnsemble(self.innermost_vol)
        # interstitial = out_A & in_X
        # segment_ensembles = [paths.TISEnsemble(state_vol, state_vol, inner)
        #                      for inner in self.innermost_vols]

        # start = [
        #     SingleFrameEnsemble(in_A),
        #     OptionalEnsemble(interstitial),
        # ]
        # loop = [
        #     out_A & leave_X,
        #     in_X  # & hitA # redundant due to stop req for previous outA
        # ]
        # end = [
        #     out_A & leave_X,
        #     OptionalEnsemble(interstitial),
        #     SingleFrameEnsemble(in_A)
        # ]

        # do not add higher orders, you would
        # for n_l in range(self.n_l - 2, 0, -1):
        #     # add ens with less loops
        #     sub_ensembles.append(
        #         SequentialEnsemble(start + loop * n_l + end))

        sub_ensembles['complex'] = self._segment_ensemble

        # and the simplest possible just crossing from in_state to outside
        sub_ensembles['minimal'] = \
            LengthEnsemble(2) & \
            SequentialEnsemble([
                SingleFrameEnsemble(in_A),
                SingleFrameEnsemble(out_A)
            ])

        return sub_ensembles
# def populate_minus_ensemble(self, partial_traj, minus_replica_id, engine): # """ # Generate a sample for the minus ensemble by extending `partial_traj` # # Parameters # ---------- # partial_traj : :class:`openpathsampling.trajectory.Trajectory` # trajectory to extend # minus_replica_id : int or str # replica ID for this sample # engine : :class:`openpathsampling.dynamicsengine.DynamicsEngine` # engine to use for MD extension # """ # last_frame = partial_traj[-1] # if not self._segment_ensemble(partial_traj): # raise RuntimeError( # "Invalid input trajectory for minus extension. (Not A-to-A?)" # ) # fwd_extend_ens = PrefixTrajectoryEnsemble(self, partial_traj) # extension = engine.generate(last_frame, # [fwd_extend_ens.can_append]) # first_minus = paths.Trajectory(partial_traj + extension[1:]) # assert self(first_minus) # minus_samp = paths.Sample( # replica=minus_replica_id, # trajectory=first_minus, # ensemble=self # ) # logger.info(first_minus.summarize_by_volumes_str( # {"A": self.state_vol, # "I": ~self.state_vol & self.innermost_vol, # "X": ~self.innermost_vol}) # ) # return minus_samp # def populate_minus_ensemble_from_set(self, samples, minus_replica_id, # engine): # """ # Generate a sample for this minus ensemble by extending trajectory. 
# # Parameters # ---------- # samples : iterable of :class:`.Sample` # samples with trajectories that might be extended # minus_replica_id : int or str # replica ID for the return sample # engine : :class:`openpathsampling.dynamicsengine.DynamicsEngine` # engine to use for MD extension # # Returns # ------- # :class:`.Sample` : # a sample for this minus ensemble # """ # partials = [s.trajectory for s in samples # if self._segment_ensemble(s.trajectory)] # if len(partials) == 0: # # TODO: add support for trying to run backwards # raise RuntimeError("No trajectories can be extended") # # samp = None # # good_sample = False # while not good_sample: # partial_traj = partials[0] # # I think it should be impossible to RuntimeError in this # samp = self.populate_minus_ensemble( # partial_traj=partial_traj, # minus_replica_id=minus_replica_id, # engine=engine # ) # # good_sample = samp.ensemble(samp.trajectory) # # return samp
class TISEnsemble(WrappedEnsemble):
    """An ensemble for TIS (or AMS).

    Begin in `initial_states`, end in either `initial_states` or
    `final_states`, and cross `interface`.

    Attributes
    ----------
    initial_states : list of `openpathsampling.volume.Volume`
        Volume(s) that only the first or last frame may be in. A single
        volume given to the initializer is wrapped into a list.
    final_states : list of `openpathsampling.volume.Volume`
        Volume(s) that only the last frame may be in. A single volume given
        to the initializer is wrapped into a list.
    interface : `openpathsampling.volume.Volume`
        Volume which the trajectory must exit to be accepted
    orderparameter : `openpathsampling.collectivevariable.CollectiveVariable`
        CV to be used as order parameter for this
    cv_max : callable or None
        called with the whole trajectory; its result is compared against
        `lambda_i` in the candidate shortcut of ``__call__``
    lambda_i : number or None
        threshold used by the candidate shortcut of ``__call__``; without
        it the shortcut is never taken
    """

    @property
    def extendable_sub_ensembles(self):
        """dict with key 'minimal': two-frame in-state/out-of-state ensemble.
        """
        # this is tricky. The only extendable sub-ensembles are (In, Out)
        # at the crossing of leaving or entering the core
        # pick only the initial ones like for A-X-AB pick A
        states = list(set(self.initial_states))
        volume = paths.volume.join_volumes(states)
        return {
            'minimal': LengthEnsemble(2) & SequentialEnsemble([
                SingleFrameEnsemble(AllInXEnsemble(volume)),
                SingleFrameEnsemble(AllOutXEnsemble(volume))
            ])
        }

    def __init__(self, initial_states, final_states, interface,
                 orderparameter=None, cv_max=None, lambda_i=None):
        # regularize to list of volumes
        # without orderparameter, some info can't be obtained
        # Anything with a length is treated as a collection of states (as
        # before), but is now copied into a real list: trajectory_summary
        # concatenates and indexes the two collections, which fails for
        # e.g. a tuple combined with a list, or for a set.
        try:
            _ = len(initial_states)
        except TypeError:
            initial_states = [initial_states]
        else:
            initial_states = list(initial_states)

        try:
            _ = len(final_states)
        except TypeError:
            final_states = [final_states]
        else:
            final_states = list(final_states)

        volume_a = paths.volume.join_volumes(initial_states)
        volume_b = paths.volume.join_volumes(final_states)

        # one frame in A, optionally frames outside of all states, one
        # frame in A or B; additionally the interface has to be left
        ensemble = SequentialEnsemble([
            AllInXEnsemble(volume_a) & LengthEnsemble(1),
            OptionalEnsemble(AllOutXEnsemble(volume_a | volume_b)),
            AllInXEnsemble(volume_a | volume_b) & LengthEnsemble(1)
        ]) & PartOutXEnsemble(interface)

        super(TISEnsemble, self).__init__(ensemble)

        self.initial_states = initial_states
        self.final_states = final_states
        self.interface = interface
        # self.name = interface.name
        self.orderparameter = orderparameter  # TODO: is this used? remove?
        self.cv_max = cv_max
        self.lambda_i = lambda_i
        self._initial_volumes = volume_a
        self._final_volumes = volume_b | volume_a

    def __call__(self, trajectory, trusted=None, candidate=False):
        """Check whether the trajectory is in this ensemble.

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the trajectory to be checked
        trusted : bool or None
            passed on to the wrapped ensemble when no shortcut is used
        candidate : bool
            if True (and `lambda_i` is set together with `cv_max` or
            `orderparameter`), only the first frame, the last frame and the
            maximum of the CV are tested

        Returns
        -------
        bool
            whether the trajectory satisfies the ensemble
        """
        logger.debug("TIS ENSEMBLE: candidate={0}".format(str(candidate)))
        use_candidate = (candidate and self.lambda_i is not None)
        if use_candidate and self.cv_max is not None:
            logger.debug("Using candidate shortcut with self.cv_max")
            return (
                self._initial_volumes(trajectory[0])
                & self._final_volumes(trajectory[-1])
                & (self.cv_max(trajectory) > self.lambda_i)
            )
        elif use_candidate and self.orderparameter is not None:
            logger.debug("Using candidate shortcut with max(orderparameter)")
            # as a candidate trajectory, we assume that only the first and
            # final frames can be in a state
            return (
                self._initial_volumes(trajectory[0])
                & self._final_volumes(trajectory[-1])
                & (max(self.orderparameter(trajectory)) > self.lambda_i)
            )
        else:
            logger.debug("No shortcut possible")
            # it still works fine if we use the slower algorithm
            return super(TISEnsemble, self).__call__(trajectory, trusted)

    def trajectory_summary(self, trajectory):
        """Summarize start state, end state and CV extrema of a trajectory.

        Parameters
        ----------
        trajectory : :class:`openpathsampling.trajectory.Trajectory`
            the trajectory to summarize

        Returns
        -------
        dict
            ``'initial_state'``: index in `initial_states` of the first
            state containing the first frame (None if there is none);
            ``'final_state'``: index in ``initial_states + final_states``
            of the first state containing the last frame (None if there is
            none); ``'max_lambda'`` and ``'min_lambda'``: extrema of
            `orderparameter` along the trajectory (None without
            `orderparameter`)
        """
        initial_state_i = None
        final_state_i = None
        for state_i, state in enumerate(self.initial_states):
            if state(trajectory.get_as_proxy(0)):
                initial_state_i = state_i
                break
        all_states = self.initial_states + self.final_states
        for state_i, state in enumerate(all_states):
            if state(trajectory.get_as_proxy(-1)):
                final_state_i = state_i
                break

        if self.orderparameter is not None:
            lambda_traj = self.orderparameter(trajectory)
            min_lambda = min(lambda_traj)
            max_lambda = max(lambda_traj)
        else:
            min_lambda = None
            max_lambda = None

        return {
            'initial_state': initial_state_i,
            'final_state': final_state_i,
            'max_lambda': max_lambda,
            'min_lambda': min_lambda
        }

    def trajectory_summary_str(self, trajectory):
        """Return the result of `trajectory_summary` as a one-line string.

        States are reported by their ``name``; a missing state is reported
        as the string "None".
        """
        summ = self.trajectory_summary(trajectory)
        all_states = self.initial_states + self.final_states
        # TODO: remove the .name from this when string returns correctly
        init_st_i = summ['initial_state']
        fin_st_i = summ['final_state']
        # TODO: how can we have None?
        if init_st_i is None:
            init_st = "None"
        else:
            init_st = str(self.initial_states[init_st_i].name)

        if fin_st_i is None:
            fin_st = "None"
        else:
            fin_st = str(all_states[fin_st_i].name)

        min_l = str(summ['min_lambda'])
        max_l = str(summ['max_lambda'])
        mystr = (
            "initial_state=" + init_st + " " +
            "final_state=" + fin_st + " " +
            "min_lambda=" + min_l + " " +
            "max_lambda=" + max_l + " "
        )
        return mystr

    def _str(self):
        return str(self.ensemble)
# class EnsembleFactory(object): # """ # Convenience class to construct Ensembles # """ # @staticmethod # def StartXEnsemble(volume): # """ # Construct an ensemble that starts (x[0]) in the specified volume # Parameters # ---------- # volume : :class:`openpathsampling.volume.Volume` # The volume to start in # Returns # ------- # ensemble : :class:`openpathsampling.ensemble.Ensemble` # The constructed Ensemble # """ # return AllInXEnsemble(volume, 0) # @staticmethod # def EndXEnsemble(volume): # """ # Construct an ensemble that ends (x[-1]) in the specified volume # Parameters # ---------- # volume : :class:`openpathsampling.volume.Volume` # The volume to end in # Returns # ------- # ensemble : :class:`openpathsampling.ensemble.Ensemble` # The constructed Ensemble # """ # return AllInXEnsemble(volume, -1) # @staticmethod # def A2BEnsemble(volume_a, volume_b, trusted=True): # """ # Construct an ensemble that starts in `volume_a`, ends in # `volume_b` and is in either volumes in between # Parameters # ---------- # volume_a : :class:`openpathsampling.Volume` # The volume to start in # volume_b : :class:`openpathsampling.Volume` # The volume to end in # Returns # ------- # ensemble : :class:`openpathsampling.Ensemble` # The constructed Ensemble # """ # # TODO: this is actually only for flexible path length TPS now # return SequentialEnsemble([ # SingleFrameEnsemble(AllInXEnsemble(volume_a)), # AllOutXEnsemble(volume_a | volume_b), # SingleFrameEnsemble(AllInXEnsemble(volume_b)) # ]) # @staticmethod # def TISEnsembleSet(volume_a, volume_b, volumes_x, orderparameter, # lambdas=None): # if lambdas is None: # lambdas = [None] * len(volumes_x) # myset = [paths.TISEnsemble(volume_a, volume_b, vol, orderparameter, # lambda_i) # for (vol, lambda_i) in zip(volumes_x, lambdas)] # return myset