Source code for openpathsampling.numerics.lookup_function

import pandas as pd
import numpy as np
import collections


class LookupFunction(object):
    """
    Interpolation between datapoints.

    Parameters
    ----------
    ordinate : iterable of numbers
        the points at which the function is known; these become the
        lookup keys (the "x" values)
    abscissa : iterable of numbers
        the function value at each of those points (the "y" values).
        Pairs whose value is NaN are dropped.

    Iteration and numpy ufuncs work on the values. Callable with any
    number.

    Notes
    -----
    Largely, this class mimics an immutable dictionary, except instead of
    implementing __getitem__, we use the __call__ function. If you call a
    number that is in the dictionary, you get the value stored for it (up
    to floating-point rounding). If you call a number that is not in the
    dictionary, you get the linear interpolation/extrapolation for that
    number based on the dictionary values.

    The parameter names are kept for backward compatibility; note that
    ``ordinate`` holds the x values and ``abscissa`` the y values, which
    is the reverse of the usual mathematical convention.
    """

    def __init__(self, ordinate, abscissa):
        # Map x -> y, skipping points with an undefined (NaN) value. A
        # repeated x keeps the last value seen.
        self.pairs = {}
        for (x, y) in zip(ordinate, abscissa):
            if not np.isnan(y):
                self.pairs[x] = y
        self.sorted_ordinates = np.array(sorted(self.pairs.keys()))
        self._values = np.array([self.pairs[x]
                                 for x in self.sorted_ordinates])

    @classmethod
    def from_dict(cls, dct):
        """Build a LookupFunction from a mapping of x values to y values."""
        x = dct.keys()
        y = dct.values()
        return cls(x, y)

    def keys(self):
        """
        Return the (ordered) list of ordinates
        """
        return list(self.sorted_ordinates)

    def values(self):
        """
        Return the list of values (ordered by ordinate)
        """
        return self._values

    @property
    def x(self):
        """
        Property to return the ordinates
        """
        return self.sorted_ordinates

    def __len__(self):
        return len(self.sorted_ordinates)

    def __iter__(self):
        for val in self.values():
            yield val

    # TODO: may need better array behaviors
    def __array__(self, result=None):
        return np.array(self.values())

    def __array_wrap__(self, result, context=None, return_scalar=False):
        # `return_scalar` is passed by NumPy >= 2.0; it is accepted (and
        # ignored) so that newer NumPy versions can call this hook without
        # falling back to the deprecated two-argument form.
        res_arr = np.ndarray.__array_wrap__(self._values, result, context)
        return LookupFunction(self.sorted_ordinates, res_arr)

    def __array_prepare__(self, result, context=None):
        return result

    def series(self):
        """Return a pandas.Series representation of data points"""
        # TODO: temp hack until I can get matplotlib to plot natively
        ser = pd.Series(self.values(), self.keys())
        return ser

    def __call__(self, value):
        """
        Evaluate the function at ``value`` (1D only).

        Inside the range of known ordinates the result is the linear
        interpolation between the two neighboring data points. Below the
        first ordinate the line through the first two points is used;
        above the last ordinate the line through the last two points is
        used.

        Parameters
        ----------
        value : number
            where to evaluate the function

        Returns
        -------
        float
            interpolated or extrapolated function value

        Raises
        ------
        IndexError
            if the function holds too few data points to define the
            required line segment
        """
        # only a 1D implementation so far
        xvals = self.sorted_ordinates
        nvals = len(xvals)
        # index of the first ordinate that is >= value
        idx = int(np.searchsorted(xvals, value, side="left"))
        if idx == 0:
            # at or below the first point: extrapolate from the first
            # segment. TODO: add log warning
            x1 = xvals[0]
            x2 = xvals[1]
        elif idx == nvals:
            # above the last point: extrapolate from the last segment.
            # TODO: add log warning
            x1 = xvals[-2]
            x2 = xvals[-1]
        else:
            # interpolation
            x1 = xvals[idx - 1]
            x2 = xvals[idx]
        y1 = self.pairs[x1]
        y2 = self.pairs[x2]
        y = float(value - x1) / (x2 - x1) * (y2 - y1) + y1
        return y
class LookupFunctionGroup(LookupFunction):
    """
    Simple mean and std for a group of LookupFunctions.

    The mean and std from this are, themselves, LookupFunctions, and so can
    interpolate between included values. Calling the group acts as calling
    the mean. __getitem__, __setitem__, and append act on the list of
    functions.

    Parameters
    ----------
    functions : list of LookupFunctions
        the functions included
    use_x : "shared" (default), "all", or list of numbers
        the values to consider as the ordinates. If "shared", includes only
        values which appear in all the functions. If "all", includes all
        values which appear in any function. A list of numbers will use
        that list as the ordinate values.

    Notes
    -----
    The choice of `use_x` is very important for the calculation of the mean
    and standard deviation: if you use "shared", then you only calculate
    the mean/std at points where all functions have measured values. If you
    use "all", you will include points which are interpolated/extrapolated,
    instead of measured. In the current implementation, there is no way to
    get a mean/std with different numbers of contributions at each point,
    depending on whether the point has a measurement or is an
    extrapolation.

    `shared_x` and `all_x` are computed once, from the functions given at
    construction time.
    """

    def __init__(self, functions, use_x="shared"):
        self.functions = functions
        self.shared_x = set(self.functions[0].x)
        self.all_x = set(self.functions[0].x)
        for fcn in self.functions:
            self.shared_x = self.shared_x & set(fcn.x)
            self.all_x = self.all_x | set(fcn.x)
        self.shared_x = sorted(self.shared_x)
        self.all_x = sorted(self.all_x)
        self.use_x = use_x

    @property
    def use_x(self):
        """Ordinate selection: "shared", "all", or an explicit sequence."""
        return self._use_x

    @use_x.setter
    def use_x(self, use_x):
        self._use_x = use_x
        # Only compare against the keywords when a string was given: an
        # explicit NumPy array of ordinates would otherwise be compared
        # elementwise with the keyword, and the truth value of that
        # comparison result is ambiguous.
        keyword = use_x if isinstance(use_x, str) else None
        if keyword == "all":
            self.sorted_ordinates = self.all_x
        elif keyword == "shared":
            self.sorted_ordinates = self.shared_x
        else:
            self.sorted_ordinates = use_x

    def _pointwise(self, statistic):
        """Apply ``statistic`` to the member values at each ordinate."""
        results = [
            statistic(np.array([fcn(val) for fcn in self.functions]))
            for val in self.x
        ]
        return LookupFunction(self.x, results)

    @property
    def std(self):
        """Standard deviation."""
        return self._pointwise(lambda samples: samples.std())

    @property
    def mean(self):
        """Mean."""
        return self._pointwise(lambda samples: samples.mean())

    def __call__(self, value):
        return self.mean(value)

    def __getitem__(self, item):
        return self.functions[item]

    def __setitem__(self, item, value):
        self.functions[item] = value

    def __contains__(self, item):
        return item in self.functions

    def append(self, item):
        """Add a function to the group's list of functions."""
        self.functions.append(item)
class VoxelLookupFunction(object):
    """Lookup function backed by a sparse, voxel-indexed histogram.

    Calling the object converts a point into the bin-number tuple of the
    voxel that contains it and returns the entry stored in ``counter`` for
    that voxel. No interpolation is performed. The value returned for a
    voxel without an entry is whatever ``counter`` gives for a missing key
    (presumably ``counter`` is a ``collections.Counter``, which gives 0).

    Parameters
    ----------
    left_bin_edges : array-like
        offset of bin number 0 along each dimension
    bin_widths : array-like
        bin width along each dimension
    counter : mapping
        histogram values keyed by tuples of bin numbers
    """

    def __init__(self, left_bin_edges, bin_widths, counter):
        self.counter = counter
        self.bin_widths = bin_widths
        self.left_bin_edges = left_bin_edges

    def keys(self):
        """Return the keys (bin-number tuples) of the histogram."""
        return self.counter.keys()

    def values(self):
        """Return the values stored in the histogram."""
        return self.counter.values()

    def bin_to_left_edge(self, bin_num):
        """Convert bin number(s) to the coordinate of the bin's left edge."""
        bins = np.asarray(bin_num)
        return bins * self.bin_widths + self.left_bin_edges

    def val_to_bin(self, val):
        """Convert coordinate(s) to (fractional, un-floored) bin numbers."""
        offset = np.asarray(val) - self.left_bin_edges
        return offset / self.bin_widths

    @property
    def counter_by_bin_edges(self):
        """Histogram re-keyed by left-bin-edge tuples, as a Counter."""
        by_edge = {}
        for bin_key, count in self.counter.items():
            by_edge[tuple(self.bin_to_left_edge(bin_key))] = count
        return collections.Counter(by_edge)

    def df_2d(self, x_range=None, y_range=None):
        """
        Return a pandas.DataFrame for 2D lookup functions.

        Parameters
        ----------
        x_range : pair of int, optional
            first and last (inclusive) bin number to use as the index
        y_range : pair of int, optional
            first and last (inclusive) bin number to use as the columns

        Returns
        -------
        pandas.DataFrame :
            Values of the lookup function for each bin. The index and
            columns are bin numbers.

        Raises
        ------
        RuntimeError
            if the lookup function is not two-dimensional
        """
        if len(self.left_bin_edges) != 2:
            raise RuntimeError("Can't make 2D dataframe from non-2D data!")

        rows = None if x_range is None else range(x_range[0],
                                                  x_range[1] + 1)
        cols = None if y_range is None else range(y_range[0],
                                                  y_range[1] + 1)
        frame = pd.DataFrame(index=rows, columns=cols)
        for bin_key, count in self.counter.items():
            frame.at[bin_key[0], bin_key[1]] = count
        # order rows first, then columns, by bin number
        return frame.sort_index(axis=0).sort_index(axis=1)

    def __call__(self, value):
        fractional_bin = self.val_to_bin(value)
        voxel = tuple(np.floor(fractional_bin))
        return self.counter[voxel]