Source code for openpathsampling.engines.topology

import numpy as np
import pandas as pd

from openpathsampling.netcdfplus import StorableNamedObject
from openpathsampling.integration_tools import error_if_no_mdtraj, md

import logging
logger = logging.getLogger(__name__)


[docs] class Topology(StorableNamedObject): """ Topology is the object that contains all information about the structure of the system to be simulated. Attributes ---------- n_atoms : int number of atoms n_spatial : int number of spatial dimensions, default is 3 """
[docs] def __init__(self, n_atoms, n_spatial=3): super(Topology, self).__init__() self.n_atoms = n_atoms self.n_spatial = n_spatial
[docs] class MDTrajTopology(Topology):
[docs] def __init__(self, mdtraj_topology): super(MDTrajTopology, self).__init__(int(mdtraj_topology.n_atoms), 3) self.mdtraj = mdtraj_topology
def to_dict(self): out = dict() atom_data = [] for atom in self.mdtraj.atoms: if atom.element is None: element_symbol = "" else: element_symbol = atom.element.symbol if hasattr(atom, 'segment_id'): atom_data.append(( atom.serial, atom.name, element_symbol, int(atom.residue.resSeq), atom.residue.name, atom.residue.chain.index, atom.segment_id)) else: atom_data.append(( atom.serial, atom.name, element_symbol, int(atom.residue.resSeq), atom.residue.name, atom.residue.chain.index, '')) out['atom_columns'] = ["serial", "name", "element", "resSeq", "resName", "chainID", "segmentID"] out['atoms'] = atom_data out['bonds'] = [(a.index, b.index) for (a, b) in self.mdtraj.bonds] return {'mdtraj': out} @classmethod def from_dict(cls, dct): error_if_no_mdtraj("MDTrajTopology") top_dict = dct['mdtraj'] atoms = pd.DataFrame( top_dict['atoms'], columns=top_dict['atom_columns']) bonds = np.array(top_dict['bonds']) try: md_topology = md.Topology.from_dataframe(atoms, bonds) return cls(md_topology) except Exception: # we try a fix and add multiples of 10000 to the resSeq logger.info('Normal reconstruction of topology failed. ' 'Trying a fix to the 10k residue ID problem.') for ci in np.unique(atoms['chainID']): chain_atoms = atoms[atoms['chainID'] == ci] indices = chain_atoms.index.tolist() old_residue_id = 0 multiplier = 0 places = [] for row, res_id in zip(indices, list(chain_atoms['resSeq'])): if res_id < old_residue_id: if multiplier > 0: atoms.loc[places, 'resSeq'] += 10000 * multiplier places = [] multiplier += 1 if multiplier > 0: places.append(row) old_residue_id = res_id if multiplier > 0: atoms.loc[places, 'resSeq'] += 10000 * multiplier # this function is really slow! Reads ~ 1000 atoms per second md_topology = md.Topology.from_dataframe(atoms, bonds) # that we have successfully created the topology using from_df # we remove the wrong multipliers # this is weird, but reproduces the current behaviour for atom in md_topology.atoms: atom.residue.resSeq %= 10000 return cls(md_topology)