Source code for openpathsampling.engines.gromacs.engine

"""
Gromacs support in OpenPathSampling

OpenPathSampling supports Gromacs as an "external engine," meaning that
Gromacs itself runs as a coprocess, and OPS uses the file system as an
intermediary to monitor when a path needs to be terminated.
"""

import logging
logger = logging.getLogger(__name__)

from openpathsampling.integration_tools import md, HAS_MDTRAJ
if HAS_MDTRAJ:
    TRRTrajectoryFile = md.formats.TRRTrajectoryFile

from openpathsampling.engines import ExternalEngine
from openpathsampling.engines import features
from openpathsampling.engines.snapshot import BaseSnapshot, SnapshotDescriptor
from openpathsampling.engines.topology import MDTrajTopology
from openpathsampling.engines.external_snapshots import \
        ExternalMDSnapshot, InternalizedMDSnapshot
from openpathsampling.tools import ensure_file

import os
import psutil
import shlex
import time
import numpy as np

from openpathsampling.engines.external_engine import (
    _debug_open_files, close_file_descriptors
)

def _remove_file_if_exists(filename):  # pragma: no cover
    #  not requiring coverage here because it's part of Gromacs integration;
    #  gets covered if gmx is installed though
    if os.path.isfile(filename):
        os.remove(filename)

class _GroFileEngine(ExternalEngine):
    SnapshotClass = ExternalMDSnapshot
    InternalizedSnapshotClass = InternalizedMDSnapshot
    def __init__(self, gro):
        self.gro = gro
        traj = md.load(gro)
        self.topology = MDTrajTopology(traj.topology)
        n_atoms = self.topology.n_atoms
        n_spatial = self.topology.n_spatial
        descriptor = SnapshotDescriptor.construct(
            snapshot_class=self.SnapshotClass,
            snapshot_dimensions={'n_spatial': n_spatial,
                                 'n_atoms': n_atoms}
        )
        super(_GroFileEngine, self).__init__(options={},
                                            descriptor=descriptor,
                                            template=None)

    def to_dict(self):
        return {'gro': self.gro}

    @classmethod
    def from_dict(cls, dct):
        return cls(dct['gro'])

    def read_frame_data(self, file_name, file_position):
        traj = md.load(file_name)
        xyz = traj.xyz[0]
        vel = np.zeros(shape=xyz.shape)
        box = traj.unitcell_vectors[0]
        return (xyz, vel, box)



def snapshot_from_gro(gro_file):
    template_engine = _GroFileEngine(gro_file)
    snapshot = ExternalMDSnapshot(file_name=gro_file,
                                  file_position=0,
                                  engine=template_engine)
    return snapshot


[docs] class GromacsEngine(ExternalEngine): """ External engine wrapper for Gromacs (using indirect API). This provides Gromacs support, using our indirect engine API (TODO link). OPS runs Gromacs based on the mdp file that you provide. This mdp file MUST output in the TRR format, and the velocity and position save frequency in the TRR must be the same (that is, you need to have ``nstxout`` = ``nstvout``, and they must not be 0). Parameters ---------- gro : string File for the grompp ``-c`` flag. This is often a .gro, but note that you may get better support with other integrations (e.g., MDTraj) if you use a PDB. mdp : string .mdp file top : string .top file options : dict Dictionary of option name to value. Gromacs-specific option names are * ``gmx_executable``: Prefix to gromacs commands, which are run as ``{gmx_prefix}command``. Default is 'gmx ' (note the space). This allows you to use either Gromacs 4 or Gromacs 5, as well as specifying the path to your version of Gromacs. * ``grompp_args``: Additional arguments to ``grompp``. The defaults take ``-c {self.gro} -f {self.mdp} -p {self.top} -t {self.input_file}``, where the input filename is set by :meth:`.set_filenames`. Default is the empty string. * ``mdrun_args``: Additional arguments to ``mdrun``. The defaults take ``-s topol.top -o self.output_file -e self.edr_file -g self.log_file``, where the ``topol.top`` is generated by :meth:`.prepare`, and the other filenames are set by :meth:`.set_filenames`. Default is the empty string. * ``snapshot_timestep``: time between frames analysed by ops. You keep track of the unit, I'd advise ps so the output rates will be in ps. Example. 2 fs timestep in the mdp with nstxout of 30 would give snapshot_timestep of 60 fs = 0.06 ps base_dir : string root directory where all files will be found (defaults to pwd) prefix : string prefix within ``base_dir`` for output folders (defaults to gmx) """ _default_options = dict(ExternalEngine._default_options, **{ 'gmx_executable': "gmx ", 'grompp_args': "", 'mdrun_args': "", 'snapshot_timestep':1.0 } ) GROMPP_CMD = ("{e.options[gmx_executable]}grompp -c {e.gro} " + "-f {e.mdp} -p {e.top} -t {e.input_file} " + "-po {e.mdout_file} -o {e.tpr_file} " + "{e.options[grompp_args]}") MDRUN_CMD = ("{e.options[gmx_executable]}mdrun -s {e.tpr_file} " + "-o {e.output_file} -e {e.edr_file} -g {e.log_file} " + "{mdrun_args}") # use these as CMD.format(e=engine, **engine.options) SnapshotClass = ExternalMDSnapshot InternalizedSnapshotClass = InternalizedMDSnapshot clear_snapshot_cache = True
[docs] def __init__(self, gro, mdp, top, options, base_dir="", prefix="gmx"): self.base_dir = base_dir self.gro = os.path.join(base_dir, gro) self.mdp = os.path.join(base_dir, mdp) self.top = os.path.join(base_dir, top) self.prefix = os.path.join(base_dir, prefix) self.gro_contents, self._gro_hash = ensure_file(self.gro) self.mdp_contents, self._mdp_hash = ensure_file(self.mdp) self.top_contents, self._top_hash = ensure_file(self.top) # TODO: move to a later stage, before first traj dirs = [self.prefix + s for s in ['_trr', '_log', '_edr']] for d in dirs: try: os.mkdir(d) except OSError: pass # the directory already exists self._file = None # file open/close efficiency self._last_filename = None # TODO: add snapshot_timestep; first via options, later read mdp template = snapshot_from_gro(self.gro) self.topology = template.topology descriptor = template.engine.descriptor # descriptor from gro file # initial placeholders self.input_file = "INITIAL.trr" self.output_file = self.prefix + "_trr/OUTPUT_NAME.trr" self.edr_file = self.prefix + "_edr/OUTPUT_NAME.edr" self.log_file = self.prefix + "_log/OUTPUT_NAME.log" self.tpr_file = "topol.tpr" self.mdout_file = "mdout.mdp" self._mdtraj_topology = None super(GromacsEngine, self).__init__(options, descriptor, template, first_frame_in_file=True)
def to_dict(self): dct = super(GromacsEngine, self).to_dict() local_dct = { 'gro': self.gro, 'mdp': self.mdp, 'top': self.top, 'options': self.options, 'base_dir': self.base_dir, 'prefix': self.prefix, 'gro_contents': self.gro_contents, 'mdp_contents': self.mdp_contents, 'top_contents': self.top_contents, } dct.update(local_dct) return dct @classmethod def from_dict(cls, dct): dct = dict(dct) # make a copy for ftype in ['gro', 'top', 'mdp']: contents = dct.pop(ftype + "_contents") _ = ensure_file(filename=dct[ftype], old_contents=contents) return super(GromacsEngine, cls).from_dict(dct) @property def mdtraj_topology(self): if self._mdtraj_topology: return self._mdtraj_topology return self.topology.mdtraj @mdtraj_topology.setter def mdtraj_topology(self, value): self._mdtraj_topology = value def read_frame_data(self, filename, frame_num): """ Returns pos, vel, box or raises error """ # if self._last_filename != filename: # try: # self._file.close() # except AttributeError: # pass # first time thru, self._file is None # self._file = TRRTrajectoryFile(filename) # f = self._file # do we need to reopen the TRR each time to avoid problems with the # fiel length changing? trr = TRRTrajectoryFile(filename) f = trr logger.debug("Reading file %s frame %d (of %d)", filename, frame_num, len(f)) # logger.debug("File length: %d", len(f)) try: f.seek(offset=frame_num) data = f._read(n_frames=1, atom_indices=None, get_velocities=True) finally: trr.close() return data[0][0], data[5][0], data[3][0] def read_frame_from_file(self, file_name, frame_num): # note: this only needs to return the file pointers -- but should # only do so once that frame has been written! basename = os.path.basename(file_name) # basename should be in the format [0-9]+\.trr (as set by the # trajectory_filename method) # file_number = int(basename.split('.')[0]) try: xyz, vel, box = self.read_frame_data(file_name, frame_num) except (IndexError, OSError, IOError) as e: # this means that no such frame exists yet, so we return None # IndexError in older version, OSError more recently (specific # MDTraj error) logger.debug("Expected exception caught: " + str(e)) close_file_descriptors(basename) return None except RuntimeError as e: # TODO: matches "TRR read error" logger.debug("Received partial frame for %s %d", file_name, frame_num+1) return 'partial' else: logger.debug("Creating snapshot") snapshot = ExternalMDSnapshot(file_name=file_name, file_position=frame_num, engine=self) return snapshot def write_frame_to_file(self, filename, snapshot, mode='w'): if os.path.isfile(filename): # stop if we already have this file; could also happen because # of a weird behavior in a mover. You must remove the files if # you don't want them. raise RuntimeError("File " + str(filename) + " exists. " + "Preventing overwrite.") # type control before passing things to Cython code xyz = np.asarray([snapshot.xyz], dtype=np.float32) time = np.asarray([0.0], dtype=np.float32) step = np.asarray([0], dtype=np.int32) box = np.asarray([np.asarray(snapshot.box_vectors)], dtype=np.float32) lambd = np.asarray([0.0], dtype=np.float32) vel = np.asarray([np.asarray(snapshot.velocities)], dtype=np.float32) try: trr = TRRTrajectoryFile(filename, mode) trr._write(xyz, time, step, box, lambd, vel) finally: trr.close() close_file_descriptors(filename) def trajectory_filename(self, number): # TODO: remove code path allowing ints (API break for 2.0) trr_dir = self.prefix + "_trr/" if isinstance(number, int): num_str = num_str = '{:07d}'.format(number) else: num_str = number return trr_dir + num_str + '.trr' def set_filenames(self, number): if isinstance(number, int): num_str = '{:07d}'.format(number + 1) self.output_file = self.trajectory_filename(number + 1) init_filename = "initial_frame.trr" self.filename_setter.reset(number + 1) else: num_str = number self.output_file = self.trajectory_filename(num_str) init_filename = num_str + "_initial_frame.trr" self.mdout = num_str + "_mdout.mdp" self.tpr_file = num_str + "_" + "topol.tpr" self.input_file = os.path.join(self.base_dir, init_filename) self.edr_file = os.path.join(self.prefix + "_edr", num_str + '.edr') self.log_file = os.path.join(self.prefix + "_log", num_str + '.log') @property def grompp_command(self): cmd = self.GROMPP_CMD.format(e=self) return cmd def prepare(self): # pragma: no cover # coverage ignored b/c Travis won't have gmx. However, we do have a # test that covers this if gmx is present (otherwise it is skipped) # ensure that the files haven't changed before we run trajectory _ = ensure_file(self.gro, self.gro_contents, self._gro_hash) _ = ensure_file(self.mdp, self.mdp_contents, self._mdp_hash) _ = ensure_file(self.top, self.top_contents, self._top_hash) # grompp and mdrun cmd = self.grompp_command logger.info(cmd) run_cmd = shlex.split(cmd) return_code = psutil.Popen(run_cmd, preexec_fn=os.setsid).wait() return return_code def cleanup(self): # pragma: no cover # tested when traj is run, which we don't on CI _remove_file_if_exists(self.input_file) _remove_file_if_exists(self.tpr_file) _remove_file_if_exists(self.mdout_file) def engine_command(self): args = self.options['mdrun_args'].format(prev_traj=self._traj_num-1, next_traj=self._traj_num) cmd = self.MDRUN_CMD.format(e=self, mdrun_args=args) return cmd