#!/usr/bin/env python
# coding: utf-8
# # Advanced analysis techniques
#
# Now we'll move on to a few more advanced analysis techniques. (These are discussed in Paper II.)
#
# With the fixed path length ensemble, we should check for recrossings. To do this, we create an ensemble which represents the recrossing paths: a frame in $\beta$, possible frames in neither $\alpha$ nor $\beta$, and then a frame in $\alpha$.
#
# Then we check whether any subtrajectory of a trial trajectory matches that ensemble, by using the `Ensemble.split()` function. We can then further refine to see which steps that included trials with recrossings were actually accepted.
# In[1]:
from __future__ import print_function
get_ipython().run_line_magic('matplotlib', 'inline')
import openpathsampling as paths
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import os
import openpathsampling.visualize as ops_vis
from IPython.display import SVG
# In[2]:
get_ipython().run_cell_magic('time', '', 'flexible = paths.Storage("ad_tps.nc")\n')
# In[3]:
get_ipython().run_cell_magic('time', '', 'fixed = paths.Storage("ad_fixed_tps.nc")\n')
# In[4]:
flex_network = flexible.networks['tps_network']
# In[5]:
# create the ensemble that identifies recrossings
alpha = fixed.volumes['C_7eq']
beta = fixed.volumes['alpha_R']
recrossing_ensemble = paths.SequentialEnsemble([
paths.LengthEnsemble(1) & paths.AllInXEnsemble(beta),
paths.OptionalEnsemble(paths.AllOutXEnsemble(alpha | beta)),
paths.LengthEnsemble(1) & paths.AllInXEnsemble(alpha)
])
# In[6]:
get_ipython().run_cell_magic('time', '', '# now we check each step to see if its trial has a recrossing\nsteps_with_recrossing = []\nfor step in tqdm(fixed.steps):\n # trials is a list of samples: with shooting, only one in the list\n recrossings = [] # default for initial empty move (no trials in step[0].change)\n for trial in step.change.trials:\n recrossings = recrossing_ensemble.split(trial.trajectory)\n # recrossing contains a list with the recrossing trajectories\n # (len(recrossing) == 0 if no recrossings)\n if len(recrossings) > 0:\n steps_with_recrossing += [step] # save for later analysis\n')
# In[7]:
accepted_recrossings = [step for step in steps_with_recrossing if step.change.accepted is True]
# In[8]:
print("Trials with recrossings:", len(steps_with_recrossing))
print("Accepted trials with recrossings:", len(accepted_recrossings))
# Note that the accepted trials with recrossing does not account for how long the trial remained active. It also doesn't tell us whether the trial represented a new recrossing event, or was correlated with the previous recrossing event.
# Let's take a look at one of the accepted trajectories with a recrossing event. We'll plot the value of $\psi$, since this is what distinguishes the two states. We'll also select the frames that are actually inside each state and color them (red for $\alpha$, blue for $\beta$).
# In[9]:
psi = fixed.cvs['psi']
trajectory = accepted_recrossings[0].active[0].trajectory
in_alpha_indices = [trajectory.index(s) for s in trajectory if alpha(s)]
in_alpha_psi = [psi(trajectory)[i] for i in in_alpha_indices]
in_beta_indices = [trajectory.index(s) for s in trajectory if beta(s)]
in_beta_psi = [psi(trajectory)[i] for i in in_beta_indices]
plt.plot(psi(trajectory), 'k-')
plt.plot(in_alpha_indices, in_alpha_psi, 'ro') # alpha in red
plt.plot(in_beta_indices, in_beta_psi, 'bo') # beta in blue
# Now let's see how many recrossing events there are in each accepted trial. If there's one recrossing, then the trajectory must go $\alpha\to\beta\to\alpha\to\beta$ to be accepted. Two recrossings would mean $\alpha\to\beta\to\alpha\to\beta\to\alpha\to\beta$.
# In[10]:
recrossings_per = []
for step in accepted_recrossings:
for test in step.change.trials:
recrossings_per.append(len(recrossing_ensemble.split(test.trajectory)))
# In[11]:
print(recrossings_per)
# In[12]:
# these numbers come from accepted trial steps, not all steps
print(sum(recrossings_per))
print(len(recrossings_per))
print(len([x for x in recrossings_per if x==2]))
# # Comparing the fixed and flexible simulations
# In[13]:
get_ipython().run_cell_magic('time', '', '# transition path length distribution\nflex_ens = flex_network.sampling_ensembles[0]\nfixed_transition_segments = sum([flex_ens.split(step.active[0].trajectory)\n for step in tqdm(fixed.steps)],[])\nfixed_transition_length = [len(traj) for traj in fixed_transition_segments]\n')
# In[14]:
flexible_transition_length = [len(s.active[0].trajectory) for s in flexible.steps]
# In[15]:
print(len(fixed_transition_length))
# In[16]:
bins = np.linspace(0, 400, 80);
plt.hist(flexible_transition_length, bins, alpha=0.5, density=True, label="flexible");
plt.hist(fixed_transition_length, bins, alpha=0.5, density=True, label="fixed");
plt.legend(loc='upper right');
# #### Identifying different mechanisms using custom ensembles
#
# We expected the plot above to be very similar for both cases. However, we know that the $\alpha\to\beta$ transition in alanine dipeptide can occur via two mechanisms: since $\psi$ is periodic, the transition can occur due to an overall increase in $\psi$, or due to an overall decrease in $\psi$. We also know that the alanine dipeptide transitions aren't actually all that rare, so they will occur spontaneously in long simulations.
#
#
# This section shows how to create custom ensembles to identify whether the transition occurred with an increasing $\psi$ or a decreasing $\psi$. We also need to account for (unlikely) edge cases where the path starts in one direction but completes the transition from the other.
# First, we'll create a few more `Volume` objects. In this case, we will completely tile the Ramachandran space; while a complete tiling isn't necessary, it is often useful.
# In[17]:
# first, we fully subdivide the Ramachandran space
phi = fixed.cvs['phi']
deg = 180.0/np.pi
nml_plus = paths.PeriodicCVDefinedVolume(psi, -160/deg, -100/deg, -np.pi, np.pi)
nml_minus = paths.PeriodicCVDefinedVolume(psi, 0/deg, 100/deg, -np.pi, np.pi)
nml_alpha = (paths.PeriodicCVDefinedVolume(phi, 0/deg, 180/deg, -np.pi, np.pi) &
paths.PeriodicCVDefinedVolume(psi, 100/deg, 200/deg, -np.pi, np.pi))
nml_beta = (paths.PeriodicCVDefinedVolume(phi, 0/deg, 180/deg, -np.pi, np.pi) &
paths.PeriodicCVDefinedVolume(psi, -100/deg, 0/deg, -np.pi, np.pi))
# In[18]:
#TODO: plot to display where these volumes are
# Next, we'll create ensembles for the "increasing" and "decreasing" transitions. These transitions mark a crossing of either the `nml_plus` or the `nml_minus`. These aren't necessarily $\alpha\to\beta$ transitions. However, any $\alpha\to\beta$ transition must contain at least one subtrajectory which satsifies one of these ensembles.
# In[19]:
increasing = paths.SequentialEnsemble([
paths.AllInXEnsemble(alpha | nml_alpha),
paths.AllInXEnsemble(nml_plus),
paths.AllInXEnsemble(beta | nml_beta)
])
decreasing = paths.SequentialEnsemble([
paths.AllInXEnsemble(alpha | nml_alpha),
paths.AllInXEnsemble(nml_minus),
paths.AllInXEnsemble(beta | nml_beta)
])
# Finally, we'll write a little function that characterizes a set of trajectories according to these ensembles. It returns a dictionary mapping the ensemble (`increasing` or `decreasing`) to a list of trajectories that have a subtrajectory that satisfies it (at least one entry in `ensemble.split(trajectory)`). That dictionary also contains keys for `'multiple'` matched ensembles and `None` if no ensemble was matched. Trajectories for either of these keys would need to be investigated further.
# In[20]:
def categorize_transitions(ensembles, trajectories):
results = {ens : [] for ens in ensembles + ['multiple', None]}
for traj in trajectories:
matched_ens = None
for ens in ensembles:
if len(ens.split(traj)) > 0:
if matched_ens is not None:
matched_ens = 'multiple'
else:
matched_ens = ens
results[matched_ens].append(traj)
return results
# With that function defined, let's use it!
# In[21]:
categorized = categorize_transitions(ensembles=[increasing, decreasing],
trajectories=fixed_transition_segments)
# In[22]:
print("increasing:", len(categorized[increasing]))
print("decreasing:", len(categorized[decreasing]))
print(" multiple:", len(categorized['multiple']))
print(" None:", len(categorized[None]))
# Comparing to the flexible length simulation:
# In[23]:
flex_trajs = [step.active[0].trajectory for step in flexible.steps]
flex_categorized = categorize_transitions(ensembles=[increasing, decreasing],
trajectories=flex_trajs[::10])
# In[24]:
print("increasing:", len(flex_categorized[increasing]))
print("decreasing:", len(flex_categorized[decreasing]))
print(" multiple:", len(flex_categorized['multiple']))
print(" None:", len(flex_categorized[None]))
# So the fixed length sampling is somehow capturing both kinds of transitions (probably because they are not really that rare). Let's see what the path length distribution from only the decreasing transitions looks
# In[25]:
plt.hist([len(traj) for traj in flex_categorized[decreasing]], bins, alpha=0.5, density=True);
plt.hist([len(traj) for traj in categorized[decreasing]], bins, alpha=0.5, density=True);
# Still a little off, although this might be due to bad sampling. Let's see how many of the decorrelated trajectories have this kind of transition.
# In[26]:
full_fixed_tree = ops_vis.PathTree(
fixed.steps,
ops_vis.ReplicaEvolution(replica=0)
)
full_history = full_fixed_tree.generator
# In[27]:
# start with the decorrelated tragectories
fixed_decorrelated = full_history.decorrelated_trajectories
# find the A->B transitions from the decorrelated trajectories
decorrelated_transitions = sum([flex_ens.split(traj) for traj in fixed_decorrelated], [])
# find the A->B transition from these which are decreasing
decorrelated_decreasing = sum([decreasing.split(traj) for traj in decorrelated_transitions], [])
print(len(decorrelated_decreasing))
# So this is based off of 11 decorrelated trajectory transitions. That's not a lot of statistics.
#
# However, we expect to see a *very* different distribution for the "increasing" paths:
# In[28]:
plt.hist([len(traj) for traj in categorized[increasing]], bins, density=True, alpha=0.5, color='g');
# Let's also check whether we go back and forth between the increasing transition and the decreasing transition, or whether there's just a single change from one type to the other.
# In[29]:
def find_switches(ensembles, trajectories):
switches = []
last_category = None
traj_num = 0
for traj in trajectories:
category = None
for ens in ensembles:
if len(ens.split(traj)) > 0:
if category is not None:
category = 'multiple'
else:
category = ens
if last_category != category:
switches.append((category, traj_num))
traj_num += 1
last_category = category
return switches
# In[30]:
switches = find_switches([increasing, decreasing], fixed_transition_segments)
# In[31]:
print([switch[1] for switch in switches], len(fixed_transition_segments))
# So there are a lot of switches early in the simulation, and then it gets stuck in one state for much longer.
# Even though we know the alanine dipeptide transitions are not particularly rare, this does give us reason to re-check the temperature. First we'll check what the intergrator says its temperature is, then we'll calculate the temperature based on the kinetic energy of every 50th trajectory.
#
# Note that the code below is specific to using the OpenMM engine.
# In[32]:
every_50th_trajectory = [step.active[0].trajectory for step in fixed.steps[::50]]
# In[33]:
# make a set to remove duplicates, if trajs aren't decorrelated
every_50th_traj_snapshots = list(set(sum(every_50th_trajectory, [])))
# sadly, it looks like that trick with set doesn't do any good here
# In[34]:
temperatures = [snap.instantaneous_temperature for snap in tqdm(every_50th_traj_snapshots)]
# In[36]:
plt.plot([T / T.unit for T in temperatures])
mean_T = np.mean(temperatures)
plt.plot([mean_T / mean_T.unit]*len(temperatures), 'r')
print("Mean temperature:", np.mean(temperatures).format("%.2f"))