Source code for BioSimSpace.Metadynamics.CollectiveVariable._rmsd

######################################################################
# BioSimSpace: Making biomolecular simulation a breeze!
#
# Copyright: 2017-2025
#
# Authors: Lester Hedges <lester.hedges@gmail.com>
#
# BioSimSpace is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BioSimSpace is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with BioSimSpace. If not, see <http://www.gnu.org/licenses/>.
#####################################################################

"""Functionality for a root-mean-square deviation collective variable."""

__author__ = "Lester Hedges"
__email__ = "lester.hedges@gmail.com"

__all__ = ["RMSD"]

from math import ceil as _ceil
from math import sqrt as _sqrt

from sire.legacy import IO as _SireIO
from sire.legacy import Mol as _SireMol

from sire.mol import selection_to_atoms as _selection_to_atoms

from ... import _isVerbose
from ..._Exceptions import IncompatibleError as _IncompatibleError
from ..._SireWrappers import Atom as _Atom
from ..._SireWrappers import Molecule as _Molecule
from ..._SireWrappers import System as _System
from ...Align import rmsdAlign as _rmsdAlign

from ._collective_variable import CollectiveVariable as _CollectiveVariable
from .._bound import Bound as _Bound
from .._grid import Grid as _Grid
from ...Types import Length as _Length


[docs] class RMSD(_CollectiveVariable): """A class for a root-mean-square deviation (RMSD) collective variable."""
[docs] def __init__( self, system, reference, align_selection, rmsd_selection, reference_mapping=None, hill_width=_Length(0.1, "nanometer"), lower_bound=None, upper_bound=None, grid=None, alignment_type="optimal", pbc=True, property_map={}, ): """ Constructor. Parameters ---------- system : :class:`System <BioSimSpace._SireWrappers.System>` The molecular system of interest. reference : :class:`System <BioSimSpace._SireWrappers.System>` The reference system, against which the RMSD will be measured. align_selection : str A Sire selection string that defines the atoms to be used when aligning the two structures. If None, then the RMSD will be calculated without alignment. rmsd_selection : str A Sire selection string that defines the atoms to be used when calculating the RMSD. reference_mapping : dict A dictionary mapping molecule indices in the reference to those in the system. This must be used when the reference represents a sub-set of the system. hill_width : :class:`Length <BioSimSpace.Types.Length>` The width of the Gaussian hill used to sample this variable. lower_bound : :class:`Bound <BioSimSpace.Metadynamics.Bound>` A lower bound on the value of the collective variable. upper_bound : :class:`Bound <BioSimSpace.Metadynamics.Bound>` An upper bound on the value of the collective variable. grid : :class:`Grid <BioSimSpace.Metadynamics.Grid>` The grid on which the collective variable will be sampled. This can help speed up long metadynamics simulations where the number of Gaussian kernels can become prohibitive. alignment_type : str The mannier in which RMSD alignment is performed. Options are "optimal" or "simple". pbc : bool Whether to use periodic boundary conditions when computing the collective variable. property_map : dict A dictionary that maps system "properties" to their user defined values. This allows the user to refer to properties with their own naming scheme, e.g. { "charge" : "my-charge" } """ # Call the base class constructor. super().__init__() # Set the types associated with this collective variable. self._types = [_Length] # Validate input. if not isinstance(system, _System): raise TypeError( "'system' must be of type 'BioSimSpace._SireWrappers.System'." ) self._system = system.copy() if not isinstance(reference, _System): raise TypeError( "'reference' must be of type 'BioSimSpace._SireWrappers.System'." ) self._reference = reference.copy() from sire.system import System sire_reference = System(reference._sire_object) if reference_mapping is None: # Make sure that the reference system is compatible with the system. if system.nMolecules() != reference.nMolecules(): raise _IncompatibleError( "The number of molecules in 'system' and 'reference' must match." ) if system.nResidues() != reference.nResidues(): raise _IncompatibleError( "The number of residues in 'system' and 'reference' must match." ) if system.nAtoms() != reference.nAtoms(): raise _IncompatibleError( "The number of atoms in 'system' and 'reference' must match." ) self._reference_mapping = {i: i for i in range(system.nMolecules())} else: if not isinstance(reference_mapping, dict): raise TypeError("'reference_mapping' must be of type 'dict'") # Validate the mapping. for k, v in reference_mapping.items(): if not isinstance(k, int): raise TypeError("Keys of 'reference_mapping' must be of type 'int'") if not isinstance(v, int): raise TypeError( "Values of 'reference_mapping' must be of type 'int'" ) # Allow negative indeices. if k < 0: k += reference.nMolecules() if v < 0: v += system.nMolecules() if k >= reference.nMolecules(): raise ValueError( "Keys of 'reference_mapping' must be in the range [0, nMolecules)" ) if v >= system.nMolecules(): raise ValueError( "Values of 'reference_mapping' must be in the range [0, nMolecules)" ) # Make sure the mapped molecules have the same number of atoms. if reference[k].nAtoms() != system[v].nAtoms(): raise _IncompatibleError( "Molecules mapped via the 'reference_mapping' must have the same number of atoms." ) self._reference_mapping = reference_mapping # Validate alignment selection string. if not isinstance(align_selection, str): raise TypeError("'align_selection' must be of type 'str'") self._align_selection = align_selection try: self._aligment_atoms = _selection_to_atoms(sire_reference, align_selection) except Exception as e: msg = "Invalid 'align_selection' string." if _isVerbose(): raise IOError(msg) from e else: raise IOError(msg) from None # Validate RMSD selection string. if not isinstance(rmsd_selection, str): raise TypeError("'rmsd_selection' must be of type 'str'") self._rmsd_selection = rmsd_selection try: self._rmsd_atoms = _selection_to_atoms(sire_reference, rmsd_selection) except Exception as e: msg = "Invalid 'rmsd_selection' string." if _isVerbose(): raise IOError(msg) from e else: raise IOError(msg) from None # Check that the map is valid. if not isinstance(property_map, dict): raise TypeError("'property_map' must be of type 'dict'") # Work out the unique molecule numbers used for alignment and RMSD. mol_nums = set() for atom in self._aligment_atoms: if atom.molecule().number() not in mol_nums: mol_nums.add(atom.molecule().number()) for atom in self._rmsd_atoms: if atom.molecule().number() not in mol_nums: mol_nums.add(atom.molecule().number()) # List to store the molecule indices in the system. self._molecule_indices = [] # List to store index pairs for the mapped molecule (system, reference). molecule_pairs = [] # List to store the absolute atom indices. abs_atom_indices = [] # Dictionary to store alignment and RMSD indices for each molecule. align_indices = {} rmsd_indices = {} # Create mappings for the alignment and RMSD. # Loop over the molecules numbers. for num in mol_nums: # Extract the molecule from the reference system. molecule = reference._sire_object[num] # Work out the index of the molecule in the reference. index = reference.getIndex(_Molecule(molecule)) # Map the index to the system. self._molecule_indices.append(self._reference_mapping[index]) # Store the index pair. molecule_pairs.append((self._reference_mapping[index], index)) # Set of atoms to select. selected = set() # Create a cursor for editing the molecule. cursor = molecule.cursor() # Loop over the atoms. for i, atom in enumerate(molecule.atoms()): is_align = False is_rmsd = False # This atom is used for alignment. if atom in self._aligment_atoms: is_align = True try: align_indices[num].append(atom.index()) except: align_indices[num] = [atom.index()] # This atom is used for RMSD. if atom in self._rmsd_atoms: is_rmsd = True try: rmsd_indices[num].append(atom.index()) except: rmsd_indices[num] = [atom.index()] if is_align or is_rmsd: # Append to the list of atoms to select. selected.add(atom.index()) # Add the absolute atom index. abs_atom_indices.append(1 + reference.getIndex(_Atom(atom))) # Set occupancy and beta factor. if is_align: cursor.atom(i)["occupancy"] = 1.0 else: cursor.atom(i)["occupancy"] = 0.0 if is_rmsd: cursor.atom(i)["beta_factor"] = 1.0 else: cursor.atom(i)["beta_factor"] = 0.0 # Commit the changes. new_molecule = cursor.commit() # Create an AtomSelection. selection = new_molecule.selection() # Unselect all of the atoms. selection.selectNone() # Now add all of the atoms that appear in the reference. for idx in selected: selection.select(idx) # Create a partial molecule and extract the atoms. partial_molecule = ( _SireMol.PartialMolecule(new_molecule, selection).extract().molecule() ) # Update the new system. try: new_system += _Molecule(partial_molecule) except: new_system = _Molecule(partial_molecule).toSystem() # Parse as a PDB file and store the lines. pdb = _SireIO.PDB2(new_system._sire_object) lines = pdb.toLines() # Format for PLUMED, making sure to use the same indices as in the system. # Also strip any TER records. self._reference_pdb = [] for line, idx in zip(lines[1:-2], abs_atom_indices): if not "TER" in line: self._reference_pdb.append(line[:6] + str(idx).rjust(5) + line[11:]) self._reference_pdb.append(lines[-1]) # Store the initial value of the RMSD. This is useful to use as a starting # point for the restraint when performing steered molecular dynamics. self._initial_value = self._compute_initial_rmsd( system, reference, molecule_pairs, align_indices, rmsd_indices, property_map, ) # Set the "settable" parameters. self.setHillWidth(hill_width) self.setAlignmentType(alignment_type) self.setPeriodicBoundaries(pbc) # Set defaults for optional values. self._lower_bound = None self._upper_bound = None self._grid = None # Set the optional parameters. if lower_bound is not None: self.setLowerBound(lower_bound) if upper_bound is not None: self.setUpperBound(upper_bound) if grid is not None: self.setGrid(grid) # Validate that the state is self-consistent. self._validate() # Flag that the object has been instantiated, i.e. it is no longer "new". self._is_new_object = False
def __str__(self): """Return a human readable string representation of the object.""" string = "<BioSimSpace.Metadynamics.CollectiveVariable.RMSD: " string += " align_selection=%s" % self._align_selection string += ", rmsd_selection=%s" % self._rmsd_selection string += ", hill_width=%s" % self._hill_width if self._lower_bound is not None: string += ", lower_bound=%s" % self._lower_bound if self._upper_bound is not None: string += ", upper_bound=%s" % self._upper_bound if self._grid is not None: string += ", grid=%s" % self._grid string += ", alignment_type=%s" % self._alignment_type string += ", pbc=%s" % self._pbc string += ">" return string def __repr__(self): """Return a string showing how to instantiate the object.""" return self.__str__() def __eq__(self, other): """Equality operator.""" return ( self._reference == other._reference and self._hill_width == other._hill_width and self._lower_bound == other._lower_bound and self._upper_bound == other._upper_bound and self._grid == other._grid and self._alignment_type == other._alignment_type and self._pbc == other._pbc )
[docs] def getReferencePDB(self): """ Return the reference PDB file as a list of strings. Returns ------- pdb : [str] The reference PDB file as list of strings. """ return self._reference_pdb
[docs] def getInitialValue(self): """ Return the initial value of the collective variable. Returns ------- rmsd : :class:`Length <BioSimSpace.Types.Length>` The initial value of the collective variable. """ return self._initial_value
[docs] def setHillWidth(self, hill_width): """ Set the width of the Gaussian hills used to bias this collective variable. hill_width : :class:`Length <BioSimSpace.Types.Length>` The width of the Gaussian hill. """ if not isinstance(hill_width, _Length): raise TypeError("'hill_width' must be of type 'BioSimSpace.Types.Length'") if hill_width.value() < 0: raise ValueError("'hill_width' must have a value of > 0") # Convert to the internal unit. self._hill_width = hill_width.nanometers()
[docs] def getHillWidth(self): """ Return the width of the Gaussian hill used to bias this collective variable. Returns ------- hill_width : :class:`Length <BioSimSpace.Types.Length>` The width of the Gaussian hill. """ return self._hill_width
[docs] def setAlignmentType(self, alignment_type): """ Set the RMSD alignment type. Options are "optimal" or "simple". Parameters ---------- alignment_type : str The RMSD alignment type. """ if not isinstance(alignment_type, str): raise TypeError("'alignment_type' must be of type 'str'") alignment_type = alignment_type.lower().replace(" ", "") if alignment_type not in ["optimal", "simple"]: raise ValueError("'alignment_type' must be 'optimal' or 'simple'.") self._alignment_type = alignment_type
[docs] def getAlignmentType(self): """ Return the RMSD alignment type. Returns ------- alignment_type : str The RMSD alignment type. """ return self._alignment_type
[docs] def getMoleculeIndices(self): """ Return the indices of molecules involved in the collective variable. Returns ------- molecule_indices : int The indices of molecules involved in the collective variable. """ return self._molecule_indices
[docs] def setPeriodicBoundaries(self, pbc): """ Set whether to use periodic_boundaries when calculating the collective variable. Parameters ---------- pbc : bool Whether to use periodic boundaries conditions. """ if not isinstance(pbc, bool): raise TypeError("'pbc' must be of type 'bool'") self._pbc = pbc
[docs] def getPeriodicBoundaries(self): """ Return whether to take account of periodic boundary conditions when computing the collective variable. Returns ------- pbc : bool Whether to use periodic boundaries conditions. """ return self._pbc
def _compute_initial_rmsd( self, system, reference, molecule_pairs, align_indices, rmsd_indices, property_map={}, ): """ Compute the initial value of the RMSD collective variable. Parameters ---------- system : :class:`System <BioSimSpace._SireWrappers.System>` The molecular system of interest. reference : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` The reference molecule, against which the RMSD will be measured. This molecule should match with a single molecule from the system, i.e. contain the same residues as the matching molecule in the same order. molecule_pairs : [(int, int), ...] The indices of molecules in the system and reference that contain atoms involved in alignment and RMSD. align_indices : {Sire.Mol.MolNum: [Sire.Mol.AtomIdx, ...]} A dictionary mapping molecules to the indices of atoms that will be used for alignment. rmsd_indices : {Sire.Mol.MolNum: [Sire.Mol.AtomIdx, ...]} A dictionary mapping molecules to the indices of atoms that will be used for the RMSD. property_map : dict A dictionary that maps system "properties" to their user defined values. This allows the user to refer to properties with their own naming scheme, e.g. { "charge" : "my-charge" } Returns ------- rmsd : :class:`Length <BioSimSpace.Types.Length>` The initial value of the RMSD. """ # Note that we need to do this manually, since Sire.Mol.Evaluator doesn't # work correctly for molecules with different numbers of coordinate groups. if not isinstance(system, _System): raise TypeError( "'system' must be of type 'BioSimSpace._SireWrappers.System'." ) if not isinstance(reference, _System): raise TypeError( "'reference' must be of type 'BioSimSpace._SireWrappers.System'." ) if not isinstance(molecule_pairs, list): raise TypeError("'molecule_pairs' must be a list of integer tuples.") for pair in molecule_pairs: if not isinstance(pair, tuple): raise TypeError("'molecule_pairs' must be a list of integer tuples.") if len(pair) != 2: raise ValueError("'molecule_pairs' must be a list of integer tuples.") if not isinstance(pair[0], int): raise TypeError("'molecule_pairs' must be a list of integer tuples.") if not isinstance(pair[1], int): raise TypeError("'molecule_pairs' must be a list of integer tuples.") if not isinstance(align_indices, dict): raise TypeError("'align_indices' must be a dictionary.") for key, value in align_indices.items(): if not isinstance(key, _SireMol.MolNum): raise TypeError( "Keys of 'align_indices' must be of type 'sire.legacy.Mol.MolMolNum'." ) if not isinstance(value, list): raise TypeError( "Values of 'align_indices' must be lists of 'sire.legacy.Mol.AtomIdx' types." ) for idx in value: if not isinstance(idx, _SireMol.AtomIdx): raise TypeError( "Values of 'align_indices' must be lists of 'sire.legacy.Mol.AtomIdx' types." ) if not isinstance(rmsd_indices, dict): raise TypeError("'rmsd_indices' must be a dictionary.") for key, value in rmsd_indices.items(): if not isinstance(key, _SireMol.MolNum): raise TypeError( "Keys of 'rmsd_indices' must be of type 'sire.legacy.Mol.MolNum'." ) if not isinstance(value, list): raise TypeError( "Values of 'rmsd_indices' must be lists of 'sire.legacy.Mol.AtomIdx' types." ) for idx in value: if not isinstance(idx, _SireMol.AtomIdx): raise TypeError( "Values of 'rmsd_indices' must be lists of 'sire.legacy.Mol.AtomIdx' types." ) if not isinstance(property_map, dict): raise TypeError("'property_map' must be of type 'dict'") # Get the 'space' property from the system. try: space_prop = property_map.get("space", "space") space = system._sire_object.property(space_prop) except: raise ValueError( f"'system' has no '{space_prop}' property. Unable to compute RMSD!" ) # Set the user-define coordinates property. coord_prop = property_map.get("coordinates", "coordinates") # Total squared distance. dist2 = 0 # Total number of RMSD atoms. num_rmsd = 0 # Loop over the molecules. for idx_system, idx_ref in molecule_pairs: mol = system[idx_system] ref = reference[idx_ref] align_mapping = {} try: align_mapping = { i.value(): i.value() for i in align_indices[ref._sire_object.number()] } except Exception as e: pass if len(align_mapping) > 0: try: new_mol = _rmsdAlign( mol, ref, align_mapping, property_map0=property_map, property_map1=property_map, ) except: ValueError( "Unable to align 'molecule' to 'reference' based on 'align_mapping'." ) rmsd_mapping = {} try: rmsd_mapping = {i: i for i in rmsd_indices[ref._sire_object.number()]} except Exception as e: pass if len(rmsd_mapping) > 0: # Loop over all atom matches and compute the squared distance. for idx0, idx1 in rmsd_mapping.items(): try: coord0 = new_mol._sire_object.atom(idx0).property(coord_prop) coord1 = ref._sire_object.atom(idx1).property(coord_prop) except: raise ValueError( "Could not calculate initial RMSD due to missing coordinates!" ) dist2 += space.calcDist2(coord0, coord1) num_rmsd += 1 # Compute the RMSD. dist2 /= num_rmsd rmsd = _sqrt(dist2) return _Length(rmsd, "Angstrom") def _validate(self): """Internal function to check that the object is in a consistent state.""" if self._lower_bound is not None: if type(self._lower_bound.getValue()) not in self._types: raise TypeError( "'lower_bound' must be of type 'BioSimSpace.Types.Length'" ) if self._upper_bound is not None: if type(self._upper_bound.getValue()) not in self._types: raise TypeError( "'upper_bound' must be of type 'BioSimSpace.Types.Length'" ) if self._lower_bound is not None and self._upper_bound is not None: if self._lower_bound.getValue() >= self._upper_bound.getValue(): raise TypeError("'lower_bound' must less than 'upper_bound'") if self._grid is not None: if type(self._grid.getMinimum()) not in self._types: raise TypeError( "'grid' minimum must be of type 'BioSimSpace.Types.Length'" ) if type(self._grid.getMaximum()) not in self._types: raise TypeError( "Grid 'maximum' must be of type 'BioSimSpace.Types.Length'" ) if ( self._lower_bound is not None and self._grid.getMinimum() > self._lower_bound.getValue() ): raise ValueError("'lower_bound' is less than 'grid' minimum.") if ( self._upper_bound is not None and self._grid.getMaximum() < self._upper_bound.getValue() ): raise ValueError("'upper_bound' is greater than 'grid' maximum.") # If the number of bins isn't specified, estimate it out from the hill width. if self._grid.getBins() is None: grid_range = (self._grid.getMaximum() - self._grid.getMinimum()).value() num_bins = _ceil(5.0 * (grid_range / self._hill_width.value())) self._grid.setBins(num_bins)