Source code for BioSimSpace.FreeEnergy._atm

######################################################################
# BioSimSpace: Making biomolecular simulation a breeze!
#
# Copyright: 2017-2024
#
# Authors: Lester Hedges <lester.hedges@gmail.com>
#          Matthew Burman <matthew@openbiosim.org>
#
# BioSimSpace is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BioSimSpace is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with BioSimSpace. If not, see <http://www.gnu.org/licenses/>.
######################################################################

# Functionality for creating and viewing systems for Atomic transfer.

__all__ = ["ATMSetup", "ATM"]

import copy as _copy
import json as _json
import os as _os
import pathlib as _pathlib
import shutil as _shutil
import warnings as _warnings
import zipfile as _zipfile

from sire.legacy import IO as _SireIO

from .._SireWrappers import Molecule as _Molecule
from .._SireWrappers import System as _System
from .. import _Utils
from ..Types import Length as _Length
from ..Types import Vector as _Vector
from ..Types import Coordinate as _Coordinate
from ..Align import matchAtoms as _matchAtoms
from ..Align import rmsdAlign as _rmsdAlign
from ..Notebook import View as _View
from .. import _isVerbose
from .. import _is_notebook
from ..Process import OpenMM as _OpenMM
from ..Process import ProcessRunner as _ProcessRunner

if _is_notebook:
    from IPython.display import FileLink as _FileLink


[docs] class ATMSetup: """ A class for setting up a system for ATM simulations. """
[docs] def __init__( self, system=None, receptor=None, ligand_bound=None, ligand_free=None, protein_index=0, ligand_bound_index=1, ligand_free_index=2, ): """Constructor for the ATM class. Parameters ---------- system : :class:`System <BioSimSpace._SireWrappers.System>` A pre-prepared ATM system containing protein and ligands placed in their correct positions. If provided takes precedence over protein, ligand_bound and ligand_free. receptor : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` A receptor molecule. Will be used along with ligand_bound and ligand_free to create a system. ligand_bound : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` The bound ligand. Will be used along with protein and ligand_free to create a system. ligand_free : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` The free ligand. Will be used along with protein and ligand_bound to create a system. protein_index : int, [int] If passing a pre-prepared system, the index (or indices) of the protein molecule in the system (Default 0). ligand_bound_index : int If passing a pre-prepared system, the index of the bound ligand molecule in the system (Default 1). ligand_free_index : int If passing a pre-prepared system, the index of the free ligand molecule in the system (Default 2). """ # make sure that either system or protein, ligand_bound and ligand_free are given if system is None and not all( x is not None for x in [receptor, ligand_bound, ligand_free] ): raise ValueError( "Either a pre-prepared system or protein, bound ligand and free ligand must be given." ) # check that the system is a BioSimSpace system # or the other inputs are BioSimSpace molecules if system is not None and not isinstance(system, _System): raise ValueError("The system must be a BioSimSpace System object.") elif not all( isinstance(x, _Molecule) for x in [receptor, ligand_bound, ligand_free] if x is not None ): raise ValueError( "The protein, bound ligand and free ligand must be BioSimSpace Molecule objects." ) self._is_prepared = False self._setSystem(system) if not self._is_prepared: self._setProtein(receptor) self._setLigandBound(ligand_bound) self._setLigandFree(ligand_free) else: self._setProteinIndex(protein_index) self._setLigandBoundIndex(ligand_bound_index) self._setLigandFreeIndex(ligand_free_index)
def _setSystem(self, system, is_prepared=True): """ Set the system for the ATM simulation. Parameters ---------- system : BioSimSpace._SireWrappers.System The system for the ATM simulation. """ if system is not None: if not isinstance(system, _System): raise ValueError( f"The system must be a BioSimSpace System object. It is currently {type(system)}." ) elif len(system.getMolecules()) < 3: raise ValueError( "The system must contain at least three molecules (a protein and two ligands)." ) else: self._system = system self._is_prepared = is_prepared else: self._system = None self._is_prepared = False def _getSystem(self): """Get the system for the ATM simulation. Returns ------- BioSimSpace._SireWrappers.System The system for the ATM simulation. """ return self._system def _setProtein(self, protein): """Set the protein for the ATM simulation. Parameters ---------- protein : BioSimSpace._SireWrappers.Molecule The protein for the ATM simulation. """ if protein is not None: if not isinstance(protein, _Molecule): raise ValueError("The protein must be a BioSimSpace Molecule object.") else: self._protein = protein else: self._protein = None def _getProtein(self): """Get the protein for the ATM simulation. Returns ------- BioSimSpace._SireWrappers.Molecule The protein for the ATM simulation. """ return self._protein def _setLigandBound(self, ligand_bound): """Set the bound ligand for the ATM simulation. Parameters ---------- ligand_bound : BioSimSpace._SireWrappers.Molecule The bound ligand for the ATM simulation. """ if ligand_bound is not None: if not isinstance(ligand_bound, _Molecule): raise ValueError( "The bound ligand must be a BioSimSpace Molecule object." ) else: self._ligand_bound = ligand_bound else: self._ligand_bound = None def _getLigandBound(self): """Get the bound ligand for the ATM simulation. Returns ------- BioSimSpace._SireWrappers.Molecule The bound ligand for the ATM simulation. """ return self._ligand_bound def _setLigandFree(self, ligand_free): """Set the free ligand for the ATM simulation. Parameters ---------- ligand_free : BioSimSpace._SireWrappers.Molecule The free ligand for the ATM simulation. """ if ligand_free is not None: if not isinstance(ligand_free, _Molecule): raise ValueError( "The free ligand must be a BioSimSpace Molecule object." ) else: self._ligand_free = ligand_free else: self._ligand_free = None def _getLigandFree(self): """Get the free ligand for the ATM simulation. Returns ------- BioSimSpace._SireWrappers.Molecule The free ligand for the ATM simulation. """ return self._ligand_free def _setDisplacement(self, displacement): """Set the displacement of the free ligand along the normal vector.""" if isinstance(displacement, str): try: self._displacement = _Length(displacement) except Exception as e: raise ValueError( f"Could not convert {displacement} to a BSS length, due to the following error: {e}" ) elif isinstance(displacement, _Length): self._displacement = displacement elif isinstance(displacement, list): if len(displacement) != 3: raise ValueError("displacement must have length 3") if all(isinstance(x, (float, int)) for x in displacement): self._displacement = _Vector(*displacement) elif all(isinstance(x, _Length) for x in displacement): self._displacement = _Vector([x.value() for x in displacement]) else: raise TypeError("displacement must be a list of floats or BSS lengths") elif isinstance(displacement, _Vector): self._displacement = displacement else: raise TypeError( f"displacement must be a string, BSS length or list. It is currently {type(displacement)}." ) if self._is_prepared: if not isinstance(self._displacement, _Vector): raise ValueError( "Displacement must be a vector or list if a pre-prepared system is given" ) def _getDisplacement(self): """Get the displacement of the free ligand along the normal vector. Returns ------- BioSimSpace.Types.Length The displacement of the free ligand along the normal vector. """ return self._displacement def _setLigandBoundRigidCore(self, ligand_bound_rigid_core): """Set the indices for the rigid core atoms of ligand 1. Parameters ---------- ligand_bound_rigid_core : BioSimSpace._SireWrappers.Molecule The rigid core of the bound ligand for the ATM simulation. """ if ligand_bound_rigid_core is None: self._ligand_bound_rigid_core = None else: if not isinstance(ligand_bound_rigid_core, list): raise TypeError("ligand_bound_rigid_core must be a list") if len(ligand_bound_rigid_core) != 3: raise ValueError("ligand_bound_rigid_core must have length 3") # make sure all indices are ints if not all(isinstance(x, int) for x in ligand_bound_rigid_core): raise TypeError("ligand_bound_rigid_core must contain only integers") if any(x >= self._ligand_bound_atomcount for x in ligand_bound_rigid_core): raise ValueError( "ligand_bound_rigid_core contains an index that is greater than the number of atoms in the ligand" ) self._ligand_bound_rigid_core = ligand_bound_rigid_core def _getLigandBoundRigidCore(self): """Get the indices for the rigid core atoms of ligand 1. Returns ------- list The indices for the rigid core atoms of ligand 1. """ return self._ligand_bound_rigid_core def _setLigandFreeRigidCore(self, ligand_free_rigid_core): """Set the indices for the rigid core atoms of ligand 2. Parameters ---------- ligand_free_rigid_core : BioSimSpace._SireWrappers.Molecule The rigid core of the free ligand for the ATM simulation. """ if ligand_free_rigid_core is None: self._ligand_free_rigid_core = None else: if not isinstance(ligand_free_rigid_core, list): raise TypeError("ligand_free_rigid_core must be a list") if len(ligand_free_rigid_core) != 3: raise ValueError("ligand_free_rigid_core must have length 3") # make sure all indices are ints if not all(isinstance(x, int) for x in ligand_free_rigid_core): raise TypeError("ligand_free_rigid_core must contain only integers") if any(x >= self._ligand_free_atomcount for x in ligand_free_rigid_core): raise ValueError( "ligand_free_rigid_core contains an index that is greater than the number of atoms in the ligand" ) self._ligand_free_rigid_core = ligand_free_rigid_core def _getLigandFreeRigidCore(self): """Get the indices for the rigid core atoms of ligand 2. Returns ------- list The indices for the rigid core atoms of ligand 2. """ return self._ligand_free_rigid_core def _setProteinIndex(self, protein_index): """ Set the index of the protein in the system Parameters ---------- protein_index : list The index or indices of the protein in the system. """ if isinstance(protein_index, list): # check that all elements are ints if not all(isinstance(x, int) for x in protein_index): raise TypeError("protein_index must be a list of ints or a single int") for p in protein_index: if p < 0: raise ValueError("protein_index must be a positive integer") if self._system[p].isWater(): _warnings.warn( f"The molecule at index {p} is a water molecule, check your protein_index list." ) self.protein_index = protein_index elif isinstance(protein_index, int): self.protein_index = [protein_index] else: raise TypeError("protein_index must be an int or a list of ints") def _getProteinIndex(self): """Get the index of the protein molecule in the system. Returns ------- int The index of the protein molecule in the system. """ return self.protein_index def _setLigandBoundIndex(self, ligand_bound_index): """Set the index of the bound ligand molecule in the system. Parameters ---------- ligand_bound_index : int The index of the bound ligand molecule in the system. """ if not isinstance(ligand_bound_index, int): raise ValueError("ligand_bound_index must be an integer.") else: if ligand_bound_index < 0: raise ValueError("ligand_bound_index must be a positive integer") if self._system[ligand_bound_index].isWater(): _warnings.warn( f"The molecule at index {ligand_bound_index} is a water molecule, check your ligand_bound_index." ) self._ligand_bound_index = ligand_bound_index def _getLigandBoundIndex(self): """Get the index of the bound ligand molecule in the system. Returns ------- int The index of the bound ligand molecule in the system. """ return self._ligand_bound_index def _setLigandFreeIndex(self, ligand_free_index): """Set the index of the free ligand molecule in the system. Parameters ---------- ligand_free_index : int The index of the free ligand molecule in the system. """ if not isinstance(ligand_free_index, int): raise ValueError("ligand_free_index must be an integer.") else: if ligand_free_index < 0: raise ValueError("ligand_free_index must be a positive integer") if self._system[ligand_free_index].isWater(): _warnings.warn( f"The molecule at index {ligand_free_index} is a water molecule, check your ligand_free_index." ) self._ligand_free_index = ligand_free_index def _getLigandFreeIndex(self): """Get the index of the free ligand molecule in the system. Returns ------- int The index of the free ligand molecule in the system. """ return self._ligand_free_index
[docs] def prepare( self, ligand_bound_rigid_core, ligand_free_rigid_core, displacement="20A", protein_com_atoms=None, ligand_bound_com_atoms=None, ligand_free_com_atoms=None, ): """ Prepare the system for an ATM simulation. Parameters ---------- ligand_bound_rigid_core : [int] A list of three atom indices that define the rigid core of the bound ligand. Indices are set relative to the ligand, not the system and are 0-indexed. ligand_free_rigid_core : [int] A list of three atom indices that define the rigid core of the free ligand. Indices are set relative to the ligand, not the system and are 0-indexed. displacement : float, string, [float, float, float] The diplacement between the bound and free ligands. If a float or string is given, BioSimSpace will attempt to find the ideal vector along which to displace the ligand by the given magnitude. If a list is given, the vector will be used directly. protein_com_atoms : [int] A list of atom indices that define the center of mass of the protein. If None, the center of mass of the protein will be found automatically. ligand_bound_com_atoms : [int] A list of atom indices that define the center of mass of the bound ligand. If None, the center of mass of the bound ligand will be found automatically. ligand_free_com_atoms : [int] A list of atom indices that define the center of mass of the free ligand. If None, the center of mass of the free ligand will be found automatically. Returns ------- system : :class:`System <BioSimSpace._SireWrappers.System>` The prepared system, including protein and ligands in their correct positions. data : dict A dictionary containing the data needed for the ATM simulation. This is also encoded in the system for consistency, but is returned so that the user can easily query and validate the data. """ if self._is_prepared: self._systemInfo() self._setLigandBoundRigidCore(ligand_bound_rigid_core) self._setLigandFreeRigidCore(ligand_free_rigid_core) self._setDisplacement(displacement) self._setProtComAtoms(protein_com_atoms) self._setLig1ComAtoms(ligand_bound_com_atoms) self._setLig2ComAtoms(ligand_free_com_atoms) self._findAtomIndices() self._makeData() serialisable_disp = [ self._displacement.x(), self._displacement.y(), self._displacement.z(), ] temp_data = self.data.copy() temp_data["displacement"] = serialisable_disp self._system._sire_object.setProperty("atom_data", _json.dumps(temp_data)) return self._system, self.data else: # A bit clunky, but setDisplacement needs to be called twice - before and after _makeSystemFromThree # the final value will be set after the system is made, but the initial value is needed to make the system self._setDisplacement(displacement) system, prot_ind, lig1_ind, lig2_ind, dis_vec = self._makeSystemFromThree( self._protein, self._ligand_bound, self._ligand_free, self._displacement ) self._setSystem(system, is_prepared=False) self._setDisplacement(dis_vec) self._setProteinIndex(prot_ind) self._setLigandBoundIndex(lig1_ind) self._setLigandFreeIndex(lig2_ind) self._systemInfo() self._setLigandBoundRigidCore(ligand_bound_rigid_core) self._setLigandFreeRigidCore(ligand_free_rigid_core) self._setProtComAtoms(protein_com_atoms) self._setLig1ComAtoms(ligand_bound_com_atoms) self._setLig2ComAtoms(ligand_free_com_atoms) self._findAtomIndices() self._makeData() serialisable_disp = [ self._displacement.x(), self._displacement.y(), self._displacement.z(), ] temp_data = self.data.copy() temp_data["displacement"] = serialisable_disp # encode data in system for consistency self._system._sire_object.setProperty("atom_data", _json.dumps(temp_data)) return self._system, self.data
@staticmethod def _makeSystemFromThree(protein, ligand_bound, ligand_free, displacement): """Create a system for ATM simulations. Parameters ---------- protein : BioSimSpace._SireWrappers.Molecule The protein for the ATM simulation. ligand_bound : BioSimSpace._SireWrappers.Molecule The bound ligand for the ATM simulation. ligand_free : BioSimSpace._SireWrappers.Molecule The free ligand for the ATM simulation. displacement : BioSimSpace.Types.Length The displacement of the ligand along the normal vector. Returns ------- BioSimSpace._SireWrappers.System The system for the ATM simulation. """ def _findTranslationVector(system, displacement, protein, ligand): from sire.legacy.Maths import Vector if not isinstance(system, _System): raise TypeError("system must be a BioSimSpace system") if not isinstance(protein, (_Molecule, type(None))): raise TypeError("protein must be a BioSimSpace molecule") if not isinstance(ligand, (_Molecule, type(None))): raise TypeError("ligand must be a BioSimSpace molecule") # Assume that binding sire is the center of mass of the ligand binding = _Coordinate(*ligand._getCenterOfMass()) # Create grid around the binding site # This will act as the search region grid_length = _Length(20.0, "angstroms") num_edges = 5 search_radius = (grid_length / num_edges) / 2 grid_min = binding - 0.5 * grid_length grid_max = binding + 0.5 * grid_length non_protein_coords = Vector() # Count grid squares that contain no protein atoms num_non_prot = 0 import numpy as np # Loop over the grid for x in np.linspace(grid_min.x().value(), grid_max.x().value(), num_edges): for y in np.linspace( grid_min.y().value(), grid_max.y().value(), num_edges ): for z in np.linspace( grid_min.z().value(), grid_max.z().value(), num_edges ): search = ( f"atoms within {search_radius.value()} of ({x}, {y}, {z})" ) try: protein.search(search) except: non_protein_coords += Vector(x, y, z) num_non_prot += 1 non_protein_coords /= num_non_prot non_protein_coords = _Coordinate._from_sire_vector(non_protein_coords) # Now search out alpha carbons in system x = binding.x().angstroms().value() y = binding.y().angstroms().value() z = binding.z().angstroms().value() string = f"(atoms within 10 of {x},{y},{z}) and atomname CA" try: search = system.search(string) except: _warnings.warn( "No alpha carbons found in system, falling back on any carbon atoms." ) try: string = f"(atoms within 10 of {x},{y},{z}) and element C" search = system.search(string) except: raise ValueError("No carbon atoms found in system") com = _Coordinate(_Length(0, "A"), _Length(0, "A"), _Length(0, "A")) atoms1 = [] for atom in search: com += atom.coordinates() atoms1.append(system.getIndex(atom)) com /= search.nResults() initial_normal_vector = (non_protein_coords - com).toVector().normalise() out_of_protein = displacement.value() * initial_normal_vector return out_of_protein mapping = _matchAtoms(ligand_free, ligand_bound) ligand_free_aligned = _rmsdAlign(ligand_free, ligand_bound, mapping) prot_lig1 = (protein + ligand_bound).toSystem() if isinstance(displacement, _Vector): ligand_free_aligned.translate( [displacement.x(), displacement.y(), displacement.z()] ) vec = displacement else: vec = _findTranslationVector(prot_lig1, displacement, protein, ligand_bound) ligand_free_aligned.translate([vec.x(), vec.y(), vec.z()]) sys = (protein + ligand_bound + ligand_free_aligned).toSystem() prot_ind = sys.getIndex(protein) lig1_ind = sys.getIndex(ligand_bound) lig2_ind = sys.getIndex(ligand_free_aligned) return sys, prot_ind, lig1_ind, lig2_ind, vec def _systemInfo(self): """ If the user gives a pre-prepared ATM system, extract the needed information. """ for p in self.protein_index: if self._system[p].isWater(): _warnings.warn( f"The molecule at index {self.protein_index} appears to be a water molecule." " This should be a protein." ) if self._system[self._ligand_bound_index].isWater(): _warnings.warn( f"The molecule at index {self._ligand_bound_index} appears to be a water molecule." " This should be the bound ligand." ) if self._system[self._ligand_free_index].isWater(): _warnings.warn( f"The molecule at index {self._ligand_free_index} appears to be a water molecule." " This should be the free ligand." ) self._protein_atomcount = sum( self._system[i].nAtoms() for i in self.protein_index ) self._ligand_bound_atomcount = self._system[self._ligand_bound_index].nAtoms() self._ligand_free_atomcount = self._system[self._ligand_free_index].nAtoms() def _findAtomIndices(self): """ Find the indices of the protein and ligand atoms in the system Returns ------- dict A dictionary containing the indices of the protein and ligand atoms in the system """ protein_atom_start = self._system[self.protein_index[0]].getAtoms()[0] protein_atom_end = self._system[self.protein_index[-1]].getAtoms()[-1] self._first_protein_atom_index = self._system.getIndex(protein_atom_start) self._last_protein_atom_index = self._system.getIndex(protein_atom_end) ligand_bound_atom_start = self._system[self._ligand_bound_index].getAtoms()[0] ligand_bound_atom_end = self._system[self._ligand_bound_index].getAtoms()[-1] self._first_ligand_bound_atom_index = self._system.getIndex( ligand_bound_atom_start ) self._last_ligand_bound_atom_index = self._system.getIndex( ligand_bound_atom_end ) ligand_free_atom_start = self._system[self._ligand_free_index].getAtoms()[0] ligand_free_atom_end = self._system[self._ligand_free_index].getAtoms()[-1] self._first_ligand_free_atom_index = self._system.getIndex( ligand_free_atom_start ) self._last_ligand_free_atom_index = self._system.getIndex(ligand_free_atom_end) def _getProtComAtoms(self): """ Get the atoms that define the center of mass of the protein as a list of ints Returns ------- list A list of atom indices that define the center of mass of the protein. """ return self._mol1_com_atoms def _setProtComAtoms(self, prot_com_atoms): """ Set the atoms that define the center of mass of the protein If a list is given, simply set them according to the list. If None, find them based on the center of mass of the protein. """ if prot_com_atoms is not None: # Make sure its a list of ints if not isinstance(prot_com_atoms, list): raise TypeError("mol1_com_atoms must be a list") if not all(isinstance(x, int) for x in prot_com_atoms): raise TypeError("mol1_com_atoms must be a list of ints") self._mol1_com_atoms = prot_com_atoms else: # Find com of the protein if self._is_prepared: temp_system = self._system._sire_object protein = temp_system[self.protein_index[0]] for i in self.protein_index[1:]: protein += temp_system[i] com = protein.coordinates() self._mol1_com_atoms = [ a.index().value() for a in protein[f"atoms within 11 angstrom of {com}"] ] del temp_system del protein else: protein = self._protein com = protein._sire_object.coordinates() self._mol1_com_atoms = [ a.index().value() for a in protein._sire_object[f"atoms within 11 angstrom of {com}"] ] def _getLig1ComAtoms(self): """ Get the atoms that define the center of mass of the bound ligand as a list of ints Returns ------- list A list of atom indices that define the center of mass of the bound ligand. """ return self._lig1_com_atoms def _setLig1ComAtoms(self, lig1_com_atoms): """ Set the atoms that define the center of mass of the bound ligand If a list is given, simply set them according to the list. If None, find them based on the center of mass of the bound ligand. In most cases this will be all atoms within the ligand """ if lig1_com_atoms is not None: # Make sure its a list of ints if not isinstance(lig1_com_atoms, list): raise TypeError("lig1_com_atoms must be a list") if not all(isinstance(x, int) for x in lig1_com_atoms): raise TypeError("lig1_com_atoms must be a list of ints") self._lig1_com_atoms = lig1_com_atoms else: # Find com of the ligand if self._is_prepared: ligand_bound = self._system[self._ligand_bound_index] else: ligand_bound = self._ligand_bound com = ligand_bound._sire_object.coordinates() self._lig1_com_atoms = [ a.index().value() for a in ligand_bound._sire_object[f"atoms within 11 angstrom of {com}"] ] def _getLig2ComAtoms(self): """ Get the atoms that define the center of mass of the free ligand as a list of ints Returns ------- list A list of atom indices that define the center of mass of the free ligand. """ return self._lig2_com_atoms def _setLig2ComAtoms(self, lig2_com_atoms): """ Set the atoms that define the center of mass of the free ligand If a list is given, simply set them according to the list. If None, find them based on the center of mass of the free ligand. In most cases this will be all atoms within the ligand """ if lig2_com_atoms is not None: # Make sure its a list of ints if not isinstance(lig2_com_atoms, list): raise TypeError("lig2_com_atoms must be a list") if not all(isinstance(x, int) for x in lig2_com_atoms): raise TypeError("lig2_com_atoms must be a list of ints") self._lig2_com_atoms = lig2_com_atoms else: # Find com of the ligand if self._is_prepared: ligand_free = self._system[self._ligand_free_index] else: ligand_free = self._ligand_free com = ligand_free._sire_object.coordinates() self._lig2_com_atoms = [ a.index().value() for a in ligand_free._sire_object[f"atoms within 11 angstrom of {com}"] ] def _makeData(self): """ Make the data dictionary for the ATM system """ self.data = {} self.data["displacement"] = self._getDisplacement() self.data["protein_index"] = self._getProteinIndex() self.data["ligand_bound_index"] = self._getLigandBoundIndex() self.data["ligand_free_index"] = self._getLigandFreeIndex() self.data["ligand_bound_rigid_core"] = self._getLigandBoundRigidCore() self.data["ligand_free_rigid_core"] = self._getLigandFreeRigidCore() self.data["mol1_atomcount"] = self._protein_atomcount self.data["ligand_bound_atomcount"] = self._ligand_bound_atomcount self.data["ligand_free_atomcount"] = self._ligand_free_atomcount self.data["first_protein_atom_index"] = self._first_protein_atom_index self.data["last_protein_atom_index"] = self._last_protein_atom_index self.data["first_ligand_bound_atom_index"] = self._first_ligand_bound_atom_index self.data["last_ligand_bound_atom_index"] = self._last_ligand_bound_atom_index self.data["first_ligand_free_atom_index"] = self._first_ligand_free_atom_index self.data["last_ligand_free_atom_index"] = self._last_ligand_free_atom_index self.data["protein_com_atoms"] = self._mol1_com_atoms self.data["ligand_bound_com_atoms"] = self._lig1_com_atoms self.data["ligand_free_com_atoms"] = self._lig2_com_atoms
[docs] @staticmethod def viewRigidCores( system=None, ligand_bound=None, ligand_free=None, ligand_bound_rigid_core=None, ligand_free_rigid_core=None, ): """ View the rigid cores of the ligands. Rigid core atoms within the bound ligand are shown in green, those within the free ligand are shown in red. Parameters ---------- system : :class:`System <BioSimSpace._SireWrappers.System>` The system for the ATM simulation that has been prepared ATM.prepare(). All other arguments are ignored if this is provided. ligand_bound : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` The bound ligand. ligand_free : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` The free ligand. ligand_bound_rigid_core : list The indices for the rigid core atoms of the bound ligand. ligand_free_rigid_core : list The indices for the rigid core atoms of the free ligand. """ import math as _math def move_to_origin(lig): com = _Coordinate(*lig._getCenterOfMass()) lig.translate([-com.x().value(), -com.y().value(), -com.z().value()]) def euclidean_distance(point1, point2): return _math.sqrt( (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2 + (point1[2] - point2[2]) ** 2 ) def furthest_points(points): max_distance = 0 furthest_pair = None n = len(points) if n < 2: return None, None, 0 # Not enough points to compare for i in range(n): for j in range(i + 1, n): distance = euclidean_distance(points[i], points[j]) if distance > max_distance: max_distance = distance furthest_pair = (points[i], points[j]) return furthest_pair[0], furthest_pair[1], max_distance def vector_from_points(point1, point2): dx = point2[0] - point1[0] dy = point2[1] - point1[1] dz = point2[2] - point1[2] magnitude = _math.sqrt(dx**2 + dy**2 + dz**2) if magnitude == 0: return (0, 0, 0) return (dx / magnitude, dy / magnitude, dz / magnitude) def find_arrow_points(center1, center2, diameter1, diameter2): import numpy as np # logic to make sure arrows pointing from spheres start on their edge, not centre. # Convert the points to numpy arrays p1 = np.array(center1) p2 = np.array(center2) # Calculate the radii from the diameters radius1 = diameter1 / 2 radius2 = diameter2 / 2 # Calculate the vector between the two centers v = p2 - p1 # Calculate the magnitude (distance between the two centers) dist = np.linalg.norm(v) # Normalize the vector v_norm = v / dist # Calculate the start and end points of the arrow start_point = p1 + radius1 * v_norm # From the surface of Sphere 1 end_point = p2 - radius2 * v_norm # To the surface of Sphere 2 return start_point, end_point # if a system is provided, check that it has the "atom_data" property if system is not None: sdata = _json.loads(system._sire_object.property("atom_data").value()) local_s = system.copy() ligand_bound = local_s[sdata["ligand_bound_index"]] move_to_origin(ligand_bound) ligand_free = local_s[sdata["ligand_free_index"]] move_to_origin(ligand_free) ligand_bound_rigid_core = sdata["ligand_bound_rigid_core"] ligand_free_rigid_core = sdata["ligand_free_rigid_core"] # if not system provided, ALL other parameters must be provided else: if ligand_bound is None: raise ValueError("ligand_bound must be provided") if ligand_free is None: raise ValueError("ligand_free must be provided") if ligand_bound_rigid_core is None: raise ValueError("ligand_bound_rigid_core must be provided") if ligand_free_rigid_core is None: raise ValueError("ligand_free_rigid_core must be provided") if not isinstance(ligand_bound, _Molecule): raise TypeError("ligand_bound must be a BioSimSpace molecule") if not isinstance(ligand_free, _Molecule): raise TypeError("ligand_free must be a BioSimSpace molecule") if not isinstance(ligand_bound_rigid_core, list): raise TypeError("ligand_bound_rigid_core must be a list") elif not len(ligand_bound_rigid_core) == 3: raise ValueError("ligand_bound_rigid_core must have length 3") if not isinstance(ligand_free_rigid_core, list): raise TypeError("ligand_free_rigid_core must be a list") elif not len(ligand_free_rigid_core) == 3: raise ValueError("ligand_free_rigid_core must have length 3") # copy the ligands ligand_bound = ligand_bound.copy() move_to_origin(ligand_bound) ligand_free = ligand_free.copy() move_to_origin(ligand_free) pre_translation_lig1_core_coords = [] for i in ligand_bound_rigid_core: x = ligand_bound.getAtoms()[i].coordinates().x().value() y = ligand_bound.getAtoms()[i].coordinates().y().value() z = ligand_bound.getAtoms()[i].coordinates().z().value() pre_translation_lig1_core_coords.append((x, y, z)) point1, point2, distance = furthest_points(pre_translation_lig1_core_coords) vector = vector_from_points(point1, point2) # need to know the size of ligand_bound lig1_coords = [] for i in ligand_bound.getAtoms(): x = i.coordinates().x().value() y = i.coordinates().y().value() z = i.coordinates().z().value() lig1_coords.append((x, y, z)) lig1_point1, lig1_point2, lig1_distance = furthest_points(lig1_coords) # Translate ligand_free so they don't overlap ligand_free.translate( [ -1.0 * lig1_distance * 2 * vector[0], -1.0 * lig1_distance * 2 * vector[1], -1.0 * lig1_distance * 2 * vector[2], ] ) # Get coords of rigid core atoms ligand_bound_core_coords = [] ligand_free_core_coords = [] for i in ligand_bound_rigid_core: ligand_bound_core_coords.append(ligand_bound.getAtoms()[i].coordinates()) for i in ligand_free_rigid_core: ligand_free_core_coords.append(ligand_free.getAtoms()[i].coordinates()) # Create molecule containing both ligands mol = ligand_bound + ligand_free # Create view view = _ViewAtoM(mol) # Create nglview object ngl = view.system(mol) ngl.add_ball_and_stick("all", opacity=0.5) # Add spheres to rigid core locations - first the obund ligand with red spheres for coord1, core_atom_1 in zip( ligand_bound_core_coords, ligand_bound_rigid_core, ): ngl.shape.add_sphere( [coord1.x().value(), coord1.y().value(), coord1.z().value()], [0, 1, 0], 0.45, ) ngl.shape.add( "text", [coord1.x().value(), coord1.y().value(), coord1.z().value() - 0.9], [0, 0, 0], 2.5, f"{core_atom_1}", ) # now the free ligand with black spheres for coord1, core_atom_1 in zip( ligand_free_core_coords, ligand_free_rigid_core, ): ngl.shape.add_sphere( [coord1.x().value(), coord1.y().value(), coord1.z().value()], [1, 0, 0], 0.45, ) ngl.shape.add( "text", [coord1.x().value(), coord1.y().value(), coord1.z().value() - 0.9], [0, 0, 0], 2.5, f"{core_atom_1}", ) for i in range(2): c00 = ligand_bound_core_coords[i] coord00 = [c00.x().value(), c00.y().value(), c00.z().value()] c01 = ligand_bound_core_coords[i + 1] coord01 = [c01.x().value(), c01.y().value(), c01.z().value()] start, end = find_arrow_points(coord00, coord01, 0.9, 0.9) ngl.shape.add_arrow( start, end, [0, 0, 0], 0.1, ) c10 = ligand_free_core_coords[i] coord10 = [c10.x().value(), c10.y().value(), c10.z().value()] c11 = ligand_free_core_coords[i + 1] coord11 = [c11.x().value(), c11.y().value(), c11.z().value()] start, end = find_arrow_points(coord10, coord11, 0.9, 0.9) ngl.shape.add_arrow( start, end, [0, 0, 0], 0.1, ) if system is not None: del local_s return ngl
[docs] class ATM: """ A class for setting up, running, and analysis RBFE calculations using the Alchemical Transfer Method. """
[docs] def __init__( self, system, protocol, platform="CPU", work_dir=None, setup_only=False, property_map={}, ): """ Constructor. Parameters ---------- system : BioSimSpace._SireWrappers.System A prepared ATM system containing a protein and two ligands, one bound and one free. Assumed to already be equilibrated. protocol : BioSimSpace.Protocol.ATM The ATM protocol to use for the simulation. platform : str The platform for the simulation: “CPU”, “CUDA”, or “OPENCL”. For CUDA use the CUDA_VISIBLE_DEVICES environment variable to set the GPUs on which to run, e.g. to run on two GPUs indexed 0 and 1 use: CUDA_VISIBLE_DEVICES=0,1. For OPENCL, instead use OPENCL_VISIBLE_DEVICES. work_dir : str The working directory for the simulation. setup_only : bool Whether to only support simulation setup. If True, then no simulation processes objects will be created, only the directory hierarchy and input files to run a simulation externally. This can be useful when you don't intend to use BioSimSpace to run the simulation. Note that a 'work_dir' must also be specified. property_map : dict A dictionary that maps system "properties" to their user defined values. This allows the user to refer to properties with their own naming scheme, e.g. { "charge" : "my-charge" } """ self._system = system.copy() # Validate the protocol. if protocol is not None: from ..Protocol._atm import ATMProduction as _Production if not isinstance(protocol, _Production): raise TypeError( "'protocol' must be of type 'BioSimSpace.Protocol.ATMProduction'" ) else: self._protocol = protocol else: # No default protocol due to the need for well-defined rigid cores raise ValueError("A protocol must be specified") # Check the platform. if not isinstance(platform, str): raise TypeError("'platform' must be of type 'str'.") else: self._platform = platform if not isinstance(setup_only, bool): raise TypeError("'setup_only' must be of type 'bool'.") else: self._setup_only = setup_only if work_dir is None and setup_only: raise ValueError( "A 'work_dir' must be specified when 'setup_only' is True!" ) # Create the working directory. self._work_dir = _Utils.WorkDir(work_dir) # Check that the map is valid. if not isinstance(property_map, dict): raise TypeError("'property_map' must be of type 'dict'") self._property_map = property_map self._inititalise_runner(system=self._system)
[docs] def run(self, serial=True): """ Run the simulations. serial : bool Whether to run the individual processes for the lambda windows """ if not isinstance(serial, bool): raise TypeError("'serial' must be of type 'bool'.") if self._setup_only: _warnings.warn("No processes exist! Object created in 'setup_only' mode.") else: self._runner.startAll(serial=serial)
[docs] def wait(self): """Wait for the simulation to finish.""" if self._setup_only: _warnings.warn("No processes exist! Object created in 'setup_only' mode.") else: self._runner.wait()
[docs] def kill(self, index): """ Kill a process for a specific lambda window. Parameters ---------- index : int The index of the lambda window. """ self._runner.kill(index)
[docs] def killAll(self): """Kill any running processes for all lambda windows.""" self._runner.killAll()
[docs] def workDir(self): """ Return the working directory. Returns ------- work_dir : str The path of the working directory. """ return str(self._work_dir)
[docs] def getData(self, name="data", file_link=False, work_dir=None): """ Return a link to a zip file containing the data files required for post-simulation analysis. Parameters ---------- name : str The name of the zip file. file_link : bool Whether to return a FileLink when working in Jupyter. work_dir : str The working directory for the free-energy perturbation simulation. Returns ------- output : str, IPython.display.FileLink A path, or file link, to an archive of the process input. """ if self._work_dir is None: raise ValueError("'work_dir' must be set!") else: if not isinstance(work_dir, str): raise TypeError("'work_dir' must be of type 'str'.") if not _os.path.isdir(work_dir): raise ValueError("'work_dir' doesn't exist!") if not isinstance(name, str): raise TypeError("'name' must be of type 'str'") # Generate the zip file name. zipname = "%s.zip" % name # Get the current working directory. cwd = _os.getcwd() # Change into the working directory. with _cd(work_dir): # Specify the path to glob. glob_path = _pathlib.Path(work_dir) # First try SOMD data. files = glob_path.glob("**/gradients.dat") if len(files) == 0: files = glob_path.glob("**/[!bar]*.xvg") if len(files) == 0: raise ValueError( f"Couldn't find any analysis files in '{work_dir}'" ) # Write to the zip file. with _zipfile.Zipfile(_os.join(cwd, zipname), "w") as zip: for file in files: zip.write(file) # Return a link to the archive. if _is_notebook: if file_link: # Create a FileLink to the archive. f_link = _FileLink(zipname) # Set the download attribute so that JupyterLab doesn't try to open the file. f_link.html_link_str = ( f"<a href='%s' target='_blank' download='{zipname}'>%s</a>" ) # Return a link to the archive. return f_link else: return zipname # Return the path to the archive. else: return zipname
def _inititalise_runner(self, system): """ Internal helper function to initialise the process runner. Parameters ---------- system : :class:`System <BioSimSpace._SireWrappers.System>` The molecular system. """ # This protocol will have to be minimal - cannot guess rigid core atoms if self._protocol is None: raise RuntimeError("No protocol has been set - cannot run simulations.") # Initialise list to store the processe processes = [] # Get the list of lambda1 values so that the total number of simulations can # be asserted lambda_list = self._protocol._get_lambda_values() # Set index of current simulation to 0 self._protocol.set_current_index(0) lam = lambda_list[0] first_dir = "%s/lambda_%5.4f" % (self._work_dir, lam) # Create the first simulation, which will be copied and used for future simulations. first_process = _OpenMM( system=system, protocol=self._protocol, platform=self._platform, work_dir=first_dir, property_map=self._property_map, ) if self._setup_only: del first_process else: processes.append(first_process) # Remove first index as its already been used lambda_list = lambda_list[1:] # Enumerate starting at 1 to account for the removal of the first lambda value for index, lam in enumerate(lambda_list, 1): # Files are named according to index, rather than lambda value # This is to avoid confusion arising from the fact that there are multiple lambdas # and that the values of lambda1 and lambda2 wont necessarily be go from 0 to 1 # and may contain duplicates new_dir = "%s/lambda_%5.4f" % (self._work_dir, lam) # Use absolute path. if not _os.path.isabs(new_dir): new_dir = _os.path.abspath(new_dir) # Delete any existing directories. if _os.path.isdir(new_dir): _shutil.rmtree(new_dir, ignore_errors=True) # Copy the first directory to that of the current lambda value. _shutil.copytree(first_dir, new_dir) # For speed reasons, additional processes need to be created by copying the first process. # this is more difficult than usual due to the number of window-dependent variables new_config = [] # All variables that need to change new_lam_1 = self._protocol.getLambda1()[index] new_lam_2 = self._protocol.getLambda2()[index] new_alpha = self._protocol.getAlpha()[index].value() new_uh = self._protocol.getUh()[index].value() new_w0 = self._protocol.getW0()[index].value() new_direction = self._protocol.getDirection()[index] with open(new_dir + "/openmm_script.py", "r") as f: for line in f: if line.startswith("lambda1"): new_config.append(f"lambda1 = {new_lam_1}\n") elif line.startswith("lambda2"): new_config.append(f"lambda2 = {new_lam_2}\n") elif line.startswith("alpha"): new_config.append( f"alpha = {new_alpha} * kilocalories_per_mole\n" ) elif line.startswith("uh"): new_config.append(f"uh = {new_uh} * kilocalories_per_mole\n") elif line.startswith("w0"): new_config.append(f"w0 = {new_w0} * kilocalories_per_mole\n") elif line.startswith("direction"): new_config.append(f"direction = {new_direction}\n") elif line.startswith("window_index"): new_config.append(f"window_index = {index}\n") else: new_config.append(line) with open(new_dir + "/openmm_script.py", "w") as f: for line in new_config: f.write(line) # Create a new process object for the current lambda value and append # to the list of processes if not self._setup_only: process = _copy.copy(first_process) process._system = first_process._system.copy() process._protocol = self._protocol process._work_dir = new_dir process._stdout_file = new_dir + "/ATM.out" process._stderr_file = new_dir + "/ATM.err" process._rst_file = new_dir + "/openmm.rst7" process._top_file = new_dir + "/openmm.prm7" process._traj_file = new_dir + "/openmm.dcd" process._config_file = new_dir + "/openmm_script.py" process._input_files = [ process._config_file, process._rst_file, process._top_file, ] processes.append(process) if not self._setup_only: # Initialise process runner. self._runner = _ProcessRunner(processes)
[docs] @staticmethod def analyse( work_dir, method="UWHAM", ignore_lower=0, ignore_upper=None, inflex_indices=None, ): """Analyse the ATM simulation. Parameters ---------- work_dir : str The working directory where the ATM simulation is located. method : str The method to use for the analysis. Currently only UWHAM is supported. ignore_lower : int Ignore the first N samples when analysing. inflex_indices : [int] The indices at which the direction changes. For example, if direction=[1,1,-1,-1], then inflex_indices=[1,2]. If None, the inflexion point will be found automatically. Returns ------- ddg : :class:`BioSimSpace.Types.Energy` The free energy difference between the two ligands. ddg_err :class:`BioSimSpace.Types.Energy` The error in the free energy difference. """ if not isinstance(ignore_lower, int): raise TypeError("'ignore_lower' must be an integer.") if ignore_lower < 0: raise ValueError("'ignore_lower' must be a positive integer.") if ignore_upper is not None: if not isinstance(ignore_upper, int): raise TypeError("'ignore_upper' must be an integer.") if ignore_upper < 0: raise ValueError("'ignore_upper' must be a positive integer.") if ignore_upper < ignore_lower: raise ValueError( "'ignore_upper' must be greater than or equal to 'ignore_lower'." ) if inflex_indices is not None: if not isinstance(inflex_indices, list): raise TypeError("'inflex_indices' must be a list.") if not all(isinstance(x, int) for x in inflex_indices): raise TypeError("'inflex_indices' must be a list of integers.") if not len(inflex_indices) == 2: raise ValueError("'inflex_indices' must have length 2.") if method == "UWHAM": total_ddg, total_ddg_err = ATM._analyse_UWHAM( work_dir, ignore_lower, ignore_upper, inflex_indices ) return total_ddg, total_ddg_err if method == "MBAR": from ._relative import Relative as _Relative # temporary version to check that things are working ddg_forward, ddg_reverse = ATM._analyse_MBAR(work_dir) ddg_forward = _Relative.difference(ddg_forward) ddg_reverse = _Relative.difference(ddg_reverse) return ddg_forward, ddg_reverse else: raise ValueError(f"Method {method} is not supported for analysis.")
@staticmethod def _analyse_UWHAM(work_dir, ignore_lower, ignore_upper, inflex_indices=None): """ Analyse the UWHAM results from the ATM simulation. """ from ._ddg import analyse_UWHAM as _UWHAM total_ddg, total_ddg_err = _UWHAM( work_dir, ignore_lower, ignore_upper, inflection_indices=inflex_indices ) return total_ddg, total_ddg_err @staticmethod def _analyse_MBAR(work_dir): """ Analyse the MBAR results from the ATM simulation. """ from ._ddg import analyse_MBAR as _MBAR ddg_forward, ddg_reverse = _MBAR(work_dir) return ddg_forward, ddg_reverse @staticmethod def _analyse_test(work_dir): """ Analyse the test results from the ATM simulation. """ from ._ddg import new_MBAR as _test ddg_forward, ddg_reverse = _test(work_dir) return ddg_forward, ddg_reverse @staticmethod def _analyse_femto(work_dir): from ._ddg import MBAR_hijack_femto est, o = MBAR_hijack_femto(work_dir) return est, o
class _ViewAtoM(_View): """ Overloads regular view class needed to pass default_representation=False into show_file. """ # Initialise super def __init__(self, handle, property_map={}, is_lambda1=False): super().__init__(handle, property_map, is_lambda1) def _create_view(self, system=None, view=None, gui=True, **kwargs): if system is None and view is None: raise ValueError("Both 'system' and 'view' cannot be 'None'.") elif system is not None and view is not None: raise ValueError("One of 'system' or 'view' must be 'None'.") # Make sure gui flag is valid. if gui not in [True, False]: gui = True # Default to the most recent view. if view is None: index = self._num_views else: index = view # Create the file name. filename = "%s/view_%04d.pdb" % (self._work_dir, index) # Increment the number of views. if view is None: self._num_views += 1 # Create a PDB object and write to file. if system is not None: try: pdb = _SireIO.PDB2(system, self._property_map) pdb.writeToFile(filename) except Exception as e: msg = "Failed to write system to 'PDB' format." if _isVerbose(): print(msg) raise IOError(e) from None else: raise IOError(msg) from None # Import NGLView when it is used for the first time. import nglview as _nglview # Create the NGLview object. view = _nglview.show_file(filename, default_representation=False) # Return the view and display it. return view.display(gui=gui)