######################################################################
# BioSimSpace: Making biomolecular simulation a breeze!
#
# Copyright: 2017-2024
#
# Authors: Lester Hedges <lester.hedges@gmail.com>
#
# BioSimSpace is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BioSimSpace is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with BioSimSpace. If not, see <http://www.gnu.org/licenses/>.
#####################################################################
"""Functionality for parameterising molecules."""
__author__ = "Lester Hedges"
__email__ = "lester.hedges@gmail.com"
# A list of functions that will be exposed to the user. This list will be
# dynamically updated with the names of additional functions for the
# supported AMBER and Open Force Field models determined at import time.
__all__ = [
"parameterise",
"forceFields",
"amberForceFields",
"amberProteinForceFields",
"openForceFields",
]
# A dictionary mapping AMBER protein force field names to their pdb2gmx
# compatibility. Note that the names specified below will be used for the
# parameterisation functions, so they should be suitably formatted. Once we
# have CMAP support we should be able to determine the available force fields
# by scanning the AmberTools installation directory, as we do for those from
# OpenFF.
_amber_protein_forcefields = {
"ff03": True,
"ff99": True,
"ff99SB": False,
"ff99SBildn": False,
"ff14SB": False,
}
from .. import _amber_home, _gmx_exe, _gmx_path, _isVerbose
from .._Exceptions import IncompatibleError as _IncompatibleError
from .._Exceptions import MissingSoftwareError as _MissingSoftwareError
from .._SireWrappers import Atom as _Atom
from .._SireWrappers import Molecule as _Molecule
from ..Solvent import waterModels as _waterModels
from ..Types import Charge as _Charge
from ..Types import Length as _Length
from .._Utils import _try_import, _have_imported
from .. import _Utils
from ._process import Process as _Process
from . import _Protocol
[docs]
def parameterise(
molecule,
forcefield,
ensure_compatible=True,
work_dir=None,
property_map={},
**kwargs,
):
"""
Parameterise a molecule using a specified force field.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
forcefield : str
The force field. Run BioSimSpace.Parameters.forceFields() to get a
list of the supported force fields.
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. Set
this to False is parameterising lone oxygen atoms corresponding to
structural (crystal) water molecules. When True, the parameterised
molecule will preserve the topology of the original molecule, e.g.
the original atom and residue names will be kept.
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
kwargs : dict
A dictionary of additional keyword arguments required for specific
parameterisation functions.
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
if not isinstance(forcefield, str):
raise TypeError("'forcefield' must be of type 'str'")
else:
# Strip whitespace and convert to lower case.
forcefield = forcefield.replace(" ", "").lower()
if forcefield not in _forcefields_lower:
raise ValueError("Supported force fields are: %s" % forceFields())
return _forcefield_dict[forcefield](
molecule, work_dir=work_dir, property_map=property_map, **kwargs
)
def _parameterise_amber_protein(
forcefield,
molecule,
tolerance=1.2,
max_distance=_Length(6, "A"),
water_model=None,
pre_mol_commands=None,
post_mol_commands=None,
bonds=None,
ensure_compatible=True,
work_dir=None,
property_map={},
**kwargs,
):
"""
Parameterise using the named AMBER protein force field.
Parameters
----------
forcefield : str
The name of the AMBER force field.
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
tolerance : float
The tolerance used when searching for disulphide bonds. Atoms will
be considered to be bonded if they are a distance of less than
tolerance times the sum of the equilibrium bond radii apart.
max_distance : :class:`Length <BioSimSpace.Types.Length>`
The maximum distance between atoms when searching for disulphide
bonds.
water_model : str
The water model used to parameterise any structural ions.
Run 'BioSimSpace.Solvent.waterModels()' to see the supported
water models. This can also be used to parameterise water molecules,
or lone oxygen atoms corresponding to structural (crystal) water
molecules.
pre_mol_commands : [str]
A list of custom LEaP commands to be executed before loading the molecule.
This can be used for loading custom parameter files, or sourcing additional
scripts. Make sure to use absolute paths when specifying any external files.
When this option is set, we can no longer fall back on GROMACS's pdb2gmx.
post_mol_commands : [str]
A list of custom LEaP commands to be executed after loading the molecule.
This allows the use of additional commands that operate on the molecule,
which is named "mol". When this option is set, we can no longer fall
back on GROMACS's pdb2gmx.
bonds : ((class:`Atom <BioSimSpace._SireWrappers.Atom>`, class:`Atom <BioSimSpace._SireWrappers.Atom>`))
An optional tuple of atom pairs to specify additional atoms that
should be bonded.
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. Set
this to False is parameterising lone oxygen atoms corresponding to
structural (crystal) water molecules. When True, the parameterised
molecule will preserve the topology of the original molecule, e.g.
the original atom and residue names will be kept.
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
if not isinstance(forcefield, str):
raise TypeError("'forcefield' must be of type 'str'.")
if forcefield not in _amber_protein_forcefields.keys():
raise ValueError(
f"Unsupported AMBER forcefield '{forcefield}' "
f"options are {', '.join(_amber_protein_forcefields.keys())}."
)
# Extract the pdb2gmx support flag.
is_pdb2gmx = _amber_protein_forcefields[forcefield]
if _amber_home is None:
if is_pdb2gmx and (_gmx_exe is None or _gmx_path is None):
raise _MissingSoftwareError(
"'BioSimSpace.Parameters.ff99' is not supported. "
"Please install AmberTools (http://ambermd.org) or "
"GROMACS (http://www.gromacs.org)."
)
else:
raise _MissingSoftwareError(
"'BioSimSpace.Parameters.ff99' is not supported. "
"Please install AmberTools (http://ambermd.org)."
)
# Validate arguments.
_validate(
molecule=molecule,
tolerance=tolerance,
max_distance=max_distance,
water_model=water_model,
check_ions=True,
pre_mol_commands=pre_mol_commands,
post_mol_commands=post_mol_commands,
bonds=bonds,
ensure_compatible=ensure_compatible,
property_map=property_map,
)
# Create the protocol.
protocol = _Protocol.AmberProtein(
forcefield=forcefield,
pdb2gmx=is_pdb2gmx,
tolerance=tolerance,
water_model=water_model,
max_distance=max_distance,
pre_mol_commands=pre_mol_commands,
post_mol_commands=post_mol_commands,
bonds=bonds,
ensure_compatible=ensure_compatible,
property_map=property_map,
)
# Run the parameterisation protocol in the background and return
# a handle to the thread.
return _Process(molecule, protocol, work_dir=work_dir, auto_start=True)
[docs]
def gaff(
molecule,
work_dir=None,
net_charge=None,
charge_method="BCC",
ensure_compatible=True,
property_map={},
**kwargs,
):
"""
Parameterise using the GAFF force field.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
net_charge : int, :class:`Charge <BioSimSpace.Types.Charge>`
The net charge on the molecule.
charge_method : str
The method to use when calculating atomic charges:
"RESP", "CM2", "MUL", "BCC", "ESP", "GAS"
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. When True,
the parameterised molecule will preserve the topology of the original
molecule, e.g. the original atom and residue names will be kept.
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
if _amber_home is None:
raise _MissingSoftwareError(
"'BioSimSpace.Parameters.gaff' is not supported. "
"Please install AmberTools (http://ambermd.org)."
)
# Validate arguments.
_validate(
molecule=molecule,
check_ions=False,
ensure_compatible=ensure_compatible,
property_map=property_map,
)
if net_charge is not None:
# Get the value of the charge.
if isinstance(net_charge, _Charge):
net_charge = net_charge.value()
if isinstance(net_charge, float):
if net_charge % 1 != 0:
raise ValueError("'net_charge' must be integer valued.")
# Try to convert to int.
try:
net_charge = int(net_charge)
except:
raise TypeError(
"'net_charge' must be of type 'int', or `BioSimSpace.Types.Charge'"
)
# Create a default protocol.
protocol = _Protocol.GAFF(
version=1,
net_charge=net_charge,
charge_method=charge_method,
ensure_compatible=ensure_compatible,
property_map=property_map,
)
# Run the parameterisation protocol in the background and return
# a handle to the thread.
return _Process(molecule, protocol, work_dir=work_dir, auto_start=True)
[docs]
def gaff2(
molecule,
work_dir=None,
net_charge=None,
charge_method="BCC",
ensure_compatible=True,
property_map={},
**kwargs,
):
"""
Parameterise using the GAFF2 force field.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
net_charge : int, :class:`Charge <BioSimSpace.Types.Charge>`
The net charge on the molecule.
charge_method : str
The method to use when calculating atomic charges:
"RESP", "CM2", "MUL", "BCC", "ESP", "GAS"
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. When True,
the parameterised molecule will preserve the topology of the original
molecule, e.g. the original atom and residue names will be kept.
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
if _amber_home is None:
raise _MissingSoftwareError(
"'BioSimSpace.Parameters.gaff2' is not supported. "
"Please install AmberTools (http://ambermd.org)."
)
# Validate arguments.
_validate(
molecule=molecule,
check_ions=False,
ensure_compatible=ensure_compatible,
property_map=property_map,
)
if net_charge is not None:
# Get the value of the charge.
if isinstance(net_charge, _Charge):
net_charge = net_charge.value()
if isinstance(net_charge, float):
if net_charge % 1 != 0:
raise ValueError("'net_charge' must be integer valued.")
# Try to convert to int.
try:
net_charge = int(net_charge)
except:
raise TypeError(
"'net_charge' must be of type 'int', or `BioSimSpace.Types.Charge'"
)
if net_charge % 1 != 0:
raise ValueError("'net_charge' must be integer valued.")
# Create a default protocol.
protocol = _Protocol.GAFF(
version=2,
net_charge=net_charge,
charge_method=charge_method,
ensure_compatible=ensure_compatible,
property_map=property_map,
)
# Run the parameterisation protocol in the background and return
# a handle to the thread.
return _Process(molecule, protocol, work_dir=work_dir, auto_start=True)
def _parameterise_openff(
forcefield,
molecule,
ensure_compatible=True,
use_nagl=True,
work_dir=None,
property_map={},
**kwargs,
):
"""
Parameterise a molecule using a force field from the Open Force Field
Initiative.
Parameters
----------
forcefield : str
The force field. Run BioSimSpace.Parameters.openForceFields() to get a
list of the supported force fields.
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. When True,
the parameterised molecule will preserve the topology of the original
molecule, e.g. the original atom and residue names will be kept.
use_nagl : bool
Whether to use NAGL to compute AM1-BCC charges. If False, the default
is to use AmberTools via antechamber and sqm. (This option is only
used if NAGL is available.)
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
from sire.legacy.Base import findExe as _findExe
try:
_findExe("antechamber")
except:
raise _MissingSoftwareError(
f"'{forcefield}' is not supported. AmberTools "
"(http://ambermd.org) is needed for charge "
"calculation and 'antechamber' executable "
"must be in your PATH."
) from None
# Check the Antechamber version. Open Force Field requires Antechamber >= 22.0.
try:
# Antechamber returns an exit code of 1 when requesting version information.
# As such, we wrap the call within a try-except block in case it fails.
import shlex as _shlex
import subprocess as _subprocess
# Generate the command-line string. (Antechamber must be in the PATH,
# so no need to use AMBERHOME.
command = "antechamber -v"
# Run the command as a subprocess.
proc = _subprocess.run(
_Utils.command_split(command),
shell=False,
text=True,
stdout=_subprocess.PIPE,
stderr=_subprocess.STDOUT,
)
# Get stdout and split into lines.
lines = proc.stdout.split("\n")
# If present, version information is on line 1.
string = lines[1]
# Delete the welcome message.
string = string.replace("Welcome to antechamber", "")
# Extract the version and convert to float.
version = float(string.split(":")[0])
# The version is okay, enable Open Force Field support.
if version >= 22:
is_compatible = True
# Disable Open Force Field support.
else:
is_compatible = False
del _shlex
del _subprocess
# Something went wrong, disable Open Force Field support.
except:
is_compatible = False
raise
if not is_compatible:
raise _IncompatibleError(f"'{forcefield}' requires Antechamber >= 22.0")
# Validate arguments.
if not isinstance(molecule, (_Molecule, str)):
raise TypeError(
"'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule' or 'str'"
)
if not isinstance(forcefield, str):
raise TypeError("'forcefield' must be of type 'str'")
else:
# Strip whitespace and convert to lower case.
forcefield = forcefield.replace(" ", "").lower()
if forcefield not in _forcefields_lower:
raise ValueError("Supported force fields are: %s" % openForceFields())
if not isinstance(ensure_compatible, bool):
raise TypeError("'ensure_compatible' must be of type 'bool'.")
if not isinstance(use_nagl, bool):
raise TypeError("'use_nagl' must be of type 'bool'.")
if not isinstance(property_map, dict):
raise TypeError("'property_map' must be of type 'dict'")
# Create a default protocol.
protocol = _Protocol.OpenForceField(
forcefield,
ensure_compatible=ensure_compatible,
use_nagl=use_nagl,
property_map=property_map,
)
# Run the parameterisation protocol in the background and return
# a handle to the thread.
return _Process(molecule, protocol, work_dir=work_dir, auto_start=True)
[docs]
def forceFields():
"""
Return a list of the supported force fields.
Returns
-------
force_fields : [str]
A list of the supported force fields.
"""
return _forcefields
[docs]
def amberForceFields():
"""
Return a list of the supported AMBER force fields.
Returns
-------
force_fields : [str]
A list of the supported AMBER force fields.
"""
return list(_amber_protein_forcefields.keys()) + ["gaff", "gaff2"]
[docs]
def amberProteinForceFields():
"""
Return a list of the supported AMBER protein force fields.
Returns
-------
force_fields : [str]
A list of the supported AMBER protein force fields.
"""
return list(_amber_protein_forcefields.keys())
[docs]
def openForceFields():
"""
Return a list of the supported force fields from the Open Force Field
Initiative.
Returns
-------
force_fields : [str]
A list of the supported force fields from the Open Force Field Initiative.
"""
return _open_forcefields
def _validate_water_model(water_model):
"""
Internal helper function used to validate the water model chosen
for parameterising structural ions.
Parameters
----------
water_model : str
The chosen water model.
Returns
-------
is_valid : bool
Whether the water model is supported.
"""
# Srip whitespace and convert to lower case.
water_model = water_model.replace(" ", "").lower()
# Check that we support this water model.
if water_model in _waterModels():
return True
else:
return False
def _has_ions(molecule, property_map={}):
"""
Internal helper function to check whether a molecule contains ions.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
A molecule object.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
has_ions : bool
Whether the molecule contains ions.
ions : [str]
A list of the ions that were found.
"""
# Store a list of ionic elements.
# (Taken from an AMBER leaprc.water.* file.)
elements = [
"F",
"Cl",
"Br",
"I",
"Li",
"Na",
"K",
"Rb",
"Cs",
"Mg",
"Tl",
"Cu",
"Ag",
"Be",
"Cu",
"Ni",
"Pt",
"Zn",
"Co",
"Pd",
"Ag",
"Cr",
"Fe",
"Mg",
"V",
"Mn",
"Hg",
"Cd",
"Yb",
"Ca",
"Sn",
"Pb",
"Eu",
"Sr",
"Sm",
"Ba",
"Ra",
"Al",
"Fe",
"Cr",
"In",
"Tl",
"Y",
"La",
"Ce",
"Pr",
"Nd",
"Sm",
"Eu",
"Gd",
"Tb",
"Dy",
"Er",
"Tm",
"Lu",
"Hf",
"Zr",
"Ce",
"U",
"Pu",
"Th",
]
# We need to search for ions individually since Sire can't
# handle or'ed search strings beyond a certain size.
# A list of ions that we've found.
ions = []
# Whether the molecule is a string.
if isinstance(molecule, str):
is_string = True
molecule = molecule.upper()
else:
is_string = False
for element in elements:
if is_string:
if element.upper() in molecule:
ions.append(element)
else:
try:
molecule.search(f"element {element}", property_map=property_map)
ions.append(element)
except:
pass
# Check whether we found any ions.
if len(ions) > 0:
return True, ions
else:
return False, ions
def _validate(
molecule=None,
tolerance=None,
max_distance=None,
water_model=None,
check_ions=False,
pre_mol_commands=None,
post_mol_commands=None,
bonds=None,
ensure_compatible=True,
work_dir=None,
property_map=None,
):
"""
Internal function to validate arguments.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
tolerance : float
The tolerance used when searching for disulphide bonds. Atoms will
be considered to be bonded if they are a distance of less than
tolerance times the sum of the equilibrium bond radii apart.
max_distance : :class:`Length <BioSimSpace.Types.Length>`
The maximum distance between atoms when searching for disulphide
bonds.
water_model : str
The water model used to parameterise any structural ions.
Run 'BioSimSpace.Solvent.waterModels()' to see the supported
water models. This can also be used to parameterise water molecules,
or lone oxygen atoms corresponding to structural (crystal) water
molecules.
check_ions : bool
Whether to check for the presence of structural ions. This is only
required when parameterising with protein force fields.
pre_mol_commands : [str]
A list of custom LEaP commands to be executed before loading the molecule.
This can be used for loading custom parameter files, or sourcing additional
scripts. Make sure to use absolute paths when specifying any external files.
When this option is set, we can no longer fall back on GROMACS's pdb2gmx.
post_mol_commands : [str]
A list of custom LEaP commands to be executed after loading the molecule.
This allows the use of additional commands that operate on the molecule,
which is named "mol". When this option is set, we can no longer fall
back on GROMACS's pdb2gmx.
bonds : ((class:`Atom <BioSimSpace._SireWrappers.Atom>`, class:`Atom <BioSimSpace._SireWrappers.Atom>`))
An optional tuple of atom pairs to specify additional atoms that
should be bonded.
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. Set this
to False is parameterising lone oxygen atoms corresponding to
structural (crystal) water molecules. When True, the parameterised
molecule will preserve the topology of the original molecule, e.g.
the original atom and residue names will be kept.
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
"""
if molecule is not None:
if not isinstance(molecule, (_Molecule, str)):
raise TypeError(
"'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule' or 'str'"
)
if tolerance is not None:
if not isinstance(tolerance, (int, float)):
raise TypeError("'tolerance' must be of type 'float'.")
if tolerance < 1:
raise ValueError("'tolerance' must be >= 1.0.")
if max_distance is not None:
if not isinstance(max_distance, _Length):
raise TypeError(
"'max_distance' must be of type 'BioSimSpace.Types.Length'."
)
if water_model is not None:
if not isinstance(water_model, str):
raise TypeError("'water_model' must be of type 'str'.")
if not _validate_water_model(water_model):
water_models = ", ".join(_waterModels())
raise ValueError(
f"'{water_model}' is unsupported. Supported models are: {water_models}"
)
elif check_ions:
has_ions, ions = _has_ions(molecule, property_map=property_map)
if has_ions:
ion_string = ", ".join(ions)
raise ValueError(
f"The molecule contains the following ions: {ion_string}. "
"Please choose a 'water_model' for the ion parameters."
)
if pre_mol_commands is not None:
if not isinstance(pre_mol_commands, (list, tuple)):
raise TypeError("'pre_mol_commands' must be a 'list' of 'str' types.")
else:
if not all(isinstance(x, str) for x in pre_mol_commands):
raise TypeError("'pre_mol_commands' must be a 'list' of 'str' types.")
if post_mol_commands is not None:
if not isinstance(post_mol_commands, (list, tuple)):
raise TypeError("'post_mol_commands' must be a 'list' of 'str' types.")
else:
if not all(isinstance(x, str) for x in post_mol_commands):
raise TypeError("'post_mol_commands' must be a 'list' of 'str' types.")
if bonds is not None:
if molecule is None:
raise ValueError("Cannot add bonds when no 'molecule' is specified!")
else:
if not isinstance(bonds, (tuple, list)):
raise TypeError("'bonds' must be of type 'tuple' or 'list'.")
for bond in bonds:
if not isinstance(bond, (tuple, list)):
raise TypeError(
"Each bond entry must be a 'tuple' or 'list' of atom pairs."
)
else:
if len(bond) != 2:
raise ValueError("Each 'bonds' entry must contain two items.")
else:
# Extract the atoms in the bond.
atom0, atom1 = bond
# Make sure these are atoms.
if not isinstance(atom0, _Atom) or not isinstance(atom1, _Atom):
raise TypeError(
"'bonds' must contain tuples of "
"'BioSimSpace._SireWrappers.Atom' types."
)
# Make sure that they belong to the molecule being parameterised.
if not (
atom0._sire_object.molecule() == molecule._sire_object
) or not (
atom1._sire_object.molecule() == molecule._sire_object
):
raise ValueError(
"Atoms in 'bonds' don't belong to the 'molecule'."
)
if not isinstance(ensure_compatible, bool):
raise TypeError("'ensure_compatible' must be of type 'bool'.")
if property_map is not None:
if not isinstance(property_map, dict):
raise TypeError("'property_map' must be of type 'dict'")
# Wrapper function to dynamically generate functions with a given name.
# Here "name" refers to the name of a supported AMBER protein force field.
# We could do this with functools.partial and functions.update_wrapper, but
# this approach preserves correct documentation for the partial function,
# both within the Python console, or via Sphinx.
import sys as _sys
_namespace = _sys.modules[__name__]
def _make_amber_protein_function(name):
def _function(
molecule,
tolerance=1.2,
max_distance=_Length(6, "A"),
water_model=None,
pre_mol_commands=None,
post_mol_commands=None,
bonds=None,
ensure_compatible=True,
work_dir=None,
property_map={},
):
"""
Parameterise a molecule using the named AMBER force field.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
tolerance : float
The tolerance used when searching for disulphide bonds. Atoms will
be considered to be bonded if they are a distance of less than
tolerance times the sum of the equilibrium bond radii apart.
max_distance : :class:`Length <BioSimSpace.Types.Length>`
The maximum distance between atoms when searching for disulphide
bonds.
water_model : str
The water model used to parameterise any structural ions.
Run 'BioSimSpace.Solvent.waterModels()' to see the supported
water models. This can also be used to parameterise water molecules,
or lone oxygen atoms corresponding to structural (crystal) water
molecules.
pre_mol_commands : [str]
A list of custom LEaP commands to be executed before loading the molecule.
This can be used for loading custom parameter files, or sourcing additional
scripts. Make sure to use absolute paths when specifying any external files.
When this option is set, we can no longer fall back on GROMACS's pdb2gmx.
post_mol_commands : [str]
A list of custom LEaP commands to be executed after loading the molecule.
This allows the use of additional commands that operate on the molecule,
which is named "mol". When this option is set, we can no longer fall
back on GROMACS's pdb2gmx.
bonds : ((class:`Atom <BioSimSpace._SireWrappers.Atom>`, class:`Atom <BioSimSpace._SireWrappers.Atom>`))
An optional tuple of atom pairs to specify additional atoms that
should be bonded.
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. Set
this to False is parameterising lone oxygen atoms corresponding to
structural (crystal) water molecules. When True, the parameterised
molecule will preserve the topology of the original molecule, e.g.
the original atom and residue names will be kept.
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
return _parameterise_amber_protein(
name,
molecule,
tolerance=tolerance,
max_distance=max_distance,
water_model=water_model,
pre_mol_commands=pre_mol_commands,
post_mol_commands=post_mol_commands,
bonds=bonds,
ensure_compatible=ensure_compatible,
work_dir=work_dir,
property_map=property_map,
)
return _function
# Wrapper function to dynamically generate functions with a given name.
# Here "name" refers to the name of a supported force field from the Open
# Force Field Initiative. The force field name has been "tidied" so that
# it conforms to sensible function naming standards, i.e. "-" and "."
# characters replaced by underscores.
def _make_openff_function(name):
def _function(
molecule, ensure_compatible=True, use_nagl=True, work_dir=None, property_map={}
):
"""
Parameterise a molecule using the named force field from the
Open Force Field initiative.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, str
The molecule to parameterise, either as a Molecule object or SMILES
string.
ensure_compatible : bool
Whether to ensure that the topology of the parameterised molecule is
compatible with that of the original molecule. An exception will be
raised if this isn't the case, e.g. if atoms have been added. Set
this to False is parameterising lone oxygen atoms corresponding to
structural (crystal) water molecules. When True, the parameterised
molecule will preserve the topology of the original molecule, e.g.
the original atom and residue names will be kept.
use_nagl : bool
Whether to use NAGL to compute AM1-BCC charges. If False, the default
is to use AmberTools via antechamber and sqm. (This option is only
used if NAGL is available.)
work_dir : str
The working directory for the process.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
process : :class:`Process <BioSimSpace.Parameters._process.Process>`
A process to parameterise the molecule in the background. Call the
.getMolecule() method on the returned process to block until the
parameterisation is complete and get the parameterised molecule.
"""
return _parameterise_openff(
name,
molecule,
ensure_compatible=ensure_compatible,
use_nagl=use_nagl,
work_dir=work_dir,
property_map=property_map,
)
return _function
# Dynamically create functions for all of the supported AMBER protein force fields.
for _ff in _amber_protein_forcefields.keys():
# Generate the function and bind it to the namespace.
_function = _make_amber_protein_function(_ff)
_function.__name__ = _ff
_function.__qualname = _ff
setattr(_namespace, _ff, _function)
# Expose the function to the user.
__all__.append(_ff)
# Expose the two GAFF functions.
__all__.append("gaff")
__all__.append("gaff2")
# Create a list of the force field names (to date.)
# This needs to come after all of the force field functions.
_forcefields = [] # List of force fields (actual names).
_forcefields_lower = [] # List of lower case names.
_forcefield_dict = {} # Mapping between lower case names and functions.
for _ff in list(_amber_protein_forcefields.keys()) + ["gaff", "gaff2"]:
_forcefields.append(_ff)
_forcefields_lower.append(_ff.lower())
_forcefield_dict[_ff.lower()] = getattr(_namespace, _ff)
# Dynamically create functions for all available force fields from the Open
# Force Field Initiative.
_openforcefields = _try_import("openforcefields")
if _have_imported(_openforcefields):
from glob import glob as _glob
import os as _os
_openff_dirs = _openforcefields.get_forcefield_dirs_paths()
_open_forcefields = []
# Loop over all force field directories.
for _dir in _openff_dirs:
# Glob all offxml files in the directory.
_ff_list = _glob(f"{_dir}" + "/*.offxml")
for _ff in _ff_list:
# Get the force field name (base name minus extension).
_base = _os.path.basename(_ff)
_ff = _os.path.splitext(_base)[0]
# Only include unconstrained force-fields since we need to go via
# an intermediate ParmEd conversion. This means ParmEd must receive
# bond parameters. We can then choose to constrain later, if required.
# See, e.g: https://github.com/openforcefield/openff-toolkit/issues/603
if "unconstrained" in _ff:
# Append to the list of available force fields.
_forcefields.append(_ff)
_open_forcefields.append(_ff)
# Create a sane function name, i.e. replace "-" and "."
# characters with "_".
_func_name = _ff.replace("-", "_")
_func_name = _func_name.replace(".", "_")
# Generate the function and bind it to the namespace.
_function = _make_openff_function(_ff)
_function.__name__ = _func_name
_function.__qualname = _func_name
setattr(_namespace, _func_name, _function)
# Expose the function to the user.
__all__.append(_func_name)
# Convert force field name to lower case and map to its function.
_forcefields_lower.append(_ff.lower())
_forcefield_dict[_ff.lower()] = getattr(_namespace, _func_name)
# Clean up redundant attributes.
del _base
del _dir
del _ff_list
del _func_name
del _glob
del _make_openff_function
del _openff_dirs
del _os
elif _isVerbose():
print("openforcefields not available as this module cannot be loaded.")
# Clean up redundant attributes.
del _ff
del _function
del _namespace
del _openforcefields
del _sys