######################################################################
# BioSimSpace: Making biomolecular simulation a breeze!
#
# Copyright: 2017-2024
#
# Authors: Lester Hedges <lester.hedges@gmail.com>
#
# BioSimSpace is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BioSimSpace is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with BioSimSpace. If not, see <http://www.gnu.org/licenses/>.
#####################################################################
"""Functionality for converting between molecular representations."""
__author__ = "Lester Hedges"
__email__ = "lester.hedges@gmail.com"
__all__ = [
"smiles",
"supportedFormats",
"to",
"toBioSimSpace",
"toOpenMM",
"toRDKit",
"toSire",
]
from .._Utils import _try_import, _have_imported
_openmm = _try_import("openmm")
import os as _os
from rdkit.Chem.rdchem import Mol as _RDMol
import rdkit.Chem as _Chem
from sire import convert as _sire_convert
from sire import smiles as _sire_smiles
import sire.legacy.Mol as _SireMol
import sire.legacy.System as _SireSystem
import sire.legacy.Vol as _SireVol
import sire.system as _NewSireSystem
from .._Exceptions import ConversionError as _ConversionError
from .. import IO as _IO
from .. import _SireWrappers
[docs]
def smiles(
smiles_string, add_hydrogens=True, generate_coordinates=True, property_map={}
):
"""
Generate a BioSimSpace Molecule froma SMILES string.
Parameters
----------
smiles_string : str
The molecule in SMILES string format.
add_hydrogens : bool
Whether or not to automatically add hydrogens.
generate_coordinates : bool
Whether or not to automatically generate coordinates. Note that
generating the coordinates will automatically switch on addition
of hydrogens.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
A BioSimSpace molecule.
"""
if not isinstance(smiles_string, str):
raise TypeError("'smiles_string' must be of type 'str'.")
if not isinstance(add_hydrogens, bool):
raise TypeError("'add_hydrogens' must be of type 'bool'.")
if not isinstance(generate_coordinates, bool):
raise TypeError("'generate_coordinates' must be of type 'bool'.")
if not isinstance(property_map, dict):
raise TypeError("'property_map' must be of type 'dict'.")
# Strip whitespace.
smiles_string = smiles_string.replace(" ", "")
try:
molecule = _SireWrappers.Molecule(
_sire_smiles(
smiles_string,
add_hydrogens=add_hydrogens,
generate_coordinates=generate_coordinates,
map=property_map,
)
)
# Rename the molecule with the original SMILES string.
# Since the name is written to topology file formats, we
# need to ensure that it doesn't start with an [ character,
# which would break GROMACS.
if smiles_string.startswith("["):
name = f"smiles:{smiles_string}"
edit_mol = molecule._sire_object.edit()
edit_mol = edit_mol.rename(name).molecule()
molecule._sire_object = edit_mol.commit()
return molecule
except:
raise _ConversionError(
f"Unable to create a BioSimSpace Molecule from SMILES string: {smiles_string}"
)
[docs]
def to(obj, format="biosimspace", property_map={}):
"""
Convert an object to a specified format.
Parameters
----------
obj :
The input object to convert.
format : str
The format to convert to.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
converted_obj :
The object in the converted format.
"""
# Validate the input.
if not isinstance(format, str):
raise TypeError("'format' must be of type 'str'.")
# Convert to lower case and strip whitespace.
format = format.lower().replace(" ", "")
if not format in supportedFormats():
raise ValueError(
f"Unsupported format '{format}', options are: {', '.join(supportedFormats())}."
)
if not isinstance(property_map, dict):
raise TypeError("'property_map' must be of type 'dict'.")
# Special handling for OpenMM conversion. Currently this is a one-way (toOpenMM)
# conversion only and is only supported for specific Sire and BioSimSpace types.
if format == "openmm":
if not _have_imported(_openmm):
raise ConversionError(
"Conversion to OpenMM format is currently not supported on this platform."
)
# If this is already an OpenMM context, then simply return it.
if isinstance(obj, _openmm.openmm.Context):
return obj
# BioSimSpace objects.
if isinstance(obj, _SireWrappers._sire_wrapper.SireWrapper):
if not isinstance(obj, _SireWrappers.System):
# Convert to a system where possible.
try:
obj = obj.toSystem()
except:
# Otherwise, convert residues/atoms to a molecule, then a system.
try:
obj = obj.toMolecule().toSystem()
except:
raise _ConversionError(
"Unable to convert object to OpenMM format!"
)
# Get the user-defined space property name.
prop = property_map.get("space", "space")
# Try to get the space property. Use an infinite cartesian space
# if none is present.
try:
space = obj._sire_object.property(prop)
except:
space = _SireVol.Cartesian()
# Set a shared space property.
obj._sire_object.addSharedProperty(prop, space)
# Now try to convert the object to OpenMM format.
try:
return _sire_convert.to(
obj,
"openmm",
map=property_map,
)
except:
raise _ConversionError("Unable to convert object to OpenMM format!")
# Sire objects.
elif isinstance(
obj,
(
_NewSireSystem.System,
_SireSystem.System,
_SireMol.MoleculeGroup,
_SireMol.Molecules,
_SireMol.Molecule,
_SireMol.Residue,
),
):
try:
# First convert the object to BioSimSpace format.
obj = to(obj, format="biosimspace", property_map=property_map)
# Now try converting to OpenMM format.
return to(obj, format="openmm", property_map=property_map)
except:
raise _ConversionError("Unable to convert object to OpenMM format!")
else:
raise _ConversionError(
f"Currently unable to convert object of type {type(obj)} to OpenMM format!"
)
# Now do some work to handle different types of input. Where possible, we
# try to convert to the "expected" format, i.e. atom to atom, residue
# to residue, etc. This means that we need to use sire.legacy.Mol.PartialMolecule
# to extract the sub-components we require.
# BioSimSpace objects, i.e. wrapped Sire objects.
if isinstance(obj, _SireWrappers._sire_wrapper.SireWrapper):
# Return the object itself.
if format == "biosimspace":
return obj
# Return the underlying object.
elif format == "sire":
if isinstance(obj, _SireWrappers.Molecules):
return _SireMol.SelectorMol(obj._sire_object)
else:
return obj._sire_object
elif format == "rdkit":
# Handle System and Molecules objects.
if isinstance(obj, (_SireWrappers.System, _SireWrappers.Molecules)):
try:
return _sire_convert.to(
obj._sire_object.molecules(), "rdkit", map=property_map
)
except:
raise _ConversionError("Unable to convert object to RDKit format!")
# Handle Residues and Atoms by extracting the PartialMolecule.
else:
try:
return _sire_convert.to(
_SireMol.PartialMolecule(obj._sire_object).extract(),
"rdkit",
map=property_map,
)
except:
raise _ConversionError("Unable to convert object to RDKit format!")
# Sire objects.
elif isinstance(
obj,
(
_NewSireSystem.System,
_SireSystem.System,
_SireMol.SelectorMol,
_SireMol.MoleculeGroup,
_SireMol.Molecules,
_SireMol.Molecule,
_SireMol.Residue,
_SireMol.Atom,
),
):
if format == "biosimspace":
if isinstance(obj, _NewSireSystem.System):
try:
return _SireWrappers.System(obj._system)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
if isinstance(obj, _SireSystem.System):
try:
return _SireWrappers.System(obj)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif isinstance(obj, _SireMol.SelectorMol):
try:
return _SireWrappers.Molecules(obj.toMoleculeGroup())
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif isinstance(obj, _SireMol.Molecules):
try:
molgrp = _SireMol.MoleculeGroup("all")
molgrp.add(obj)
return _SireWrappers.Molecules(molgrp)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif isinstance(obj, _SireMol.MoleculeGroup):
try:
return _SireWrappers.Molecules(obj)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif isinstance(obj, _SireMol.Molecule):
try:
return _SireWrappers.Molecule(obj)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif isinstance(obj, _SireMol.Residue):
try:
return _SireWrappers.Residue(obj)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif isinstance(obj, _SireMol.Atom):
try:
return _SireWrappers.Atom(obj)
except:
raise _ConversionError(
"Unable to convert object to BioSimSpace format!"
)
elif format == "rdkit":
if isinstance(
obj, (_NewSireSystem.System, _SireSystem.System, _SireMol.MoleculeGroup)
):
try:
return _sire_convert.to(obj.molecules(), "rdkit", map=property_map)
except:
raise _ConversionError("Unable to convert object to RDKit format!")
elif isinstance(obj, (_SireMol.Molecules, _SireMol.Molecule)):
try:
return _sire_convert.to(obj, "rdkit", map=property_map)
except:
raise _ConversionError("Unable to convert object to RDKit format!")
elif isinstance(obj, (_SireMol.Residue, _SireMol.Atom)):
try:
return _sire_convert.to(
_SireMol.PartialMolecule(obj).extract(),
"rdkit",
map=property_map,
)
except:
raise _ConversionError("Unable to convert object to RDKit format!")
# RDKit objects.
elif isinstance(obj, _RDMol):
try:
# Convert the object, then return as a single residue, atom,
# or entire molecule.
new_obj = _sire_convert.to(obj, format, map=property_map)
if format == "biosimspace":
if new_obj.nAtoms() == 1:
return new_obj.getAtoms()[0]
elif new_obj.nResidues() == 1:
return new_obj.getResidues()[0]
else:
return new_obj
elif format == "sire":
if new_obj.nAtoms() == 1:
return new_obj.atoms()[0]
if new_obj.nResidues() == 1:
return new_obj.residues()[0]
else:
return new_obj
except:
raise _ConversionError(f"Unable to convert object to {format} format!")
# Lists or tuples of objects.
elif isinstance(obj, (list, tuple)):
initial_type = type(obj[0])
if not all(isinstance(x, initial_type) for x in obj):
raise ValueError("Containers must contain objects of the same type!")
if isinstance(obj[0], (_RDMol, _SireWrappers.Molecule)):
try:
new_obj = _sire_convert.to(obj, format, map=property_map)
if format == "biosimspace":
new_obj = _SireWrappers.Molecules(new_obj)
return new_obj
except:
raise _ConversionError(f"Unable to convert object to {format} format!")
else:
raise TypeError(f"Cannot convert object of type '{type(obj)}'")
else:
raise TypeError(f"Cannot convert from object of type '{type(obj)}'")
[docs]
def toBioSimSpace(obj, property_map={}):
"""
Convert an object to BioSimSpace format.
Parameters
----------
obj :
The input object to convert.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
converted_obj :
The object in BioSimSpace format.
"""
return to(obj, format="biosimspace", property_map=property_map)
[docs]
def toOpenMM(obj, property_map={}):
"""
Convert an object to OpenMM format.
Parameters
----------
obj :
The input object to convert.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
converted_obj :
The object in RDKit format.
"""
if _have_imported(_openmm):
return to(obj, format="openmm", property_map=property_map)
else:
raise ConversionError(
"Conversion to OpenMM format is currently not supported on this platform."
)
[docs]
def toRDKit(obj, property_map={}):
"""
Convert an object to RDKit format.
Parameters
----------
obj :
The input object to convert.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
converted_obj :
The object in OpenMM format.
"""
return to(obj, format="rdkit", property_map=property_map)
[docs]
def toSire(obj, property_map={}):
"""
Convert an object to Sire format.
Parameters
----------
obj :
The input object to convert.
format : str
The format to convert to.
property_map : dict
A dictionary that maps system "properties" to their user defined
values. This allows the user to refer to properties with their
own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
converted_obj :
The object in Sire format.
"""
return to(obj, format="sire", property_map=property_map)
def _to_rdkit(molecule, work_dir=_os.getcwd(), direct=True, property_map={}):
"""
Internal function to convert two BioSimSpace molecules to RDKit format.
This will go via an intermediate file format if direct conversion fails.
Parameters
----------
molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
The BioSimSpace molecule.
work_dir : str
The path to the working directory.
dire : bool
Whether to use a direct conversion (True) or go via an intermediate
file format (False).
property_map : dict
A dictionary that maps "properties" in 'molecule' to their user
defined values. This allows the user to refer to properties
with their own naming scheme, e.g. { "charge" : "my-charge" }
Returns
-------
rdmol : rdkit.Chem.rdchem.Mol
The molecule in RDKit format.
"""
if not isinstance(molecule, _SireWrappers.Molecule):
raise TypeError(
"'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'."
)
if not isinstance(work_dir, str):
raise TypeError("'work_dir' must be of type 'str'.")
if not _os.path.exists(work_dir):
raise ValueError(f"'work_dir' doesn't exist: {work_dir}")
if not isinstance(direct, bool):
raise TypeError("'direct' must be of type 'bool'.")
if not isinstance(property_map, dict):
raise TypeError("'property_map' must be of type 'dict'.")
# First try to convert to RDKit format directly.
try:
if not direct:
raise Exception
else:
rdmol = toRDKit(molecule, property_map=property_map)
# If this fails, then go via an intermediate file format.
except:
try:
is_sdf = False
filebase = work_dir + "/tmp"
# Try to go via SDF format to preserve bond orders.
if molecule._sire_object.hasProperty("fileformat"):
if "SDF" in molecule._sire_object.property("fileformat").value():
_IO.saveMolecules(
filebase, molecule, "SDF", property_map=property_map
)
rdmol = _Chem.MolFromMolFile(filebase + ".sdf", True, False, 0)
# Flag that we went via SDF format.
is_sdf = True
# Convert via PDB format..
if not is_sdf:
_IO.saveMolecules(filebase, molecule, "PDB", property_map=property_map)
rdmol = _Chem.MolFromPDBFile(filebase + ".pdb", True, False, 0)
except:
raise _ConversionError("Unable to convert molecule to RDKit format!")
return rdmol