mdsuite/transformations/map_molecules.py
"""
MDSuite: A Zincwarecode package.
License
-------
This program and the accompanying materials are made available under the terms
of the Eclipse Public License v2.0 which accompanies this distribution, and is
available at https://www.eclipse.org/legal/epl-v20.html
SPDX-License-Identifier: EPL-2.0
Copyright Contributors to the Zincwarecode Project.
Contact Information
-------------------
email: zincwarecode@gmail.com
github: https://github.com/zincware
web: https://zincwarecode.com/
Citation
--------
If you use this module please cite us with:
Summary
-------
"""
import logging
from typing import List
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from mdsuite.database.mdsuite_properties import mdsuite_properties
from mdsuite.graph_modules.molecular_graph import MolecularGraph
from mdsuite.transformations.transformations import Transformations
from mdsuite.utils.meta_functions import join_path
from mdsuite.utils.molecule import Molecule
log = logging.getLogger(__name__)
class MolecularMap(Transformations):
"""
Class for mapping atoms in a database to molecules.
Attributes
----------
scale_function : dict
A dictionary referencing the memory/time scaling function of the
transformation.
molecules : dict
Molecule dictionary to use as reference. e.g.
.. code-block::
{'emim': {'smiles': 'CCN1C=C[N+](+C1)C', 'amount': 20}, 'PF6':
{'smiles': 'F[P-](F)(F)(F)(F)F', 'amount': 20}}
would be the input for the emim-PF6 ionic liquid.
"""
def __init__(self):
"""Constructor for the MolecularMap class."""
super().__init__()
self.molecules = None # parsed by the user.
self.reference_molecules = {}
self.adjacency_graphs = {}
self.mapping_property = mdsuite_properties.unwrapped_positions
self.dependency = mdsuite_properties.positions
self.scale_function = {"quadratic": {"outer_scale_factor": 5}}
def _prepare_database_entry(self, species: str, number_of_molecules: int) -> dict:
"""
Call some housekeeping methods and prepare for the transformations.
Parameters
----------
species
Name of the species to be added
number_of_molecules : int
Number of molecules to be added to the database.
Returns
-------
data_structure : dict
A data structure for the incoming data.
"""
# collect machine properties and determine batch size
path = join_path(species, self.mapping_property.name)
dataset_structure = {
path: (number_of_molecules, self.experiment.number_of_configurations, 3)
}
self.database.add_dataset(
dataset_structure
) # add a new dataset to the database_path
# data_structure = {path: {"indices": np.s_[:], "columns": [0, 1, 2]}}
data_structure = {
path: {
"indices": list(range(number_of_molecules)),
"columns": [0, 1, 2],
}
}
return data_structure
def _run_dependency_check(self):
"""
Check that dependencies are fulfilled.
Returns
-------
Calls a resolve method if dependencies are not met.
"""
for sp_name in self.experiment.species:
path = join_path(sp_name, self.dependency.name)
if not self.database.check_existence(path):
self.get_prop_through_transformation(sp_name, self.dependency)
def _get_mass_array(self, species: list) -> list:
"""
Return an array of atom masses for the scaling.
Parameters
----------
species : list
List of species to be loaded.
Returns
-------
mass_array : list
A list of masses.
"""
return [self.experiment.species[item].mass for item in species]
def _get_type_spec(self, path_list: list) -> dict:
"""
Compute the type spec of a list of data.
Parameters
----------
path_list : list
List of paths for which the type species must be built.
Returns
-------
type_spec : dict
"""
type_spec = {}
for item in path_list:
type_spec[str.encode(item)] = tf.TensorSpec(
shape=(None, None, 3), dtype=self.dtype
)
type_spec.update(
{
str.encode("data_size"): tf.TensorSpec(shape=(), dtype=tf.int32),
}
)
return type_spec
def _get_reduced_mass_dict(self, species: dict, molecular_mass) -> dict:
"""
Build the reduced mass dictionary.
This is a dictionary of reduced masses for each species in a molecule,
i.e m_species / m_molecule. These are used in the COM positions computation
later.
Parameters
----------
species : list
List of species to include in the dict.
Returns
-------
reduced_mass_dict : dict
Dictionary of reduced masses for each species.
"""
reduced_mass_dict = {}
for item in species:
reduced_mass_dict[item] = (
self.experiment.species[item].mass[0] / molecular_mass
)
return reduced_mass_dict
def _map_molecules(self, molecular_graph: MolecularGraph):
"""
Map the molecules and save the data in the database.
Returns
-------
Updates the database.
"""
molecule_name = molecular_graph.molecule_name
molecules = self.experiment.molecules
molecules[molecule_name] = {}
molecules[molecule_name]["n_particles"] = molecular_graph.n_molecules
molecules[molecule_name]["mass"] = molecular_graph.molecular_mass
molecules[molecule_name]["groups"] = molecular_graph.molecular_groups
scaling_factor = molecular_graph.molecular_mass
mass_dictionary = self._get_reduced_mass_dict(
molecular_graph.species, scaling_factor
)
# Prepare the data structures and monitors
data_structure = self._prepare_database_entry(
molecule_name, molecular_graph.n_molecules
)
path_list = [
join_path(s, self.mapping_property.name) for s in molecular_graph.species
]
self._prepare_monitors(data_path=path_list)
type_spec = self._get_type_spec(path_list)
batch_generator, batch_generator_args = self.data_manager.batch_generator()
data_set = tf.data.Dataset.from_generator(
batch_generator, args=batch_generator_args, output_signature=type_spec
)
data_set = data_set.prefetch(tf.data.experimental.AUTOTUNE)
log.info(f"Mapping molecule graphs onto trajectory for {molecule_name}")
for i, batch in tqdm(enumerate(data_set), ncols=70, total=self.n_batches):
batch_size = batch[b"data_size"]
trajectory = np.zeros(
shape=(
molecular_graph.n_molecules,
batch_size,
3,
)
)
for t, molecule in enumerate(molecular_graph.molecular_groups):
# Load species molecule-specific particles into a separate dict
# and apply their respective scaling factor.
molecule_trajectory = np.zeros((batch_size, 3))
for item in molecular_graph.molecular_groups[molecule]:
batch_reference = str.encode(f"{item}/{self.mapping_property.name}")
particles = molecular_graph.molecular_groups[molecule][item]
particle_trajectories = (
tf.gather(batch[batch_reference], particles)
* mass_dictionary[item]
)
molecule_trajectory += tf.reduce_sum(particle_trajectories, axis=0)
# Compute the COM trajectory
trajectory[t] = molecule_trajectory
self._save_output(
data=trajectory,
data_structure=data_structure,
index=i * self.batch_size,
)
self.experiment.molecules = molecules
def run_transformation(self, molecules: List[Molecule]):
"""
Perform the transformation.
Parameters
----------
molecules : List[Molecule]
A list of MDSuite Molecule objects. For each, a molecule will be
mapped.
Returns
-------
Update the experiment database.
"""
self._run_dependency_check()
# Populate the molecules dict
for item in molecules:
molecular_graph = MolecularGraph(
experiment=self.experiment,
molecule_input_data=item,
)
if item.mol_pbc:
self.mapping_property = mdsuite_properties.positions
self._map_molecules(molecular_graph)
if item.mol_pbc:
self.experiment.run.CoordinateUnwrapper(species=[item.name])
else:
self.experiment.run.CoordinateWrapper(species=[item.name])