mdsuite/calculators/potential_of_mean_force.py
"""
MDSuite: A Zincwarecode package.
License
-------
This program and the accompanying materials are made available under the terms
of the Eclipse Public License v2.0 which accompanies this distribution, and is
available at https://www.eclipse.org/legal/epl-v20.html
SPDX-License-Identifier: EPL-2.0
Copyright Contributors to the Zincwarecode Project.
Contact Information
-------------------
email: zincwarecode@gmail.com
github: https://github.com/zincware
web: https://zincwarecode.com/
Citation
--------
If you use this module please cite us with:
Summary
-------
Module for the computation of the potential of mean force (PMF). The PMF can be used to
better understand effective bond strength between species of a system.
"""
import logging
from dataclasses import dataclass
import numpy as np
from bokeh.models import HoverTool, Span
from bokeh.plotting import figure
from scipy.signal import find_peaks
from mdsuite import utils
from mdsuite.calculators.calculator import Calculator, call
from mdsuite.database.scheme import Computation
from mdsuite.utils.meta_functions import apply_savgol_filter, golden_section_search
from mdsuite.utils.units import boltzmann_constant
log = logging.getLogger(__name__)
@dataclass
class Args:
"""Data class for the saved properties."""
savgol_order: int
savgol_window_length: int
number_of_bins: int
number_of_configurations: int
cutoff: float
number_of_shells: int
class PotentialOfMeanForce(Calculator):
"""
Class for the calculation of the potential of mean-force.
The potential of mean-force is a measure of the binding strength between
atomic species in a experiment.
Attributes
----------
experiment : class object
Class object of the experiment.
data_range : int (default=500)
Range over which the property should be evaluated.
This is not applicable to the current analysis as the
full rdf will be calculated.
x_label : str
How to label the x axis of the saved plot.
y_label : str
How to label the y axis of the saved plot.
analysis_name : str
Name of the analysis. used in saving of the
tensor_values and figure.
file_to_study : str
The tensor_values file corresponding to the rdf being
studied.
data_files : list
list of files to be analyzed.
rdf = None : list
rdf tensor_values being studied.
radii = None : list
radii tensor_values corresponding to the rdf.
selected_species : list
A list of species combinations being studied.
See Also
--------
mdsuite.calculators.calculator.Calculator class
Examples
--------
experiment.run_computation.PotentialOfMeanForce(savgol_order = 2,
savgol_window_length = 17)
"""
def __init__(self, **kwargs):
"""
Python constructor for the class.
Parameters
----------
experiment : class object
Class object of the experiment.
experiments : class object
Class object of the experiment.
load_data : bool
"""
super().__init__(**kwargs)
self.file_to_study = None
self.rdf = None
self.radii = None
self.pomf = None
self.indices = None
self.x_label = r"$$r / nm$$"
self.y_label = r"$$w^{2}(r)$$"
self.data_range = 1
self.result_keys = []
self.result_series_keys = ["r", "pomf"]
self.analysis_name = "Potential_of_Mean_Force"
self.post_generation = True
@call
def __call__(
self,
rdf_data: Computation = None,
plot=True,
savgol_order: int = 2,
savgol_window_length: int = 17,
number_of_shells: int = 1,
):
"""
Python constructor for the class.
Parameters
----------
rdf_data : Computation
RDF data to use in the computation.
plot : bool (default=True)
Decision to plot the analysis.
savgol_order : int
Order of the savgol polynomial filter
savgol_window_length : int
Window length of the savgol filter.
number_of_shells : int
Number of shells to integrate through.
"""
if isinstance(rdf_data, Computation):
self.rdf_data = rdf_data
else:
self.rdf_data = self.experiment.run.RadialDistributionFunction(plot=False)
self.plot = plot
self.data_files = []
# set args that will affect the computation result
self.args = Args(
savgol_order=savgol_order,
savgol_window_length=savgol_window_length,
number_of_bins=self.rdf_data.computation_parameter["number_of_bins"],
cutoff=self.rdf_data.computation_parameter["cutoff"],
number_of_configurations=self.rdf_data.computation_parameter[
"number_of_configurations"
],
number_of_shells=number_of_shells,
)
# Auto-populate the results.
for i in range(self.args.number_of_shells):
self.result_keys.append(f"POMF_{i + 1}")
self.result_keys.append(f"POMF_{i + 1}_error")
def _calculate_potential_of_mean_force(self, rdf: np.ndarray) -> np.ndarray:
"""
Calculate the potential of mean force.
Parameters
----------
rdf : np.ndarray
RDF data to use in the computation.
Returns
-------
pomf : np.ndarray
The computed pomf array.
Notes
-----
Units here are always eV as the data stored in the RDF is constant independent
of what was in the simulation.
"""
pomf = -1 * boltzmann_constant * self.experiment.temperature * np.log(rdf)
return pomf * 6.242e8 # convert to eV
def _populate_args(self) -> tuple:
"""
Use the provided RDF data to populate the args class.
Returns
-------
number_of_bins : int
The data range used in the RDF calculation.
cutoff : float
The cutoff (in nm) used in the RDF calculation
"""
raw_data = self.rdf_data.data_dict
keys = list(raw_data)
number_of_bins = len(raw_data[keys[0]]["x"])
cutoff = raw_data[keys[0]]["x"][-1]
return number_of_bins, cutoff
def get_pomf_peaks(self, pomf_data: np.ndarray) -> np.ndarray:
"""
Calculate the maximums of the rdf.
Parameters
----------
pomf_data : np.ndarray
POMF data to use in the peak detection.
Returns
-------
peaks : np.ndarray
Peaks to be used in the calculation.
Raises
------
ValueError
Raised if the number of peaks required for the analysis are not met.
"""
filtered_data = apply_savgol_filter(
pomf_data,
order=self.args.savgol_order,
window_length=self.args.savgol_window_length,
)
required_peaks = self.args.number_of_shells + 1
# Find the maximums in the filtered dataset
peaks = find_peaks(filtered_data)[0]
# Check that the required number of peaks exist.
if len(peaks) < required_peaks:
msg = (
"Not enough peaks were detecting in the RDF to perform the desired "
"analysis. Try reducing the number of desired peaks or improving the "
"quality of the RDF provided."
)
log.error(msg)
raise ValueError(msg)
else:
return peaks
def _find_minimum(self, pomf_data: np.ndarray, radii_data: np.ndarray) -> dict:
"""
Find the minimum of the pomf function.
This function calls an implementation of the Golden-section search
algorithm to determine the minimum of the potential of mean-force function.
Parameters
----------
pomf_data : np.ndarray
POMF data to use in the min finding.
radii_data : np.ndarray
Radii data to use in the min finding.
Returns
-------
pomf_shells : dict
Dict of all shells detected based on user arguments, e.g:
{'1': [0.1, 0.2]} indicates that the first pomf peak is betwee 0.1 and
0.2 angstrom.
"""
# get the peaks of the tensor_values post-filtering
peaks = self.get_pomf_peaks(pomf_data)
pomf_shells = {}
for i in range(self.args.number_of_shells):
pomf_shells[i] = np.zeros(2, dtype=int)
pomf_radii_range = golden_section_search(
[radii_data, pomf_data], radii_data[peaks[i + 1]], radii_data[peaks[i]]
)
for j in range(2):
pomf_shells[i][j] = np.where(radii_data == pomf_radii_range[j])[0][0]
return pomf_shells
def _get_pomf_values(self, pomf: np.ndarray, radii: np.ndarray) -> dict:
"""
Use a min-finding algorithm to calculate pomf values along the curve.
Parameters
----------
pomf : np.ndarray
POMF function from which to compute properties.
radii : np.ndarray
Array of radii values to use in the min finding.
Returns
-------
pomf_data : dict
A dictionary of the pomf values and their uncertainty. e,g:
{"POMF_1": 5.6, "POMF_1_error": 0.01}
"""
pomf_shells = self._find_minimum(pomf, radii)
pomf_data = {}
for key, val in pomf_shells.items():
lower_bound = pomf[val[0]]
upper_bound = pomf[val[1]]
pomf_data[f"POMF_{int(key) + 1}"] = np.mean([lower_bound, upper_bound])
pomf_data[f"POMF_{int(key) + 1}_error"] = np.std(
[lower_bound, upper_bound]
) / np.sqrt(2)
return pomf_data
def run_calculator(self):
"""Calculate the potential of mean-force and perform error analysis."""
for selected_species, vals in self.rdf_data.data_dict.items():
selected_species = selected_species.split("_")
radii = np.array(vals["x"]).astype(float)[1:]
rdf = np.array(vals["y"]).astype(float)[1:]
log.debug(f"rdf: {rdf} \t radii: {radii}")
pomf = self._calculate_potential_of_mean_force(rdf)
pomf_data = self._get_pomf_values(pomf, radii)
data = {
self.result_series_keys[0]: radii[1:].tolist(),
self.result_series_keys[1]: pomf.tolist(),
}
for item in self.result_keys:
data[item] = pomf_data[item]
self.queue_data(data=data, subjects=selected_species)
def plot_data(self, data):
"""Plot the POMF."""
log.debug("Start plotting the POMF.")
for selected_species, val in data.items():
fig = figure(x_axis_label=self.x_label, y_axis_label=self.y_label)
# Add vertical lines to the plot
for i in range(self.args.number_of_shells):
pomf_value = val[f"POMF_{i + 1}"]
index = np.argmin(
np.abs(np.array(val[self.result_series_keys[1]]) - pomf_value)
)
r_location = val[self.result_series_keys[0]][index]
span = Span(location=r_location, dimension="height", line_dash="dashed")
fig.add_layout(span)
# Add the CN line and hover tool
fig.line(
val[self.result_series_keys[0]],
val[self.result_series_keys[1]],
color=utils.Colour.PRUSSIAN_BLUE,
# legend labels are always the first shell and first shell error.
legend_label=(
f"{selected_species}: {val[self.result_keys[0]]: 0.3E} +-"
f" {val[self.result_keys[1]]: 0.3E}"
),
)
fig.add_tools(HoverTool())
self.plot_array.append(fig)