Source code for cctk.molecule

import math
import copy
import re
import numpy as np
import networkx as nx
from scipy.spatial.distance import cdist
import pkg_resources
import yaml

import cctk
from cctk.helper_functions import (
    get_symbol,
    get_number,
    get_avg_mass,
    compute_rotation_matrix,
    compute_distance_between,
    compute_angle_between,
    compute_dihedral_between,
    compute_unit_vector,
    get_covalent_radius,
    get_vdw_radius,
    numpy_to_bytes,
    bytes_to_numpy,
    _recurse_through_formula,
)
import cctk.topology as top

[docs] class Molecule: """ Class representing a single molecular geometry. In contrast to typical Python behavior, ``atomic_numbers`` and ``geometry`` are indexed from one, to simplify interfacing with computational chemistry programs. This has been done by defining a custom wrapper for ``numpy.ndarray`` called ``cctk.OneIndexedArray``. All other datatypes are indexed from 0. Attributes: name (str): for identification, optional atomic_numbers (cctk.OneIndexedArray, dtype=np.int8): list of atomic numbers geometry (cctk.OneIndexedArray): list of 3-tuples of xyz coordinates - same ordering as ``atomic_numbers`` bonds (nx.Graph or list of tuples): connectivity graph or list of 2-tuples, with each element representing the 1-indexed atom number of a bonded pair charge (int): the charge of the molecule multiplicity (int): the spin state of the molecule (1 corresponds to singlet, 2 to doublet, 3 to triplet, etc. -- so a multiplicity of 1 is equivalent to S=0) vibrational_modes (list of cctk.VibrationalMode): vibrational modes """
[docs] def __init__(self, atomic_numbers, geometry, name=None, bonds=None, charge=0, multiplicity=1, checks=True): """ Create new Molecule object, and assign connectivity if needed. ``bonds`` must be a list of edges (i.e. an n x 2 ``numpy`` array). If ``checks`` is True, the atomic numbers in bonds will all be checked for consistency. This option can be disabled by setting ``checks`` to False, but this is not recommended for external data. """ if len(atomic_numbers) != len(geometry): raise ValueError(f"length of geometry ({len(geometry)}) and atomic_numbers ({len(atomic_numbers)}) does not match!\n{atomic_numbers}\n{geometry}") try: atomic_numbers = np.asarray(atomic_numbers, dtype=np.int8).view(cctk.OneIndexedArray) except Exception: raise ValueError("invalid atom list") try: geometry = np.array(geometry, dtype=np.float32).view(cctk.OneIndexedArray) except Exception: raise TypeError("geometry cannot be cast to ``np.ndarray`` of floats!") if name is not None: if not isinstance(name, str): raise TypeError("name must be a string!") if not isinstance(charge, int): try: charge = int(charge) except Exception: raise TypeError("charge must be integer or castable to integer!") if not isinstance(multiplicity, int): try: multiplicity = int(multiplicity) except Exception: raise TypeError("multiplicity must be positive integer or castable to positive integer") assert multiplicity > 0, "multiplicity must be positive" self.atomic_numbers = atomic_numbers self.geometry = geometry self.name = name self.multiplicity = multiplicity self.charge = charge self.vibrational_modes = list() if isinstance(bonds, nx.Graph): self.bonds = bonds elif isinstance(bonds, (list,np.ndarray,nx.classes.reportviews.EdgeView)): if checks: known_atomic_numbers = set() for bond in bonds: assert len(bond)==2, f"unexpected number of atoms in bond, expected 2, got {len(bond)}" if bond[0] not in known_atomic_numbers: self._check_atom_number(bond[0]) known_atomic_numbers.add(bond[0]) if bond[1] not in known_atomic_numbers: self._check_atom_number(bond[1]) known_atomic_numbers.add(bond[1]) self.bonds = nx.Graph() self.bonds.add_nodes_from(range(1, len(atomic_numbers) + 1)) self.bonds.add_edges_from(bonds, weight=1) elif bonds is None: self.bonds = nx.Graph() self.bonds.add_nodes_from(range(1, len(atomic_numbers)+1)) else: raise ValueError(f"unexpected type for bonds: {type(bonds)}")
def __str__(self): if self.name is not None: return f"Molecule (name={self.name}, {len(self.atomic_numbers)} atoms)" else: return f"Molecule ({len(self.atomic_numbers)} atoms)" def __repr__(self): return str(self) # placeholder # def __eq__(self, other):
[docs] @classmethod def equal(cls, mol1, mol2): """ Atomic numbers, geometry, charge, and multiplicity all must match. Name is irrelevant. """ if not isinstance(mol1, cctk.Molecule): return False if not isinstance(mol2, cctk.Molecule): return False comparisons = [ np.array_equal(mol1.atomic_numbers, mol2.atomic_numbers), np.array_equal(mol1.geometry, mol2.geometry), mol1.charge == mol2.charge, mol1.multiplicity == mol2.multiplicity ] return all(comparisons)
[docs] def assign_connectivity(self, cutoff=0.2, periodic_boundary_conditions=None): """ Automatically recalculates bonds based on covalent radii. If two atoms are closer than the sum of their covalent radii + ``cutoff`` Angstroms, then they are considered bonded. Args: cutoff (float): the threshold (in Angstroms) for how close two covalent radii must be to be considered bonded Returns: self """ #### delete all edges self.bonds = nx.create_empty_copy(self.bonds) assert isinstance(cutoff, (float, int)), "need cutoff to be numeric!" g = self.geometry.view(np.ndarray) dist_matrix = None #### cdist is SO FAST if periodic_boundary_conditions is None: dist_matrix = cdist(g, g, "euclidean") else: # even 16 cdist calls is faster than any other implementation, i tested it pbc = periodic_boundary_conditions assert isinstance(pbc, np.ndarray) and len(pbc) == 3, "Need 3-element ``np.ndarray`` for PBCs" nearby_cells = [ [0, 0, 0], [pbc[0], 0, 0], [0, pbc[1], 0], [0, 0, pbc[2]], [pbc[0], pbc[1], 0], [pbc[0], 0, pbc[2]], [0, pbc[1], pbc[2]], [pbc[0], pbc[1], pbc[2]], ] dist_matrices = [cdist(g, g + np.array(nc), "euclidean") for nc in nearby_cells] dist_matrices += [cdist(g, g - np.array(nc), "euclidean") for nc in nearby_cells] distances_3d = np.stack(dist_matrices) dist_matrix = distances_3d.min(axis=0) covalent_radii = {z: get_covalent_radius(z) for z in set(self.atomic_numbers)} radii_by_num = [covalent_radii[z] for z in self.atomic_numbers] for i in range(1, self.num_atoms() + 1): r_i = radii_by_num[i-1] for j in range(i + 1, self.num_atoms() + 1): distance = dist_matrix[i-1][j-1] r_j = radii_by_num[j-1] # 0.5 A distance is used by RasMol and Chime (documentation available online) and works well, empirically if distance < (r_i + r_j + cutoff): self.add_bond(i, j) return self
[docs] def check_for_conflicts(self, min_buffer=1, group1=None, group2=None): """ Automatically checks for conflicts based on covalent radii. If two atoms are closer than the sum of their covalent radii + buffer, then they are considered clashing. If `group1` and `group2` are selected, then conflicts will only be evaluated between these two groups of atoms. Args: min_buffer (float): the threshold (in Angstroms) for how close two covalent radii must be to be considered clashing. 1.0 A is default, empirically. group1 (list): atoms to evaluate against `group2` (if `None`, defaults to all atoms) group2 (list): atoms to evaluate against `group1` (if `None`, defaults to all atoms) Returns: True if there are no conflicts ValueError if there is a conflict """ if group1 is None: group1 = list(range(1, self.num_atoms() + 1)) if group2 is None: group2 = list(range(1, self.num_atoms() + 1)) for atom in group1 + group2: self._check_atom_number(atom) for i in group1: for j in group2: if i == j: continue distance = self.get_distance(i, j, check=False) r_i = get_covalent_radius(self.get_atomic_number(i)) r_j = get_covalent_radius(self.get_atomic_number(j)) # 0.5 A distance is used by RasMol and Chime (documentation available online) and works well, empirically if distance < (r_i + r_j - min_buffer): # raise ValueError(f"atoms {i} and {j} are too close - distance {distance} A!") return False return True
[docs] def add_bond(self, atom1, atom2, bond_order=1, check=True): """ Adds a new bond to the bond graph, or updates the existing bond order. Will not throw an error if the bond already exists. Args: atom1 (int): the number of the first atom atom2 (int): the number of the second atom bond_order (int): bond order of bond between atom1 and atom2 """ if check: self._check_atom_number(atom1) self._check_atom_number(atom2) assert isinstance(bond_order, int), f"bond order {bond_order} must be an integer" assert bond_order >= 0, f"bond order {bond_order} must be positive" if self.bonds.has_edge(atom1, atom2): if bond_order == 0: self.bonds.remove_edge(atom1, atom2) else: if self.bonds[atom1][atom2]["weight"] != bond_order: self.bonds[atom1][atom2]["weight"] = bond_order elif bond_order > 0: self.bonds.add_edge(atom1, atom2, weight=bond_order)
[docs] def remove_bond(self, atom1, atom2): """ Alias for ``self.add_bond(atom1, atom2, bond_order=0)`` -- more intuitive nomenclature. """ self.add_bond(atom1, atom2, bond_order=0)
def _check_atom_number(self, number): """ Helper method which performs quick checks on the validity of a given atom number. """ assert isinstance(number, int), "atomic number must be integer" assert 0 < number <= self.num_atoms(), "atom number {number} too large! (or too small - needs to be >0)"
[docs] def formula(self, return_dict=False): """ Returns the atomic formula. If ``return_dict`` is ``True``, then returns a ``dictionary`` with keys elemental symbols and values the number of occurrences. For instance, ``water.formula()`` would return ``{'O': 1, 'H': 2}``. If ``return_dict`` is ``False``, then returns a stringified version of the formula according to standard rules. For instance, ``water.formula()`` would return ``H2O``. Args: return_dict (Bool): if the method should return a string or a dictionary Returns: a dictionary or string representing the molecule's formula """ formula_dict = {} for atom in self.atomic_numbers: symbol = get_symbol(atom) if symbol in formula_dict: formula_dict[symbol] += 1 else: formula_dict[symbol] = 1 if return_dict is True: return formula_dict else: formula = "" elements = list(formula_dict.keys()) #### H and C always come first if "C" in elements: elements.remove("C") formula += f"C{formula_dict['C']}" if "H" in elements: elements.remove("H") formula += f"H{formula_dict['H']}" for element in sorted(elements): formula += f"{element}{formula_dict[element]}" return formula
def _get_bond_fragments(self, atom1, atom2): """ Returns the pieces of a molecule that one would obtain by ereaking the bond between two atoms. Will throw ``ValueError`` if the atoms are in a ring. Useful for distance/angle/dihedral scans -- one fragment can be moved and the other held constant. Args: atom1 (int): the number of the first atom atom2 (int): the number of the second atom Returns: fragment1: the list of atoms in fragment 1 (containing atom1) fragment2: the list of atoms in fragment 2 (containing atom2) """ self._check_atom_number(atom1) self._check_atom_number(atom2) assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" bond_order = self.get_bond_order(atom1, atom2) if self.bonds.has_edge(atom1, atom2): self.bonds.remove_edge(atom1, atom2) fragments = nx.connected_components(self.bonds) fragment1 = [] fragment2 = [] for fragment in fragments: if atom1 in fragment: if atom2 in fragment: self.add_bond(atom1, atom2, bond_order) # not adding back this bond causes some pretty pernicious errors raise ValueError(f"Atom {atom1} and atom {atom2} are in a ring or otherwise connected!") else: fragment1 = fragment if atom2 in fragment: fragment2 = fragment self.add_bond(atom1, atom2, bond_order) return list(fragment1), list(fragment2) else: raise ValueError(f"No bond between atom {atom1} and atom {atom2}!") def _get_fragment_containing(self, atom): """ Get the fragment containing the atom with number ``atom``. Args: atom (int): the number of the atom Returns: a list of all the atoms in the fragment """ self._check_atom_number(atom) fragments = nx.connected_components(self.bonds) for fragment in fragments: if atom in fragment: return list(fragment)
[docs] def set_distance(self, atom1=None, atom2=None, distance=None, move="group", atoms=None): """ Adjusts the ``atom1`` -- ``atom2`` bond length to be a fixed distance by moving atom2. If ``move`` is set to "group", then all atoms bonded to ``atom2`` will also be moved. If ``move`` is set to "atom", then only atom2 will be moved. Args: atom1 (int): the number of the first atom atom2 (int): the number of the second atom distance (float): distance in Angstroms of the final bond move (str): determines how fragment moving is handled atoms (list): 2-element list of atom numbers Returns: the Molecule object """ if (atom1 is None) and (atom2 is None): assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" assert len(atoms) == 2, "need 2 atom numbers to set distance" atom1 = atoms[0] atom2 = atoms[1] assert isinstance(distance, (float, int, np.number)), "need distance to set distance" self._check_atom_number(atom1) self._check_atom_number(atom2) if (not isinstance(distance, float)) or (distance < 0): raise ValueError(f"invalid value {distance} for distance!") atoms_to_move = [] if move == "group": if self.get_bond_order(atom1, atom2): _, atoms_to_move = self._get_bond_fragments(atom1, atom2) else: atoms_to_move = self._get_fragment_containing(atom2) elif move == "atom": atoms_to_move = [atom2] else: raise ValueError(f"Invalid option {move} for parameter 'move'!") if (atom1 in atoms_to_move and atom2 in atoms_to_move) and move == "group": raise ValueError('both our atoms are connected which will preclude any movement with ``move`` set to "group"') current_distance = self.get_distance(atom1, atom2) v1 = self.get_vector(atom1) v2 = self.get_vector(atom2) vb = v2 - v1 if np.linalg.norm(vb) - current_distance > 0.00001: raise ValueError("Error calculating bond distance!") #### move all the atoms delta = distance - current_distance unitv = compute_unit_vector(vb) for atom in atoms_to_move: self.geometry[atom] = self.geometry[atom] + (delta * unitv) #### check everything worked okay... v1f = self.get_vector(atom1) v2f = self.get_vector(atom2) vbf = v2f - v1f if np.linalg.norm(vbf) - distance > 0.001: new_dist = np.linalg.norm(vbf) raise ValueError(f"Error moving bonds -- new distance is {new_dist:.3f}. Operation failed!") return self
[docs] def set_angle(self, atom1=None, atom2=None, atom3=None, angle=None, move="group", atoms=None): """ Adjusts the ``atom1`` -- ``atom2`` -- ``atom3`` bond angle to be a fixed value by moving ``atom3``. If `move` is set to "group", then all atoms bonded to ``atom3`` will also be moved. If `move` is set to "atom", then only ``atom3`` will be moved. Args: atom1 (int): the number of the first atom atom2 (int): the number of the second atom atom3 (int): the number of the third atom angle (float): final value in degrees of the ``atom1`` -- ``atom2`` -- ``atom3`` angle move (str): determines how fragment moving is handled atoms (list): 3-element list of atom numbers Returns: the Molecule object """ if (atom1 is None) and (atom2 is None) and (atom3 is None) : assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" assert len(atoms) == 3, "need 3 atom numbers to set angle" atom1 = atoms[0] atom2 = atoms[1] atom3 = atoms[2] assert isinstance(angle, (float, int, np.number)), "need angle to set angle" self._check_atom_number(atom1) self._check_atom_number(atom2) self._check_atom_number(atom3) if self.get_distance(atom1, atom2) < 0.01: raise ValueError(f"atom {atom1} and atom {atom2} are too close!") if self.get_distance(atom2, atom3) < 0.01: raise ValueError(f"atom {atom2} and atom {atom3} are too close!") if self.get_distance(atom1, atom3) < 0.01: raise ValueError(f"atom {atom1} and atom {atom3} are too close!") try: angle = float(angle) except Exception: raise TypeError(f"angle {angle} cannot be converted to float!") if (not isinstance(angle, float)) or ((angle < 0) or (angle > 360)): raise ValueError(f"invalid value {angle} for angle!") atoms_to_move = [] if move == "group": if self.get_bond_order(atom2, atom3): _, atoms_to_move = self._get_bond_fragments(atom2, atom3) elif self.are_connected(atom2, atom3): raise ValueError( f"atom {atom2} and atom {atom3} are connected but not bonded -- cannot adjust angle! try manually removing one or more bonds." ) else: atoms_to_move = self._get_fragment_containing(atom3) elif move == "atom": atoms_to_move = [atom3] else: raise ValueError(f"Invalid option {move} for parameter 'move'!") if atom1 in atoms_to_move: raise ValueError( f"atom {atom1} and atom {atom3} are connected in multiple ways -- cannot adjust angle! try manually removing one or more bonds." ) current_angle = self.get_angle(atom1, atom2, atom3) delta = angle - current_angle if np.abs(delta) < 0.001: return #### now the real work begins... #### move everything to place atom2 at the origin v2 = self.get_vector(atom2) self.translate_molecule(-v2) v1 = self.get_vector(atom1) v3 = self.get_vector(atom3) #### perform the actual rotation rot_axis = np.cross(v1, v3) rot_matrix = compute_rotation_matrix(rot_axis, delta) for atom in atoms_to_move: self.geometry[atom] = np.dot(rot_matrix, self.get_vector(atom)) #### and move it back! self.translate_molecule(v2) final_angle = self.get_angle(atom1, atom2, atom3) #### need to compare cosines to prevent insidious phase difficulties (like 0.00 and 359.99) if np.abs(math.cos(math.radians(final_angle)) - math.cos(math.radians(angle))) > 0.001: raise ValueError(f"Error rotating atoms -- expected angle {angle}, got {final_angle} -- operation failed!") return self
[docs] def set_dihedral(self, atom1=None, atom2=None, atom3=None, atom4=None, dihedral=None, move="group34", check_result=True, atoms=None): """ Adjusts the ``atom1`` -- ``atom2`` -- ``atom3`` -- ``atom4`` dihedral angle to be a fixed value by moving atom 4. If ``move`` is set to "atom", then only ``atom4`` will be moved. If ``move`` is set to "group4", then all atoms bonded to ``atom4`` will also be moved. If ``move`` is set to "group34", then all atoms bonded to ``atom3`` and ``atom4`` will also be moved. Args: atom1 (int): the number of the first atom atom2 (int): the number of the second atom atom3 (int): the number of the third atom atom4 (int): the number of the fourth atom dihedral (float): final value in degrees of the ``atom1`` -- ``atom2`` -- ``atom3`` -- ``atom4`` angle move (str): determines how fragment moving is handled check_result (Bool): whether the final answer should be checked for correctness atoms (list): 4-element list of atomic numbers Returns: the Molecule object """ if (atom1 is None) and (atom2 is None) and (atom3 is None) and (atom4 is None): assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" assert len(atoms) == 4, "need 4 atom numbers to set dihedral" atom1 = atoms[0] atom2 = atoms[1] atom3 = atoms[2] atom4 = atoms[3] assert isinstance(dihedral, (float, int, np.number)), "need angle to set dihedral angle" # check atom numbers self._check_atom_number(atom1) self._check_atom_number(atom2) self._check_atom_number(atom3) self._check_atom_number(atom4) # check there is bond connectivity information assert len(self.bonds) > 0, "no bond connectivity information" # check for collinearity angle = self.get_angle(atom1, atom2, atom3, check=False) assert 0.0001 < angle < 179.9999, f"1/2/3 atoms {atom1}-{atom2}-{atom3} are collinear (angle={angle:.8f})" angle = self.get_angle(atom2, atom3, atom4, check=False) assert 0.0001 < angle < 179.9999, f"2/3/4 atoms {atom2}-{atom3}-{atom4} are collinear (angle={angle:.8f})" for x in [atom1, atom2, atom3, atom4]: for y in [atom1, atom2, atom3, atom4]: if x <= y: continue else: if self.get_sq_distance(x, y, check=False) < 0.001: raise ValueError(f"atom {x} and atom {y} are too close!") try: dihedral = float(dihedral) except Exception: raise TypeError(f"dihedral angle {dihedral} cannot be converted to float!") if (not isinstance(dihedral, float)) or ((dihedral < 0) or (dihedral > 360)): raise ValueError(f"invalid value {dihedral} for dihedral angle!") atoms_to_move = [] if move == "group34": #### add atom3's fragment to atom4 if self.get_bond_order(atom2, atom3): _, atoms_to_move = self._get_bond_fragments(atom2, atom3) elif self.are_connected(atom2, atom3): raise ValueError( f"atom {atom2} and atom {atom3} are connected but not bonded -- cannot adjust dihedral angle! try manually removing one or more bonds." ) else: atoms_to_move = self._get_fragment_containing(atom3) #### and make sure atom4 is in there too! if atom4 not in atoms_to_move: atoms_to_move += self._get_fragment_containing(atom4) elif move == "group4": if self.get_bond_order(atom3, atom4): _, atoms_to_move = self._get_bond_fragments(atom3, atom4) elif self.are_connected(atom3, atom4): raise ValueError( f"atom {atom3} and atom {atom4} are connected but not bonded -- cannot adjust dihedral angle! try manually removing one or more bonds." ) else: atoms_to_move = self._get_fragment_containing(atom4) elif move == "atom": atoms_to_move = [atom4] else: raise ValueError(f"Invalid option {move} for parameter 'move'!") if atom1 in atoms_to_move: raise ValueError( f"atom {atom1} and atom {atom4} are connected in multiple ways -- cannot adjust dihedral angle! try manually removing one or more bonds." ) if atom2 in atoms_to_move: raise ValueError( f"atom {atom2} and atom {atom4} are connected in multiple ways -- cannot adjust dihedral angle! try manually removing one or more bonds." ) if atom4 not in atoms_to_move: raise ValueError(f"atom {atom4} is not going to be moved... this operation is doomed to fail!") current_dihedral = self.get_dihedral(atom1, atom2, atom3, atom4, check=False) delta = (dihedral - current_dihedral) % 360 if np.abs(delta) < 0.001: return self #### now the real work begins... #### move everything to place atom2 at the origin v3 = self.get_vector(atom3, check=False) self.translate_molecule(-v3) #### perform the actual rotation rot_matrix = compute_rotation_matrix(-self.get_vector(atom2, check=False), delta) for atom in atoms_to_move: self.geometry[atom] = np.dot(rot_matrix, self.get_vector(atom, check=False)) #### and move it back! self.translate_molecule(v3) if check_result: final_dihedral = self.get_dihedral(atom1, atom2, atom3, atom4, check=False) #### need to compare cosines to prevent insidious phase difficulties (like 0.00 and 359.99) #### this will throw ValueError for differences of about 2 degrees if np.abs(math.cos(math.radians(final_dihedral)) - math.cos(math.radians(dihedral))) > 0.001: raise ValueError(f"Error rotating atoms -- expected dihedral angle {dihedral}, got {final_dihedral} -- operation failed!") return self
[docs] def translate_molecule(self, vector): """ Translates the whole molecule by the given vector. Args: vector (vector): the vector to translate by Returns: the Molecule object """ # for atom in range(1, self.num_atoms() + 1): # self.geometry[atom] = self.geometry[atom] + vector self.geometry += vector return self
[docs] def rotate_molecule(self, axis, degrees): """ Rotates the whole molecule around the given axis. Args: axis (vector): the vector to rotate about theta (float): how much to rotate (in degrees) Returns: the Molecule object """ rot_matrix = compute_rotation_matrix(axis, degrees) for atom in range(1, self.num_atoms() + 1): self.geometry[atom] = np.dot(rot_matrix, self.geometry[atom]) return self
[docs] def calculate_mass_spectrum(self, **kwargs): """ Generates list of m/z values. Final weights rounded to one decimal point (because of low-res MS). """ form_vec = np.zeros(shape=92, dtype=np.int8) for z in self.atomic_numbers: form_vec[z] += 1 masses, weights = _recurse_through_formula(form_vec, [0], [1], **kwargs) new_masses, indices = np.unique(np.round(masses, decimals=1), return_inverse=True) new_weights = np.zeros_like(new_masses) for k in range(len(new_weights)): new_weights[k] = np.sum(weights[np.nonzero(indices == k)]) new_weights = new_weights / np.max(new_weights) return new_masses, new_weights
[docs] def add_atom_at_centroid(self, symbol, atom_numbers, weighted=False): """ Adds atom with symbol ``symbol`` at the centroid of the atoms in ``atom_numbers``. If ``weighted`` is ``True``, then the centroid calculation will take into account the atomic numbers of the atoms in question (placing the atom closer to more massive atoms). Otherwise, the average is unweighted. Args: symbol (str): the atomic symbol of the atom to be added atom_numbers (list): which atoms to put the new atom between weighted (Bool): if the centroid calculation should be weighted (see above) Returns: the Molecule object """ if (not isinstance(atom_numbers, list)) or (len(atom_numbers) < 2): raise TypeError("atom_numbers must be list with at least two elements") if not isinstance(symbol, str): raise TypeError(f"symbol {symbol} must be a string!") coords = [None] * len(atom_numbers) weights = [1] * len(atom_numbers) for index, atom in enumerate(atom_numbers): self._check_atom_number(atom) coords[index] = self.get_vector(atom) if weighted is True: weights[index] = self.atomic_numbers[atom] new_coord = list(np.average(coords, weights=weights, axis=0)) return self.add_atom(coordinates=new_coord, symbol=symbol)
[docs] def add_atom(self, symbol, coordinates): """ Add an atom with symbol ``symbol`` at position ``coordinates``. Args: symbol (str): symbol of the atom (e.g. "Cl", "Ar", "C") coordinates (list): the coordinates to add Returns: the Molecule object """ if (not isinstance(coordinates, (list, np.ndarray)) or (len(coordinates) != 3)): raise TypeError("coordinates must be list with three elements") if not isinstance(symbol, str): raise TypeError(f"symbol {symbol} must be a string!") number = get_number(symbol) self.atomic_numbers = np.append(self.atomic_numbers, [number]).astype(np.int8).view(cctk.OneIndexedArray) self.geometry = np.append(self.geometry, [coordinates], axis=0).view(cctk.OneIndexedArray) self.bonds.add_node(self.num_atoms()) return self
[docs] def remove_atom(self, number): """ Remove the atom with number ``number``. Args: number (int): number of the atom Returns: the Molecule object """ self._check_atom_number(number) try: self.bonds.remove_node(number) self.geometry = np.delete(self.geometry, number - 1, axis=0).view(cctk.OneIndexedArray) self.atomic_numbers = np.delete(self.atomic_numbers, number - 1).view(cctk.OneIndexedArray) #### need to renumber to fill gaps self.bonds = nx.convert_node_labels_to_integers(self.bonds, first_label=1, ordering="sorted") return self except Exception: raise ValueError("removing atom {number} failed!")
[docs] def get_atomic_number(self, atom): """ Get the atomic number for a given atom. Args: atom (int): number of the first atom Returns: atomic_number (int): the atomic number of that atom """ self._check_atom_number(atom) return self.atomic_numbers[atom]
[docs] def get_atomic_symbol(self, atom): """ Get the atomic symbol for a given atom. Args: atom (int): number of the first atom Returns: atomic_symbol (str): the atomic symbol of that atom """ atomic_number = self.get_atomic_number(atom) return get_symbol(atomic_number)
[docs] def get_atomic_symbols(self): """ Get a list of atomic symbols for this Molecule. Returns: atomic_symbols (cctk.OneIndexedArray): the atomic symbols """ n_atoms = self.get_n_atoms() sym = [ self.get_atomic_symbol(i) for i in range(1,n_atoms+1) ] return cctk.OneIndexedArray(sym)
[docs] def get_n_atoms(self): """ Determine how many atoms are in this Molecule. Returns n_atoms (int): the number of atoms """ return len(self.atomic_numbers)
[docs] def get_vector(self, atom, atom2=None, check=True): """ Get the geometry vector for a given atom. If two atoms are specified, gives the vector connecting them (from ``atom2`` to ``atom``). ``mol.get_vector(atom)`` is thus equivalent to ``mol.get_vector(atom, origin)``. Args: atom1 (int): number of the first atom atom2 (int): number of the second atom (optional) check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) Returns: a Numpy array """ if check: self._check_atom_number(atom) if atom2: if check: self._check_atom_number(atom2) return (self.geometry[atom] - self.geometry[atom2]).view(np.ndarray) else: return self.geometry[atom].view(np.ndarray)
[docs] def get_distance(self, atom1=None, atom2=None, check=True, _dist=compute_distance_between, atoms=None): """ Wrapper to compute distance between two atoms. This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_dist``). Args: atom1 (int): number of the first atom atom2 (int): number of the second atom check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) _dist (function): function usd to compute distance atoms (list): list of atomic numbers Returns: the distance, in Angstroms """ if (atom1 is None) and (atom2 is None): assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" assert len(atoms) == 2, "need 2 atom numbers to get distance" atom1 = atoms[0] atom2 = atoms[1] if check: try: atom1 = int(atom1) atom2 = int(atom2) except Exception: raise TypeError("atom numbers cannot be cast to int!") self._check_atom_number(atom1) self._check_atom_number(atom2) return _dist(self.get_vector(atom1, check=False), self.get_vector(atom2, check=False))
[docs] def get_sq_distance(self, atom1, atom2, check=True): """ Wrapper to compute squared distance between two atoms -- optimized for speed! Args: atom1 (int): number of the first atom atom2 (int): number of the second atom check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) Returns: the squared distance """ if check: try: atom1 = int(atom1) atom2 = int(atom2) except Exception: raise TypeError("atom numbers cannot be cast to int!") self._check_atom_number(atom1) self._check_atom_number(atom2) return np.sum(np.square(self.get_vector(atom1, atom2, check=False)))
[docs] def get_angle(self, atom1=None, atom2=None, atom3=None, check=True, _angle=compute_angle_between, atoms=None): """ Wrapper to compute angle between three atoms. This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_angle``). Args: atom1 (int): number of the first atom atom2 (int): number of the second atom atom3 (int): number of the third atom check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) _angle (function): function usd to compute angle atoms (list): list of atom numbers Returns: the angle, in degrees """ if (atom1 is None) and (atom2 is None) and (atom3 is None): assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" assert len(atoms) == 3, "need 3 atom numbers to get angle" atom1 = atoms[0] atom2 = atoms[1] atom3 = atoms[2] if check: try: atom1 = int(atom1) atom2 = int(atom2) atom3 = int(atom3) except Exception: raise TypeError("atom numbers cannot be cast to int!") self._check_atom_number(atom1) self._check_atom_number(atom2) self._check_atom_number(atom3) v1 = self.get_vector(atom1, check=False) v2 = self.get_vector(atom2, check=False) v3 = self.get_vector(atom3, check=False) return _angle(v1 - v2, v3 - v2)
[docs] def get_dihedral(self, atom1=None, atom2=None, atom3=None, atom4=None, check=True, _dihedral=compute_dihedral_between, atoms=None): """ Wrapper to compute dihedral angle between four atoms. This function is relatively slow (rate-limiting for certain applications), so performance boosts have been implemented (e.g. preloading ``_dihedral``). Args: atom1 (int): number of the first atom atom2 (int): number of the second atom atom3 (int): number of the third atom atom4 (int): number of the fourth atom check (Bool): whether to validate input data (can be overridden to prevent slow double-checking) _dihedral (function): function used to compute dihedral atoms (list): list of atom numbers Returns: the dihedral angle, in degrees """ if (atom1 is None) and (atom2 is None) and (atom3 is None) and (atom4 is None): assert isinstance(atoms, (list, np.ndarray)), "atom numbers need to come from fields or list!" assert len(atoms) == 4, "need 4 atom numbers to get dihedral angle" atom1 = atoms[0] atom2 = atoms[1] atom3 = atoms[2] atom4 = atoms[3] if check: try: atom1 = int(atom1) atom2 = int(atom2) atom3 = int(atom3) atom4 = int(atom4) except Exception: raise TypeError("atom numbers cannot be cast to int!") self._check_atom_number(atom1) self._check_atom_number(atom2) self._check_atom_number(atom3) self._check_atom_number(atom4) return _dihedral( self.get_vector(atom1, check=False), self.get_vector(atom2, check=False), self.get_vector(atom3, check=False), self.get_vector(atom4, check=False), )
[docs] def get_bond_order(self, atom1, atom2): """ Wrapper to get bond order between two atoms. Args: atom1 (int): number of the first atom atom2 (int): number of the second atom Returns: the bond order """ self._check_atom_number(atom1) self._check_atom_number(atom2) if self.bonds.has_edge(atom1, atom2): return self.bonds[atom1][atom2]["weight"] else: return 0
[docs] def are_connected(self, atom1, atom2): """ Wrapper to tell if two atoms are connected. """ self._check_atom_number(atom1) self._check_atom_number(atom2) if atom1 in self._get_fragment_containing(atom2): return True else: return False
[docs] def get_atoms_by_symbol(self, symbol): """ Returns all the numbers of atoms of type ``symbol`` in the molecule. """ if not isinstance(symbol, str): raise TypeError("symbol {symbol} must be a string") number = get_number(symbol) atoms = [] for index, atom in enumerate(self.atomic_numbers, start=1): if atom == number: atoms.append(index) return atoms
[docs] def get_heavy_atoms(self): """ Returns a list of all the heavy atoms in the molecule (i.e., not hydrogen), for array indexing. """ atoms = [] for index, atom in enumerate(self.atomic_numbers, start=1): if atom != 1: atoms.append(index) return atoms
[docs] def get_adjacent_atoms(self, atom): """ Returns a list of the neighbors of ``atom``. If ``atom`` has no neighbors, an empty list will be returned. """ try: atom = int(atom) except Exception: raise TypeError(f"atom number {atom} cannot be cast to int!") self._check_atom_number(atom) return list(self.bonds.neighbors(atom))
[docs] def num_atoms(self): return len(self.atomic_numbers)
[docs] def rms_distance_between_atoms(self): """ Returns the RMS distance (in Angstroms) between every pair of atoms - a quick, easy-to-calculate proxy for minimizing steric clashes. """ distance = 0 for i in range(1, self.num_atoms() + 1): for j in range(1, self.num_atoms() + 1): if i == j: continue distance += self.get_distance(i, j) ** 2 return math.sqrt(distance) / self.num_atoms()
[docs] def optimize_dihedral(self, atom1, atom2, atom3, atom4, step=10): """ Minimizes the value of ``self.rms_distance_between_atoms`` for the given dihedral, in one-degree increments. A cheap alternative to geometry optimization using *ab initio* methods or density functional theory. Args: atom1 (int): atom number of the first atom in the dihedral atom2 (int): atom number of the second atom in the dihedral atom3 (int): atom number of the third atom in the dihedral atom4 (int): atom number of the fourth atom in the dihedral step (float): explore angles from 0 to 360 with this stepsize in degrees Returns: the final value of the angle """ self._check_atom_number(atom1) self._check_atom_number(atom2) self._check_atom_number(atom3) self._check_atom_number(atom4) best_angle = 0 best_dist = 0 for angle in range(0, 360, step): self.set_dihedral(atom1, atom2, atom3, atom4, angle) if self.rms_distance_between_atoms() > best_dist: best_dist = self.rms_distance_between_atoms() best_angle = angle self.set_dihedral(atom1, atom2, atom3, atom4, best_angle) return best_angle
[docs] def atom_string(self, atom): """ Returns the elemental symbol and the atom number for a given atom. For example, ``methane.atom_string(1)`` might return "C1". Args: atom (int): number of the atom Returns: the aforementioned atom string """ try: atom = int(atom) except Exception: raise ValueError("atom cannot be cast to int") self._check_atom_number(atom) return f"{get_symbol(self.atomic_numbers[atom])}{atom}"
[docs] def perturb(self, size=0.005): """ This function can be used to generate a slightly different molecule in cases where numerical (or geometric) converge is problematic. It adds a random variable (sampled from a normal distribution, centered at 0 with stddev ``size`) to every number in ``self.geometry``. Args: size (float): stddev of the normal distribution Returns: the Molecule object """ geometry = self.geometry random = np.random.normal(scale=size, size=geometry.shape) self.geometry = geometry + random return self
[docs] def center(self): """ Moves the centroid to the origin. """ atoms = np.arange(1, self.num_atoms()+1) self.translate_molecule(-self.geometry[atoms].mean(axis=0)) return self
[docs] @classmethod def combine_molecules(cls, molecule1, molecule2): """ Combine two molecules into one final molecule. Bonding information is not currently preserved. Args: molecule1 (Molecule): 1st molecule molecule2 (Molecule): 2nd molecule Returns: new ``Molecule`` object """ atoms = np.hstack((molecule1.atomic_numbers.T, molecule2.atomic_numbers.T)).view(cctk.OneIndexedArray) geoms = np.vstack((molecule1.geometry, molecule2.geometry)).view(cctk.OneIndexedArray) charge = molecule1.charge + molecule2.charge s1 = (molecule1.multiplicity - 1) / 2 s2 = (molecule2.multiplicity - 1) / 2 multiplicity = (s1+s2) * 2 + 1 return Molecule(atoms, geoms, charge=charge, multiplicity=multiplicity)
[docs] def volume(self, pts_per_angstrom=10, qhull=False): """ Returns volume calculated using the Gavezotti algorithm (JACS, 1983, 105, 5220). Relatively slow. If MemoryError, defaults to a qhull-based approach (accurate in the limit as number of atoms goes to infinity) Args: pts_per_angstrom (int): how many grid points to use per Å - time scales as O(n**3) so be careful! qhull (bool): use faster QHull algorithm Returns: volume in Å**3 """ if not qhull: try: assert isinstance(pts_per_angstrom, int), "Need an integer number of pts per Å!" assert pts_per_angstrom > 0, "Need a positive integer of pts per Å!" box_max = np.max(self.geometry.view(np.ndarray), axis=0) + 4 box_min = np.min(self.geometry.view(np.ndarray), axis=0) - 4 box_volume = (box_max[0] - box_min[0]) * (box_max[1] - box_min[1]) * (box_max[2] - box_min[2]) x_vals = np.linspace(box_min[0], box_max[0], int((box_max[0] - box_min[0]) * pts_per_angstrom)) y_vals = np.linspace(box_min[1], box_max[1], int((box_max[1] - box_min[1]) * pts_per_angstrom)) z_vals = np.linspace(box_min[2], box_max[2], int((box_max[2] - box_min[2]) * pts_per_angstrom)) # generate list of box points box_pts = np.stack([np.ravel(a) for a in np.meshgrid(x_vals, y_vals, z_vals)], axis=-1) # caching to speed call vdw_radii = {z: get_vdw_radius(z) for z in set(self.atomic_numbers)} radii_per_atom = np.array([vdw_radii[z] for z in self.atomic_numbers]).reshape(-1,1) # this is the slow part since it's approximately a zillion operations dists_per_atom = cdist(self.geometry.view(np.ndarray), box_pts) occupied = np.sum(np.max(dists_per_atom < radii_per_atom, axis=0)) percent_occupied = occupied / box_pts.shape[0] return percent_occupied * box_volume except MemoryError: qhull = True if qhull: import scipy hull = scipy.spatial.ConvexHull(self.geometry.view(np.ndarray)) return hull.volume
[docs] def swap_atom_numbers(self, atom1, atom2): """ Interchanges the numbers of ``atom1`` and ``atom2``. Args: atom1 (int): number of 1st atom atom2 (int): number of 2nd atom Returns new ``Molecule`` object (does not modify in-place) """ self._check_atom_number(atom1) self._check_atom_number(atom2) mol = copy.deepcopy(self) z1 = mol.atomic_numbers[atom1] z2 = mol.atomic_numbers[atom2] g1 = copy.deepcopy(mol.geometry[atom1]) g2 = copy.deepcopy(mol.geometry[atom2]) mol.atomic_numbers[atom2] = z1 mol.atomic_numbers[atom1] = z2 mol.geometry[atom2] = g1 mol.geometry[atom1] = g2 mapping = {atom2: atom1, atom1: atom2} mol.bonds = nx.relabel_nodes(mol.bonds, mapping, copy=True) return mol
[docs] def epimerize(self, center_atom, substituent1, substituent2): """ Epimerizes ``center_atom`` by exchanging the groups corresponding to ``substituent1`` and ``substituent2``. Both substituents must be bonded to the center atom! Args: center_atom (int): number of middle atom substituent1 (int): number of 1st atom substituent1 (int): number of 2nd atom Returns new ``Molecule`` object (does not modify in-place) """ self._check_atom_number(center_atom) self._check_atom_number(substituent1) self._check_atom_number(substituent2) assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" adj = self.get_adjacent_atoms(center_atom) assert len(adj) == 4, "center atom must be making 4 bonds!" assert substituent1 in adj, "1st substituent is not bonded to center atom!" assert substituent2 in adj, "2nd substituent is not bonded to center atom!" #### remove both substituents mol, group1, mmap1, gmap1 = cctk.Group.remove_group_from_molecule(self, center_atom, substituent1, return_mapping=True) mol, group2, mmap2, gmap2 = cctk.Group.remove_group_from_molecule(mol, mmap1[center_atom], mmap1[substituent2], return_mapping=True) h1 = mol.num_atoms() - 1 h2 = mol.num_atoms() #### add them back in the opposite fashion mol, mmap3, gmap3 = cctk.Group.add_group_to_molecule(mol, group2, h1, return_mapping=True) mol = cctk.Group.add_group_to_molecule(mol, group1, mmap3[h2]) #### relabel new graph to match original molecule which = top.get_stereogenic_centers(self) which.remove(center_atom) return mol.renumber_to_match(self, check_chirality=which)
[docs] def renumber_to_match(self, model, check_chirality="all"): """ Renumbers atoms to match ``model`` (must have isomorphic bond graph). Returns a copy of ``self`` with renumbered atoms. Args: model (cctk.Molecule): isomorphic molecule to renumber by check_chirality (list of atomic numbers): atomic numbers to check, to prevent inversion due to graph isomorphism. Alternatively ``None`` will prevent any checking and "all" will use ``cctk.topology.get_exchangable_centers()``. Returns: new ``Molecule`` object """ assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" #### use networkx to generate mapping #### you need the node matcher to distinguish between e.g. H, F, Cl self._add_atomic_numbers_to_nodes() model._add_atomic_numbers_to_nodes() nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0) match = nx.algorithms.isomorphism.GraphMatcher(model.bonds, self.bonds, node_match=nm) assert match.is_isomorphic(), "can't renumber non-isomorphic graphs!" new_ordering = [match.mapping[x] for x in range(1, self.num_atoms() + 1)] inv_mapping = {v:k for k,v in match.mapping.items()} # bit kludgy but works #### create renumbered molecule mol = copy.deepcopy(self) mol.atomic_numbers = self.atomic_numbers[new_ordering] mol.geometry = self.geometry[new_ordering] mol.bonds = nx.relabel_nodes(self.bonds, mapping=inv_mapping, copy=True) if check_chirality == "all": check_chirality = top.get_exchangeable_centers(mol) #### diastereotopic protons get scrambled by the above code so we gotta go through and fix all of them #### this happens because networkx doesn't store chirality - a known limitation of graph molecular encoding! if isinstance(check_chirality, list): #### find all the differences and exchange them model_report = top.get_chirality_report(model, check_chirality) #### generate all meso ring permutations candidates = top.flip_meso_rings(mol, atoms=check_chirality) #### for each, try flipping configuration of all centers for candidate in candidates: report = top.get_chirality_report(candidate, check_chirality) for center in check_chirality: if model_report[center] != report[center]: try: candidate = top.exchange_identical_substituents(candidate, center) except ValueError: break #### check that we actually fixed all the problems mol_report = top.get_chirality_report(candidate, check_chirality) all_good = True for center in check_chirality: if mol_report[center] != model_report[center]: all_good = False break #### if we did, then return if all_good: return candidate raise ValueError("can't get a proper renumbering: are you *sure* these two molecules can have the same chirality?")
def _add_atomic_numbers_to_nodes(self): """ Add the atomic numbers to each node attribute, to allow for distinguishment of F and H during graph renumbering. """ nx.set_node_attributes(self.bonds, {z: {"atomic_number": self.atomic_numbers[z]} for z in range(1, self.num_atoms() + 1)})
[docs] def is_atom_in_ring(self, atom): assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" cycles = nx.cycle_basis(self.bonds, root=atom) for cycle in cycles: if atom in cycle: return True return False
[docs] def get_components(self): """ Returns a list of all the connected components in a molecule. """ assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" fragments = nx.connected_components(self.bonds) return [list(f) for f in list(fragments)]
[docs] def limit_solvent_shell(self, solute=0, num_atoms=0, num_solvents=10, distance_from_atom=None, return_idxs=False): """ Automatically detects solvent molecules and removes them until you have a set number of solvents or atoms. The "distance" between molecules is the minimum of the pairwise atomic distances, to emphasize inner-sphere interactions. Args: solute (int): which fragment is the solute, 0-indexed num_atoms (int): remove atoms until there are this number (modulo the size of a solvent molecule) num_solvents (int): remove solvent molecules until there are this number distance_from_atom (int): if you want to find molecules closest to a given atom in the solute, specify the atom number here. if this atom is not in the solute fragment, an exception will be raised. return_idxs (bool): if True, indices of atoms that would be in the new molecule are returned. no change is made to ``self``. Returns: new ``Molecule`` object """ assert isinstance(num_atoms, int) assert isinstance(num_solvents, int) fragments = self.get_components() solute_x = self.geometry[fragments[solute]].view(np.ndarray) if distance_from_atom: assert distance_from_atom in fragments[solute], f"{distance_from_atom} is not in the solute fragment" solute_x = self.geometry[[distance_from_atom]].view(np.ndarray) distances = np.zeros(shape=len(fragments)) for i, f in enumerate(fragments): if i == solute: distances[i] = 0 else: solvent_x = self.geometry[f].view(np.ndarray) # cdist is absurdly fast pairwise_distances = cdist(solvent_x, solute_x) distances[i] = np.min(pairwise_distances) mol = copy.deepcopy(self) #### reverse order - farthest away comes first order = np.argsort(distances)[::-1] current_num_solvents = len(fragments) - 1 current_num_atoms = mol.num_atoms() to_remove = [] for i in order: for j in fragments[i]: to_remove.append(j) current_num_atoms += -1 current_num_solvents += -1 if current_num_atoms <= num_atoms or num_solvents == current_num_solvents: if return_idxs: all_idxs = set(range(1,self.num_atoms())) return list(all_idxs - set(to_remove)) else: #### have to remove in reverse direction for indexing consistency for j in sorted(to_remove, reverse=True): mol.remove_atom(j) return mol
[docs] def center_periodic(self, center, side_length): """ Adjusts a molecule to be in the center of a cube, moving all other molecules accordingly. Bonded subgroups will be moved as a unit. For analysis of MD files with periodic boundary conditions. Args: center (int): atomic number to center side_length (float): length of side, in Å """ self._check_atom_number(center) assert isinstance(side_length, (int, float)) assert side_length > 0 #### Center the atom of interest self.geometry += -1 * self.geometry[center] self.geometry += side_length / 2 for f in self.get_components(): centroid = np.mean(self.geometry[f], axis=0) self.geometry[f] += -1 * np.floor_divide(centroid, side_length) * side_length return self
[docs] @classmethod def new_from_name(cls, name): """ Create a new ``Molecule`` instance using ``rdkit``. """ assert isinstance(name, str) from urllib.request import urlopen try: url_name = re.sub(" ", "%20", name) url = 'http://cactus.nci.nih.gov/chemical/structure/' + url_name + '/smiles' smiles = urlopen(url, timeout=5).read().decode('utf8') return cls.new_from_smiles(smiles) except Exception as e: raise ValueError(f"something went wrong auto-generating molecule {name}:\nurl: {url}\n{e}")
[docs] @classmethod def new_from_smiles(cls, smiles): """ Create a new ``Molecule`` instance using ``rdkit``. """ assert isinstance(smiles, str) try: from rdkit.Chem import AllChem as Chem except ImportError as e: raise ImportError(f"``rdkit`` must be installed for this function to work!\n{e}") rdkm = Chem.MolFromSmiles(smiles) rdkm = Chem.AddHs(rdkm) # https://github.com/rdkit/rdkit/issues/1433 try: Chem.EmbedMolecule(rdkm, maxAttempts=5000) Chem.MMFFOptimizeMolecule(rdkm) except Exception: Chem.EmbedMolecule(rdkm, maxAttempts=5000, useRandomCoords=True) Chem.MMFFOptimizeMolecule(rdkm) nums = [] for atom in rdkm.GetAtoms(): nums.append(atom.GetAtomicNum()) geom = rdkm.GetConformers()[0].GetPositions() return cls(nums, geom)
[docs] def fragment(self): """ Returns list of ``cctk.Molecule`` objects based on the bond-connected components of ``self``. """ fragments = list() indices = self.get_components() for idx in indices: mol = cctk.Molecule(self.atomic_numbers[idx], self.geometry[idx]).assign_connectivity() fragments.append(mol) return fragments
[docs] def get_symmetric_atoms(self): """ Returns lists of symmetric atoms, as defined in ``cctk.load_group``. Useful for NMR spectroscopy, etc. """ from cctk.load_groups import group_iterator symmetric_sets = [] for group in group_iterator(symmetric_only=True): # this gives us a list of dictionaries mapping from self.atomic_numbers to group numbers matches = top.find_group(self, group) for m in matches: i = {v: k for k,v in m.items()} for n in group.isomorphic: symmetric_sets.append([i[idx] for idx in n]) #### some groups overlap (e.g. methyl and t-butyl), so now we collapse the overlapping sets for i, s1 in enumerate(symmetric_sets): for j, s2 in enumerate(symmetric_sets[i+1:]): if set(s1).intersection(set(s2)): symmetric_sets[i + j + 1] = list(set(s1).union(s2)) symmetric_sets[i] = None # can't delete yet - messes up indexing #### now we delete symmetric_sets = list(filter(None, symmetric_sets)) return symmetric_sets
[docs] def atomic_symbols(self): """ Return list of atomic symbols. """ symbols = {z: get_symbol(z) for z in set(self.atomic_numbers)} return [symbols[z] for z in self.atomic_numbers]
[docs] def optimize(self, inplace=True, nprocs=1, return_energy=False): """ Optimize molecule at the GFN2-xtb level of theory. Args: inplace (Bool): whether or not to return a new molecule or simply modify ``self.geometry`` nprocs (int): number of processors to use return_energy (Bool): whether to return energy or not """ import cctk.optimize as opt assert isinstance(nprocs, int), "nprocs must be int!" optimized, energy = opt.optimize_molecule(self, nprocs=nprocs, return_energy=True) if inplace: self.geometry = optimized.geometry if return_energy: return self, energy else: return self else: if return_energy: return optimized, energy else: return optimized
[docs] def compute_energy(self, nprocs=1): """ Compute energy of molecule at the GFN2-xtb level of theory. Args: nprocs (int): number of processors to use """ import cctk.optimize as opt assert isinstance(nprocs, int), "nprocs must be int!" energy = opt.get_energy(self, nprocs=nprocs) return energy
[docs] def csearch(self, nprocs=1, constraints=[], logfile=None, noncovalent=False, use_tempdir=True, gfn=2, additional_flags=None): """ Optimize molecule at the GFN2-xtb level of theory. Args: nprocs (int): number of processors to use constraints (list): atoms numbers to freeze noncovalent (bool): whether or not to use non-covalent settings logfile (str): file to write ongoing ``crest`` output to use_tempdir (bool): write intermediate files to hidden directory (as opposed to current directory) gfn (int or ``ff``): level of theory, either 1, 2, or ``ff`` additional_flags (str): additional flags for command line Returns ConformationalEnsemble """ import cctk.optimize as opt assert isinstance(nprocs, int), "nprocs must be int!" return opt.csearch(molecule=self, nprocs=nprocs, constraints=constraints, noncovalent=noncovalent, logfile=logfile, use_tempdir=use_tempdir, gfn=gfn, additional_flags=additional_flags)
[docs] def num_neighbors_by_atom(self): """ Returns a list of the number of neighbors of each atom. """ result = [] for i in range(self.num_atoms()): result.append(len(self.get_adjacent_atoms(i))) return result
[docs] def atoms_moving_in_imaginary(self, max_num=5, percent_cutoff=0.03, return_string=False): """ Returns atoms moving in imaginary, ranked by how much they're moving. Args: max_num (int): how many atoms max to return percent_cutoff (float): threshold for what percent of total TS movement qualifies as "movement" return_string (bool): whether or not to return a formatted string report Returns: list of atomic numbers or string """ imaginary = 0 ts_mode = None for mode in self.vibrational_modes: if mode.frequency < imaginary: imaginary = mode.frequency ts_mode = mode if ts_mode is None: if return_string: return "" else: return None displacements = np.linalg.norm(ts_mode.displacements.view(np.ndarray), axis=-1) atoms_ranked = np.argsort(displacements)[::-1] + 1 percent_movement = np.sort(displacements)[::-1] / np.sum(displacements) return_list, string = list(), "" for atom, percent in zip(atoms_ranked, percent_movement): if percent > percent_cutoff and len(return_list) <= max_num: return_list.append(atom) string += f"{self.atom_string(atom)} ({percent:.1%}), " else: if return_string: return string[:-2] else: return return_list
[docs] def to_string(self): """ Save the current molecule as a string, for subsequent loading. Not human-readable. Vibrational modes are currently not saved. """ # name, charge, multiplicity need no encoding atomic_number_encoding = numpy_to_bytes(self.atomic_numbers.view(np.ndarray)) geometry_encoding = numpy_to_bytes(self.geometry.view(np.ndarray)) bonds_encoding = numpy_to_bytes(nx.convert_matrix.to_numpy_array(self.bonds)) if self.name is None: self.name = "name" cctk_version = pkg_resources.get_distribution("cctk").version store_dict = { "name": self.name, "charge": self.charge, "multiplicity": self.multiplicity, "atomic_numbers": atomic_number_encoding, "geometry": geometry_encoding, "bonds": bonds_encoding, "cctk_version": cctk_version, } return yaml.dump(store_dict)
[docs] @classmethod def from_string(cls, string, check_version=True): """ Loads a ``cctk.Molecule`` object from a string. Arguments: string (str): stringified version of the molecule check_version (bool): whether version consistency should be enforced """ try: store_dict = yaml.safe_load(string) if check_version: cctk_version = pkg_resources.get_distribution("cctk").version assert cctk_version == store_dict["cctk_version"], f"Warning: the data was saved in cctk {store_dict['cctk_version']} but is being loaded in cctk {cctk_version}!" atomic_numbers = bytes_to_numpy(store_dict["atomic_numbers"]).astype(np.int8) geometry = bytes_to_numpy(store_dict["geometry"]).astype(np.float32) bonds = nx.convert_matrix.from_numpy_array(bytes_to_numpy(store_dict["bonds"])) mol = cls( atomic_numbers, geometry, bonds=bonds, charge=store_dict["charge"], multiplicity=store_dict["multiplicity"], name=store_dict["name"], checks=False, # trust nx data implicitly ) return mol except Exception as e: raise ValueError(f"this stringified Molecule fails import: {e}")
[docs] def coulomb_analysis(self, atoms1, atoms2, charges): """ Computes the net Coulomb forces between atoms ``atoms1`` and atoms ``atoms2``. """ if isinstance(charges, np.ndarray): charges = charges.view(cctk.OneIndexedArray) elif isinstance(charges, list): charges = cctk.OneIndexedArray(charges) assert isinstance(charges, cctk.OneIndexedArray), "charges must be cctk.OneIndexedArray" assert len(charges) == self.num_atoms(), "need a charge for every atom" assert isinstance(atoms1, list) assert isinstance(atoms2, list) q1 = charges[atoms1] q2 = charges[atoms2] # need to convert to Bohr r1 = self.geometry[atoms1] / 0.529 r2 = self.geometry[atoms2] / 0.529 R = cdist(r1, r2)**2 Q = np.outer(q1, q2) energy = 0 for i in range(len(atoms1)): assert atoms1[i] not in atoms2, "lists must be non-overlapping" for j in range(len(atoms2)): energy += Q[i][j] / R[i][j] return energy * 627.509 # convert to kcal/mol
[docs] def center_of_mass(self): """ Returns the center-of-mass of the molecule, as a ``np.array``. """ masses = cctk.OneIndexedArray([get_avg_mass(z) for z in self.atomic_numbers]).reshape(-1,1) return np.sum(masses * self.geometry, axis=0) / np.sum(masses)
[docs] def principal_axes_of_rotation(self): """ Compute principal axes of rotation and corresponding moments of inertia. See Jprogdyn, RotationalBoltzmann, lines 48–115. Returns: moments of intertia (3-element np.array) - some may be zero axes of rotation (3 x 3 np.array) """ # move everything to the center of mass (on a copy, let's not get too crazy here) com = self.center_of_mass() positions = copy.deepcopy(self.geometry.view(np.ndarray)) positions += -1 * com masses = np.array([get_avg_mass(z) for z in self.atomic_numbers]).reshape(-1,1) np.testing.assert_allclose(np.sum(masses * positions, axis=0) / np.sum(masses), 0, atol=0.00001) # build up mass moment of inertia tensor Ixx, Ixy, Ixz, Iyy, Iyz, Izz = 0, 0, 0, 0, 0, 0 for mass, position in zip(masses, positions): Ixx += mass * (position[2]*position[2] + position[1]*position[1]) Iyy += mass * (position[0]*position[0] + position[2]*position[2]) Izz += mass * (position[0]*position[0] + position[1]*position[1]) Ixy -= mass * position[0] * position[1] Ixz -= mass * position[0] * position[2] Iyz -= mass * position[1] * position[2] I = np.array([[Ixx, Ixy, Ixz], [Ixy, Iyy, Iyz], [Ixz, Iyz, Izz]]).reshape(3,3) # now we do an eigendecomposition on that tensor return np.linalg.eigh(I)