Source code for cctk.group

import copy
import numpy as np
import networkx as nx

import cctk
from cctk.helper_functions import get_covalent_radius, compute_angle_between, compute_rotation_matrix


[docs] class Group(cctk.Molecule): """ Class representing a functional group. Note that a Group instance does not need to be missing atoms. Rather, the atom given by `attach_to` will be replaced wholesale by another molecule, and the bond distances scaled automatically. Attributes: attach_to (int): atom number to replace with larger fragment. must have only one bond! (e.g. H in F3C-H) adjacent (int): atom number that will be bonded to new molecule. (e.g. C in F3C-H) isomorphic (list of lists): list of lists of atoms that should be considered symmetry equivalent. For instance, the three methyl protons can be considered symmetry equivalent, so ``methane.isomorphic = [[3, 4, 5]]``. _map_from_truncated(dict): a dictionary mapping atom numbers of the group without ``attach_to`` to the atom numbers of the normal group """
[docs] def __init__(self, attach_to, isomorphic=None, **kwargs): super().__init__(**kwargs) self.add_attachment_point(attach_to) self._map_from_truncated = None if isomorphic is not None: assert isinstance(isomorphic, list), "group.isomorphic must be list of lists!" self.isomorphic = isomorphic
[docs] @classmethod def new_from_molecule(cls, molecule, attach_to, **kwargs): """ Convenient method to convert ``molecule`` to ``group`` directly. """ group = Group(attach_to, atomic_numbers=molecule.atomic_numbers, geometry=molecule.geometry, bonds=molecule.bonds.edges(), **kwargs) return group
[docs] def add_attachment_point(self, attach_to): """ Adds ``attach_to`` and ``adjacent`` attributes to the instance. Automatically centers atom ``adjacent`` on the origin, to simplify downstream mathematics. """ n_bonds = len(super().get_adjacent_atoms(attach_to)) if n_bonds != 1: raise ValueError(f"atom {attach_to} is making {n_bonds} but must make 1 bond to be a valid attachment point") self.attach_to = attach_to adjacent = super().get_adjacent_atoms(attach_to) assert len(adjacent) == 1, "can't substitute an atom with more than one adjacent atom!" self.adjacent = adjacent[0] adj_v = super().get_vector(self.adjacent) super().translate_molecule(-adj_v)
[docs] @staticmethod def add_group_to_molecule(molecule, group, add_to, optimize=True, return_mapping=False): """ Adds a `Group` object to a `Molecule` at the specified atom, and returns a new `Molecule` object (generated using `copy.deepcopy()`). Automatically attempts to prevent clashes by minimizing pairwise atomic distances. The atom in `group` that replaces `add_to` in `molecule` will inherit the number of `add_to` - however, the other atoms in `group` will be appended to the atom list. Args: molecule (Molecule): the molecule to change group (Group): the group to affix add_to (int): the 1-indexed atom number on `molecule` to add `group` to optimize (bool): whether or not to perform automated dihedral optimization return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products Returns: new Molecule object (optional) molecule_to_new dictionary mapping atom numbers from starting molecule (key) to new atom numbers (val) (optional) group_to_new dictionary mapping atom numbers from starting group (key) to new atom numbers (val) """ #### this code can be a bit complex: for an example, let's imagine converting benzene to toluene by adding methane (Group) to benzene (Molecule) #### add_to would be the benzene H (atom on Molecule you replace with the new group) #### adjacent_atom would be the benzene C #### group.attach_to would be the methane H #### group.adjacent would be the methane C #### prevent in-place modification of molecule - could lead to pernicious errors! try: add_to = int(add_to) except Exception: raise TypeError("add_to not castable to int") molecule = copy.deepcopy(molecule) molecule._check_atom_number(add_to) original_num_atoms = molecule.num_atoms() adjacent_atom = molecule.get_adjacent_atoms(add_to) assert ( len(adjacent_atom) > 0 ), "can't substitute an atom without an adjacent atom! (are there bonds defined for this molecule? consider calling molecule.assign_connectivity()!)" assert len(adjacent_atom) == 1, "can't substitute an atom with more than one adjacent atom!" adjacent_atom = adjacent_atom[0] attach_to = group.attach_to other_indices = np.ones_like(group.atomic_numbers).astype(bool) other_indices[attach_to] = False other_indices[group.adjacent] = False #### we need to change the bond length somewhat to prevent strange behavior old_radius = get_covalent_radius(molecule.atomic_numbers[add_to]) new_radius = get_covalent_radius(group.atomic_numbers[group.adjacent]) delta_rad = new_radius - old_radius #### make the swap! (this only adds the atoms, still have to get the geometry right) molecule.atomic_numbers[add_to] = group.atomic_numbers[group.adjacent] new_indices = [i + molecule.num_atoms() for i in range(1, np.sum(other_indices) + 1)] molecule.atomic_numbers = np.hstack([molecule.atomic_numbers, group.atomic_numbers[other_indices]]) molecule.atomic_numbers = molecule.atomic_numbers.view(cctk.OneIndexedArray) #### have to keep track of what all the new indices are, to carry over connectivity new_indices.insert(group.adjacent - 1, add_to) new_indices.insert(attach_to - 1, adjacent_atom) #### track atom number mapping molecule_to_new = {z : z for z in range(1, molecule.num_atoms() + 1)} molecule_to_new[add_to] = None group_to_new = {} offset = 1 for z in range(1, group.num_atoms() + 1): if other_indices[z]: group_to_new[z] = original_num_atoms + offset offset += 1 else: group_to_new[z] = None group_to_new[group.adjacent] = add_to #### adjust the bond length by moving add_to molecule.set_distance(adjacent_atom, add_to, molecule.get_distance(adjacent_atom, add_to) + delta_rad) #### rotate group to match the new positioning v_g = group.get_vector(group.attach_to, group.adjacent) v_m = molecule.get_vector(add_to, adjacent_atom) theta = compute_angle_between(v_g, v_m) #### rotate each atom and add it... center_pos = molecule.get_vector(add_to) rot = compute_rotation_matrix(np.cross(v_g, v_m), -(180 - theta)) for vector in group.geometry[other_indices]: new_v = np.dot(rot, vector) + center_pos molecule.geometry = np.vstack((molecule.geometry, new_v)) molecule.geometry = molecule.geometry.view(cctk.OneIndexedArray) #### now we have to merge the new bonds for (atom1, atom2) in group.bonds.edges(): molecule.add_bond(new_indices[atom1-1], new_indices[atom2-1]) assert molecule.get_bond_order(add_to, adjacent_atom), "we didn't add the bond we were supposed to form!" assert len(molecule.atomic_numbers) == len( molecule.geometry ), f"molecule has {len(molecule.atomic_numbers)} atoms but {len(molecule.geometry)} geometry elements!" #### now we want to find the "lowest" energy conformation, defined as the rotamer which minimizes the RMS distance between all atoms if group.num_atoms() > 3 and optimize: adjacent_on_old_molecule = molecule.get_adjacent_atoms(adjacent_atom)[0] adjacent_on_new_molecule = molecule.get_adjacent_atoms(add_to)[-1] molecule.optimize_dihedral(adjacent_on_old_molecule, adjacent_atom, add_to, adjacent_on_new_molecule) if molecule.check_for_conflicts(): if return_mapping: return molecule, molecule_to_new, group_to_new else: return molecule else: raise ValueError("molecule contains conflicts!")
[docs] @staticmethod def remove_group_from_molecule(molecule, atom1, atom2, return_mapping=False): """ The microscopic reverse of ``add_group_to_molecule`` -- splits a ``Molecule`` along the ``atom1``–``atom2`` bond and returns a new ``Molecule`` object (the ``atom1`` side) and a new ``Group`` (the ``atom2`` side). The new objects will be capped with hydrogens; atom ordering will be preserved! Args: molecule (Molecule): the molecule to change atom1 (int): the 1-indexed atom number on `molecule` to make part of the new ``Molecule`` object atom2 (int): the 1-indexed atom number on `molecule` to make part of the new ``Group`` object return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products Returns: new Molecule object new Group object (optional) molecule_to_molecule dictionary mapping atom numbers from starting molecule (key) to new molecule atom numbers (val) (optional) molecule_to_group dictionary mapping atom numbers from starting molecule (key) to new group atom numbers (val) """ try: atom1 = int(atom1) atom2 = int(atom2) except Exception: raise TypeError("atom numbers not castable to int") molecule = copy.deepcopy(molecule) molecule._check_atom_number(atom1) molecule._check_atom_number(atom2) #### define mapping dicts fragment1, fragment2 = molecule._get_bond_fragments(atom1, atom2) molecule_to_molecule = {x: i+1 for i, x in enumerate(fragment1)} molecule_to_group = {x: i+1 for i, x in enumerate(fragment2)} #### create new molecules new_mol = cctk.Molecule(molecule.atomic_numbers[fragment1], molecule.geometry[fragment1]) group = cctk.Molecule(molecule.atomic_numbers[fragment2], molecule.geometry[fragment2]) #### add capping H to new_mol new_mol.add_atom("H", molecule.geometry[atom2]) molecule_to_molecule[atom2] = new_mol.num_atoms() old_radius = get_covalent_radius(molecule.atomic_numbers[atom2]) H_radius = get_covalent_radius(1) new_dist = new_mol.get_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2]) - old_radius + H_radius new_mol.set_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2], new_dist) new_mol.add_bond(molecule_to_molecule[atom1], molecule_to_molecule[atom2]) #### add capping H to new group group.add_atom("H", molecule.geometry[atom1]) molecule_to_group[atom1] = group.num_atoms() old_radius = get_covalent_radius(molecule.atomic_numbers[atom1]) new_dist = group.get_distance(molecule_to_group[atom2], molecule_to_group[atom1]) - old_radius + H_radius group.set_distance(molecule_to_group[atom2], molecule_to_group[atom1], new_dist) group.add_bond(molecule_to_group[atom2], molecule_to_group[atom1]) #### add bonds to nascent molecules molecule.remove_bond(atom1, atom2) for (a1, a2) in molecule.bonds.edges(): if a1 in fragment1: assert a2 in fragment1, "somehow we have another bond between the two groups!" assert molecule_to_molecule[a1] is not None, f"we don't have a mapping for atom {a1}" assert molecule_to_molecule[a2] is not None, f"we don't have a mapping for atom {a2}" new_mol.add_bond(molecule_to_molecule[a1], molecule_to_molecule[a2]) elif a2 in fragment2: assert a2 in fragment2, "somehow we have another bond between the two groups!" assert molecule_to_group[a1] is not None, f"we don't have a mapping for atom {a1}" assert molecule_to_group[a2] is not None, f"we don't have a mapping for atom {a2}" group.add_bond(molecule_to_group[a1], molecule_to_group[a2]) #### create Group object from group group = cctk.Group.new_from_molecule(attach_to=molecule_to_group[atom1], molecule=group) if return_mapping: return new_mol, group, molecule_to_molecule, molecule_to_group else: return new_mol, group
[docs] def map_from_truncated(self): """ Returns a dictionary mapping atomic numbers without ``attach_to`` to atomic_numbers with ``attach_to``. """ if self._map_from_truncated is not None: return self._map_from_truncated assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!" g = copy.deepcopy(self) g._add_atomic_numbers_to_nodes() tg = copy.deepcopy(g) tg.remove_atom(g.attach_to) nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0) match = nx.algorithms.isomorphism.GraphMatcher(g.bonds, tg.bonds, node_match=nm) for sg in match.subgraph_isomorphisms_iter(): if self.attach_to in sg.keys(): continue sg = {v: k for k, v in sg.items()} # invert self._map_from_truncated = sg return sg