Source code for cctk.group

import copy
import numpy as np
import networkx as nx

import cctk
from cctk.helper_functions import get_covalent_radius, compute_angle_between, compute_rotation_matrix



[docs]
class Group(cctk.Molecule):
    """
    Class representing a functional group.

    Note that a Group instance does not need to be missing atoms. Rather, the atom given by `attach_to` will be replaced wholesale by another molecule, and the bond distances scaled automatically.

    Attributes:
        attach_to (int): atom number to replace with larger fragment. must have only one bond! (e.g. H in F3C-H)
        adjacent (int): atom number that will be bonded to new molecule. (e.g. C in F3C-H)
        isomorphic (list of lists): list of lists of atoms that should be considered symmetry equivalent.
            For instance, the three methyl protons can be considered symmetry equivalent, so ``methane.isomorphic = [[3, 4, 5]]``.
        _map_from_truncated(dict): a dictionary mapping atom numbers of the group without ``attach_to`` to the atom numbers of the normal group
    """


[docs]
    def __init__(self, attach_to, isomorphic=None, **kwargs):
        super().__init__(**kwargs)
        self.add_attachment_point(attach_to)
        self._map_from_truncated = None

        if isomorphic is not None:
            assert isinstance(isomorphic, list), "group.isomorphic must be list of lists!"
        self.isomorphic = isomorphic



[docs]
    @classmethod
    def new_from_molecule(cls, molecule, attach_to, **kwargs):
        """
        Convenient method to convert ``molecule`` to ``group`` directly.
        """
        group = Group(attach_to, atomic_numbers=molecule.atomic_numbers, geometry=molecule.geometry, bonds=molecule.bonds.edges(), **kwargs)
        return group



[docs]
    def add_attachment_point(self, attach_to):
        """
        Adds ``attach_to`` and ``adjacent`` attributes to the instance.

        Automatically centers atom ``adjacent`` on the origin, to simplify downstream mathematics.
        """
        n_bonds = len(super().get_adjacent_atoms(attach_to))
        if n_bonds != 1:
            raise ValueError(f"atom {attach_to} is making {n_bonds} but must make 1 bond to be a valid attachment point")

        self.attach_to = attach_to

        adjacent = super().get_adjacent_atoms(attach_to)
        assert len(adjacent) == 1, "can't substitute an atom with more than one adjacent atom!"
        self.adjacent = adjacent[0]

        adj_v = super().get_vector(self.adjacent)
        super().translate_molecule(-adj_v)



[docs]
    @staticmethod
    def add_group_to_molecule(molecule, group, add_to, optimize=True, return_mapping=False):
        """
        Adds a `Group` object to a `Molecule` at the specified atom, and returns a new `Molecule` object (generated using `copy.deepcopy()`).
        Automatically attempts to prevent clashes by minimizing pairwise atomic distances.

        The atom in `group` that replaces `add_to` in `molecule` will inherit the number of `add_to` - however, the other atoms in `group` will be appended to the atom list.

        Args:
            molecule (Molecule): the molecule to change
            group (Group): the group to affix
            add_to (int): the 1-indexed atom number on `molecule` to add `group` to
            optimize (bool): whether or not to perform automated dihedral optimization
            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products

        Returns:
            new Molecule object

            (optional) molecule_to_new dictionary mapping atom numbers from starting molecule (key) to new atom numbers (val)
            (optional) group_to_new dictionary mapping atom numbers from starting group (key) to new atom numbers (val)
        """
        #### this code can be a bit complex: for an example, let's imagine converting benzene to toluene by adding methane (Group) to benzene (Molecule)
        ####     add_to would be the benzene H (atom on Molecule you replace with the new group)
        ####     adjacent_atom would be the benzene C
        ####     group.attach_to would be the methane H
        ####     group.adjacent would be the methane C

        #### prevent in-place modification of molecule - could lead to pernicious errors!

        try:
            add_to = int(add_to)
        except Exception:
            raise TypeError("add_to not castable to int")

        molecule = copy.deepcopy(molecule)
        molecule._check_atom_number(add_to)
        original_num_atoms = molecule.num_atoms()

        adjacent_atom = molecule.get_adjacent_atoms(add_to)
        assert (
            len(adjacent_atom) > 0
        ), "can't substitute an atom without an adjacent atom! (are there bonds defined for this molecule? consider calling molecule.assign_connectivity()!)"
        assert len(adjacent_atom) == 1, "can't substitute an atom with more than one adjacent atom!"
        adjacent_atom = adjacent_atom[0]

        attach_to = group.attach_to
        other_indices = np.ones_like(group.atomic_numbers).astype(bool)
        other_indices[attach_to] = False
        other_indices[group.adjacent] = False

        #### we need to change the bond length somewhat to prevent strange behavior
        old_radius = get_covalent_radius(molecule.atomic_numbers[add_to])
        new_radius = get_covalent_radius(group.atomic_numbers[group.adjacent])
        delta_rad = new_radius - old_radius

        #### make the swap! (this only adds the atoms, still have to get the geometry right)
        molecule.atomic_numbers[add_to] = group.atomic_numbers[group.adjacent]
        new_indices = [i + molecule.num_atoms() for i in range(1, np.sum(other_indices) + 1)]
        molecule.atomic_numbers = np.hstack([molecule.atomic_numbers, group.atomic_numbers[other_indices]])
        molecule.atomic_numbers = molecule.atomic_numbers.view(cctk.OneIndexedArray)

        #### have to keep track of what all the new indices are, to carry over connectivity
        new_indices.insert(group.adjacent - 1, add_to)
        new_indices.insert(attach_to - 1, adjacent_atom)

        #### track atom number mapping
        molecule_to_new = {z : z for z in range(1, molecule.num_atoms() + 1)}
        molecule_to_new[add_to] = None

        group_to_new = {}
        offset = 1
        for z in range(1, group.num_atoms() + 1):
            if other_indices[z]:
                group_to_new[z] = original_num_atoms + offset
                offset += 1
            else:
                group_to_new[z] = None
        group_to_new[group.adjacent] = add_to

        #### adjust the bond length by moving add_to
        molecule.set_distance(adjacent_atom, add_to, molecule.get_distance(adjacent_atom, add_to) + delta_rad)

        #### rotate group to match the new positioning
        v_g = group.get_vector(group.attach_to, group.adjacent)
        v_m = molecule.get_vector(add_to, adjacent_atom)
        theta = compute_angle_between(v_g, v_m)

        #### rotate each atom and add it...
        center_pos = molecule.get_vector(add_to)
        rot = compute_rotation_matrix(np.cross(v_g, v_m), -(180 - theta))
        for vector in group.geometry[other_indices]:
            new_v = np.dot(rot, vector) + center_pos
            molecule.geometry = np.vstack((molecule.geometry, new_v))
            molecule.geometry = molecule.geometry.view(cctk.OneIndexedArray)

        #### now we have to merge the new bonds
        for (atom1, atom2) in group.bonds.edges():
            molecule.add_bond(new_indices[atom1-1], new_indices[atom2-1])
        assert molecule.get_bond_order(add_to, adjacent_atom), "we didn't add the bond we were supposed to form!"

        assert len(molecule.atomic_numbers) == len(
            molecule.geometry
        ), f"molecule has {len(molecule.atomic_numbers)} atoms but {len(molecule.geometry)} geometry elements!"

        #### now we want to find the "lowest" energy conformation, defined as the rotamer which minimizes the RMS distance between all atoms
        if group.num_atoms() > 3 and optimize:
            adjacent_on_old_molecule = molecule.get_adjacent_atoms(adjacent_atom)[0]
            adjacent_on_new_molecule = molecule.get_adjacent_atoms(add_to)[-1]
            molecule.optimize_dihedral(adjacent_on_old_molecule, adjacent_atom, add_to, adjacent_on_new_molecule)

        if molecule.check_for_conflicts():
            if return_mapping:
                return molecule, molecule_to_new, group_to_new
            else:
                return molecule
        else:
            raise ValueError("molecule contains conflicts!")



[docs]
    @staticmethod
    def remove_group_from_molecule(molecule, atom1, atom2, return_mapping=False):
        """
        The microscopic reverse of ``add_group_to_molecule`` -- splits a ``Molecule`` along the ``atom1``–``atom2`` bond
        and returns a new ``Molecule`` object (the ``atom1`` side) and a new ``Group`` (the ``atom2`` side).

        The new objects will be capped with hydrogens; atom ordering will be preserved!

        Args:
            molecule (Molecule): the molecule to change
            atom1 (int): the 1-indexed atom number on `molecule` to make part of the new ``Molecule`` object
            atom2 (int): the 1-indexed atom number on `molecule` to make part of the new ``Group`` object
            return_mapping (bool): whether or not to return dictionaries mapping atom numbers from starting materials to products

        Returns:
            new Molecule object
            new Group object

            (optional) molecule_to_molecule dictionary mapping atom numbers from starting molecule (key) to new molecule atom numbers (val)
            (optional) molecule_to_group dictionary mapping atom numbers from starting molecule (key) to new group atom numbers (val)
        """
        try:
            atom1 = int(atom1)
            atom2 = int(atom2)
        except Exception:
            raise TypeError("atom numbers not castable to int")

        molecule = copy.deepcopy(molecule)
        molecule._check_atom_number(atom1)
        molecule._check_atom_number(atom2)

        #### define mapping dicts
        fragment1, fragment2 = molecule._get_bond_fragments(atom1, atom2)
        molecule_to_molecule = {x: i+1 for i, x in enumerate(fragment1)}
        molecule_to_group = {x: i+1 for i, x in enumerate(fragment2)}

        #### create new molecules
        new_mol = cctk.Molecule(molecule.atomic_numbers[fragment1], molecule.geometry[fragment1])
        group = cctk.Molecule(molecule.atomic_numbers[fragment2], molecule.geometry[fragment2])

        #### add capping H to new_mol
        new_mol.add_atom("H", molecule.geometry[atom2])
        molecule_to_molecule[atom2] = new_mol.num_atoms()
        old_radius = get_covalent_radius(molecule.atomic_numbers[atom2])
        H_radius = get_covalent_radius(1)
        new_dist = new_mol.get_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2]) - old_radius + H_radius
        new_mol.set_distance(molecule_to_molecule[atom1], molecule_to_molecule[atom2], new_dist)
        new_mol.add_bond(molecule_to_molecule[atom1], molecule_to_molecule[atom2])

        #### add capping H to new group
        group.add_atom("H", molecule.geometry[atom1])
        molecule_to_group[atom1] = group.num_atoms()
        old_radius = get_covalent_radius(molecule.atomic_numbers[atom1])
        new_dist = group.get_distance(molecule_to_group[atom2], molecule_to_group[atom1]) - old_radius + H_radius
        group.set_distance(molecule_to_group[atom2], molecule_to_group[atom1], new_dist)
        group.add_bond(molecule_to_group[atom2], molecule_to_group[atom1])

        #### add bonds to nascent molecules
        molecule.remove_bond(atom1, atom2)
        for (a1, a2) in molecule.bonds.edges():
            if a1 in fragment1:
                assert a2 in fragment1, "somehow we have another bond between the two groups!"
                assert molecule_to_molecule[a1] is not None, f"we don't have a mapping for atom {a1}"
                assert molecule_to_molecule[a2] is not None, f"we don't have a mapping for atom {a2}"
                new_mol.add_bond(molecule_to_molecule[a1], molecule_to_molecule[a2])
            elif a2 in fragment2:
                assert a2 in fragment2, "somehow we have another bond between the two groups!"
                assert molecule_to_group[a1] is not None, f"we don't have a mapping for atom {a1}"
                assert molecule_to_group[a2] is not None, f"we don't have a mapping for atom {a2}"
                group.add_bond(molecule_to_group[a1], molecule_to_group[a2])

        #### create Group object from group
        group = cctk.Group.new_from_molecule(attach_to=molecule_to_group[atom1], molecule=group)

        if return_mapping:
            return new_mol, group, molecule_to_molecule, molecule_to_group
        else:
            return new_mol, group



[docs]
    def map_from_truncated(self):
        """
        Returns a dictionary mapping atomic numbers without ``attach_to`` to atomic_numbers with ``attach_to``.
        """
        if self._map_from_truncated is not None:
            return self._map_from_truncated

        assert self.bonds.number_of_edges() > 0, "need a bond graph to perform this operation -- try calling self.assign_connectivity()!"
        g = copy.deepcopy(self)
        g._add_atomic_numbers_to_nodes()
        tg = copy.deepcopy(g)
        tg.remove_atom(g.attach_to)

        nm = nx.algorithms.isomorphism.categorical_node_match("atomic_number", 0)
        match = nx.algorithms.isomorphism.GraphMatcher(g.bonds, tg.bonds, node_match=nm)

        for sg in match.subgraph_isomorphisms_iter():
            if self.attach_to in sg.keys():
                continue
            sg = {v: k for k, v in sg.items()} # invert
            self._map_from_truncated = sg
            return sg