Source code for cctk.orca_file

import re
import numpy as np

from enum import Enum

from cctk import File, Molecule, ConformationalEnsemble
from cctk.helper_functions import get_symbol, get_corrected_free_energy

import cctk.parse_orca as parse

class OrcaJobType(Enum):
    """
    Class representing allowed Orca job types. Not an exhaustive list, but should be fairly comprehensive.

    The value should be the Orca keyword, to permit automatic assignment.

    All jobs have type ``SP`` by default.
    """

    SP = ("sp", ["energy", "scf_iterations"])
    """
    Single point energy calculation.
    """

    OPT = ("opt", ["rms_gradient", "rms_step", "max_gradient", "max_step"])
    """
    Geometry optimization.
    """

    FREQ = ("freq", ["gibbs_free_energy", "enthalpy", "frequencies", "temperature"])
    """
    Hessian calculation.
    """

    NMR = ("nmr", ["isotropic_shielding"])
    """
    NMR shielding prediction.
    """

    def __init__(self, value, expected_properties):
        self._value_ = value
        self.expected_properties = expected_properties


[docs]
class OrcaFile(File):
    """
    Generic class for all Orca `.inp` and `.out` files.

    Attributes:
        ensemble (ConformationalEnsemble): `ConformationalEnsemble` instance
        job_types (list): list of ``OrcaJobType`` instances
        header (str): keyword line or lines
        variables (dict): list of variables to specify (e.g. ``{"maxcore": 2000}``).
        blocks (dict): list of blocks to change specific settings
            In general, the key should be the block name and the value should be a list of desired lines.
            For instance, configuring a time-dependent DFT job might look like ``{"tddft": ["maxdim 5", "nroots 50"]}``
        successful_terminations (int): number of successful terminations
        elapsed_time (float): total time for job in seconds
    """


[docs]
    def __init__(self, job_types, ensemble=None,  header=None, variables=None, blocks=None):
        if job_types is not None:
            if not all(isinstance(job, OrcaJobType) for job in job_types):
                raise TypeError(f"invalid job type list {job_types}")
            self.job_types = job_types
        else:
            raise ValueError("need job types for new Orca file")

        if ensemble and isinstance(ensemble, ConformationalEnsemble):
            self.ensemble = ensemble
        else:
            self.ensemble = ConformationalEnsemble()

        if header and isinstance(header, str):
            self.header = header
        else:
            self.header = None

        if blocks and isinstance(blocks, dict):
            for lines in list(blocks.values()):
                assert isinstance(lines, list)
            self.blocks = blocks
        else:
            self.blocks = {}

        if variables and isinstance(variables, dict):
            self.variables = variables
        else:
            self.variables = {}



[docs]
    @classmethod
    def read_file(cls, filename):
        if re.search("inp$", filename):
            return cls._read_inp_file(filename)

        multiple_lines = parse.split_multiple_inputs(filename)
        files = []

        for lines in multiple_lines:
            input_lines = parse.extract_input_file(lines)
            header = parse.read_header(input_lines)
            job_types = cls._assign_job_types(header)
            variables, blocks = parse.read_blocks_and_variables(input_lines)

            successful_scf_convergence = 0
            successful_opt = 0
            successful_freq = 0
            successful_NMR_EPR = 0
            is_scan_job = False
            # add identifiers for successful termination of other job types
    
            elapsed_time = 0
            for line in lines:        
                if line.startswith("FINAL SINGLE POINT ENERGY"):                                        #### SCF converged at least once
                    successful_scf_convergence += 1
                elif line.strip().startswith("***        THE OPTIMIZATION HAS CONVERGED     ***"):      #### geometry converged
                    successful_opt += 1
                elif line.startswith("VIBRATIONAL FREQUENCIES"):                                        #### a frequency job was completed
                    successful_freq += 1                                                                
                elif line.startswith("Maximum memory used throughout the entire EPRNMR-calculation:"):  #### an EPR NMR job was completed
                    successful_NMR_EPR += 1
                elif line.strip().startswith("*    Relaxed Surface Scan    *"):                         #### this is a scan job
                    is_scan_job = True
                elif line.startswith("Sum of individual times         ..."):                            #### the job was completed  (note the '...' is key)
                    fields = line.split()
                    assert len(fields) == 9 or len(fields) == 10, f"unexpected number of fields on elapsed time line:\n{line}"
                    elapsed_time = float(fields[5])
    
            # different than G16 "successful termination"
            success = 0   
            if successful_opt > 0:
                success += 1
            if successful_freq > 0:
                success += 1
            if successful_NMR_EPR > 0:
                success += 1
            if successful_scf_convergence > 0:
                success += 1

            energies, iters = parse.read_energies(lines)
            if len(energies) == 0:
                return None

            atomic_numbers, geometries = parse.read_geometries(lines, num_to_find=len(energies))
            assert len(geometries) >= len(energies), "can't have an energy without a geometry (cf. pigeonhole principle)"

            # this approach does not work with the option miniprint
            charge = lines.find_parameter("Total Charge           Charge          ....", 5, 4)[0]
            multip = lines.find_parameter("Multiplicity           Mult            ....", 4, 3)[0]

            #### TODO
            # detect Mayer bond orders

            f = OrcaFile(job_types, header=header, variables=variables, blocks=blocks)
            f.elapsed_time = elapsed_time
            f.successful_terminations = success

            molecules = [None] * len(geometries)
            properties = [{} for _ in range(len(geometries))]
            for idx, geom in enumerate(geometries):
                molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=None)
                if idx < len(energies):
                    properties[idx]["energy"] = energies[idx]
                properties[idx]["filename"] = filename
                properties[idx]["iteration"] = idx
                properties[idx]["scf_iterations"] = iters[idx]

            if multip > 1:
                s2 = lines.find_parameter("Expectation value of", 6, 5)
                for idx, spin_contam in enumerate(s2):
                    properties[idx]["S**2"] = spin_contam

            if OrcaJobType.OPT in job_types:
                rms_grad, max_grad, rms_step, max_step = parse.read_gradients(lines, len(properties))
                for idx in range(len(rms_grad)):
                    if idx < len(rms_grad):
                        properties[idx]["rms_gradient"] = rms_grad[idx]

                    if idx < len(max_grad):
                        properties[idx]["max_gradient"] = max_grad[idx]

                    if idx < len(rms_step):
                        properties[idx]["rms_step"] = rms_step[idx]

                    if idx < len(max_step):
                        properties[idx]["max_step"] = max_step[idx]

            if OrcaJobType.FREQ in job_types:
                properties[-1]["frequencies"] = sorted(parse.read_freqs(lines, successful_freq))

                enthalpies = lines.find_parameter("Total Enthalpy", expected_length=5, which_field=3)
                try:
                    properties[-1]["enthalpy"] = enthalpies[-1]
                except Exception:
                    pass

                gibbs = lines.find_parameter("Final Gibbs free", expected_length=7, which_field=5)
                try:
                    properties[-1]["gibbs_free_energy"] = gibbs[-1]
                except Exception:
                    pass

                try:
                    temperature = lines.find_parameter("Temperature", expected_length=4, which_field=2)
                    if len(temperature) > 0 and len(gibbs) > 0:
                        properties[-1]["temperature"] = temperature[-1]
                        corrected_free_energy = get_corrected_free_energy(gibbs[-1], properties[-1]["frequencies"],
                                                                      frequency_cutoff=100.0, temperature=temperature[-1])
                        properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)
                except Exception:
                    pass

            if OrcaJobType.NMR in job_types:
                nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms())
                if nmr_shifts is not None:
                    properties[-1]["isotropic_shielding"] = nmr_shifts

            try:
                charges = parse.read_mulliken_charges(lines, successful_opt, is_scan_job)
                assert len(charges) == len(atomic_numbers)
                properties[-1]["mulliken_charges"] = charges
            except Exception:
                pass

            try:
                charges = parse.read_loewdin_charges(lines, successful_opt, is_scan_job)
                assert len(charges) == len(atomic_numbers)
                properties[-1]["lowdin_charges"] = charges
            except Exception:
                pass

            try:
                dipole = lines.find_parameter("Magnitude \(Debye\)", 4, 3)
                properties[-1]["dipole_moment"] = dipole[0]
            except Exception:
                pass

            for mol, prop in zip(molecules, properties):
                f.ensemble.add_molecule(mol, properties=prop)

            f.check_has_properties()
            files.append(f)

        if len(files) == 1:
            return files[0]
        else:
            return files


    @classmethod
    def _read_inp_file(cls, filename):
        print("reading ``.inp`` files is not currently supported :(")
        return None


[docs]
    def write_file(self, filename, molecule=None, header=None, variables=None, blocks=None):
        """
        Write a ``.inp`` file, using object attributes. If no header is specified, the object's header will be used.

        Args:
            filename (str): path to the new file
            molecule (int): which molecule to use -- passed to ``self.get_molecule()``.
                Default is -1 (e.g. the last molecule), but positive integers will select from self.ensemble.molecules (0-indexed).
                A ``Molecule`` object can also be passed, in which case that molecule will be written to the file.
            header (str): header for new file
        """
        if molecule is None:
            molecule = -1
        if not isinstance(molecule, Molecule):
            molecule = self.ensemble.molecules[molecule]

        if header is None:
            header = self.header

        if variables is None:
            variables = self.variables

        if blocks is None:
            blocks = self.blocks

        self.write_molecule_to_file(filename, molecule, header, variables, blocks)



[docs]
    @classmethod
    def write_molecule_to_file(cls, filename, molecule, header, variables={"maxcore": 2000}, blocks={"pal": ["nproc 16"]}, print_symbol=False):
        """
        Write an ``.inp`` file using the given molecule.

        Args:
            filename (str): path to the new file
            molecule (Molecule): which molecule to use -- a ``Molecule`` object.
            header (str): header for new file
            print_symbol (Bool): if atomic symbols should be printed instead of atomic numbers
        """
        assert isinstance(molecule, Molecule), "need a valid molecule to write a file!"
        assert isinstance(header, str), "can't write a file without a header"

        text = f"{header.strip()}\n"

        if variables is not None:
            assert isinstance(variables, dict), "blocks must be a dictionary"
            for k, v in variables.items():
                text += f"%{k} {v}\n"

        if blocks is not None:
            assert isinstance(blocks, dict), "blocks must be a dictionary"
            for k, v in blocks.items():
                text += f"%{k}\n"
                for line in v:
                    text += f"\t{line}\n"
                text += "end\n"

        text +="\n"

        text += f"* xyz {int(molecule.charge)} {int(molecule.multiplicity)}\n"
        for index, Z in enumerate(molecule.atomic_numbers, start=1):
            line = molecule.get_vector(index)
            if print_symbol:
                Z = get_symbol(Z)
                text += f"{Z:>2}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"
            else:
                text += f"{Z:2d}       {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"

        text += "*\n"
        text += "\n"

        #### write the file
        super().write_file(filename, text)



[docs]
    def get_molecule(self, num=None):
        """
        Returns the last molecule (from an optimization job or other multi-molecule jobs) or the only molecule (from other jobs).

        If ``num`` is specified, returns that job (1-indexed for positive numbers). So ``job.get_molecule(3)`` will return the 3rd element of ``job.molecules``, not the 4th.
        """
        # some methods pass num=None, which overrides setting the default above
        if num is None:
            num = -1

        if not isinstance(num, int):
            raise TypeError("num must be int")

        return self.ensemble.molecule_list()[num]



[docs]
    def num_imaginaries(self):
        """
        Returns the number of imaginary frequencies.
        """
        return len(self.imaginaries())



[docs]
    def imaginaries(self):
        """
        Returns the imaginary frequencies, rounded to the nearest integer.
        """
        if (OrcaJobType.FREQ in self.job_types) and (self.ensemble[-1:,"frequencies"] is not None):
            freqs = self.ensemble[-1:,"frequencies"]
            if not isinstance(freqs, list) or len(freqs) == 0:
                return list()
            else:
                return list(map(int, np.array(freqs)[np.array(freqs) < 0]))
        else:
            return list()



    @classmethod
    def _assign_job_types(cls, header):
        """
        Assigns ``OrcaJobType`` objects from header. ``OrcaJobType.SP`` is assigned by default.

        Args:
            header (str): Orca header

        Returns:
            list of ``OrcaJobType`` objects
        """
        job_types = []
        for job_type in OrcaJobType:
            if re.search(f"{job_type.value}", str(header), re.IGNORECASE):
                job_types.append(job_type)
        if OrcaJobType.SP not in job_types:
            job_types.append(OrcaJobType.SP) # include SP in all jobs, whether specified in header or not
        if re.search("ScanTs", str(header), re.IGNORECASE):
            job_types.append(OrcaJobType.OPT) #ScanTs is an optimization job that does not contain the string "opt" in it's keyword
        return job_types


[docs]
    def check_has_properties(self):
        """
        Checks that the file has all the appropriate properties for its job types, and raises ``ValueError`` if not.

        This only checks the last molecule in ``self.ensemble``, for now.
        """
        if self.successful_terminations > 0:
            if self.successful_terminations == 2 and ((OrcaJobType.OPT in self.job_types) and (OrcaJobType.FREQ in self.job_types)):
                return # opt and freq jobs should have three terminations
            for job_type in self.job_types:
                for prop in job_type.expected_properties:
                    if not self.ensemble.has_property(-1, prop):
                        raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!")
        else:
            return