Source code for cctk.orca_file

import re
import numpy as np

from enum import Enum

from cctk import File, Molecule, ConformationalEnsemble
from cctk.helper_functions import get_symbol, get_corrected_free_energy

import cctk.parse_orca as parse

class OrcaJobType(Enum):
    """
    Class representing allowed Orca job types. Not an exhaustive list, but should be fairly comprehensive.

    The value should be the Orca keyword, to permit automatic assignment.

    All jobs have type ``SP`` by default.
    """

    SP = ("sp", ["energy", "scf_iterations"])
    """
    Single point energy calculation.
    """

    OPT = ("opt", ["rms_gradient", "rms_step", "max_gradient", "max_step"])
    """
    Geometry optimization.
    """

    FREQ = ("freq", ["gibbs_free_energy", "enthalpy", "frequencies", "temperature"])
    """
    Hessian calculation.
    """

    NMR = ("nmr", ["isotropic_shielding"])
    """
    NMR shielding prediction.
    """

    def __init__(self, value, expected_properties):
        self._value_ = value
        self.expected_properties = expected_properties

[docs] class OrcaFile(File): """ Generic class for all Orca `.inp` and `.out` files. Attributes: ensemble (ConformationalEnsemble): `ConformationalEnsemble` instance job_types (list): list of ``OrcaJobType`` instances header (str): keyword line or lines variables (dict): list of variables to specify (e.g. ``{"maxcore": 2000}``). blocks (dict): list of blocks to change specific settings In general, the key should be the block name and the value should be a list of desired lines. For instance, configuring a time-dependent DFT job might look like ``{"tddft": ["maxdim 5", "nroots 50"]}`` successful_terminations (int): number of successful terminations elapsed_time (float): total time for job in seconds """
[docs] def __init__(self, job_types, ensemble=None, header=None, variables=None, blocks=None): if job_types is not None: if not all(isinstance(job, OrcaJobType) for job in job_types): raise TypeError(f"invalid job type list {job_types}") self.job_types = job_types else: raise ValueError("need job types for new Orca file") if ensemble and isinstance(ensemble, ConformationalEnsemble): self.ensemble = ensemble else: self.ensemble = ConformationalEnsemble() if header and isinstance(header, str): self.header = header else: self.header = None if blocks and isinstance(blocks, dict): for lines in list(blocks.values()): assert isinstance(lines, list) self.blocks = blocks else: self.blocks = {} if variables and isinstance(variables, dict): self.variables = variables else: self.variables = {}
[docs] @classmethod def read_file(cls, filename): if re.search("inp$", filename): return cls._read_inp_file(filename) multiple_lines = parse.split_multiple_inputs(filename) files = [] for lines in multiple_lines: input_lines = parse.extract_input_file(lines) header = parse.read_header(input_lines) job_types = cls._assign_job_types(header) variables, blocks = parse.read_blocks_and_variables(input_lines) successful_scf_convergence = 0 successful_opt = 0 successful_freq = 0 successful_NMR_EPR = 0 is_scan_job = False # add identifiers for successful termination of other job types elapsed_time = 0 for line in lines: if line.startswith("FINAL SINGLE POINT ENERGY"): #### SCF converged at least once successful_scf_convergence += 1 elif line.strip().startswith("*** THE OPTIMIZATION HAS CONVERGED ***"): #### geometry converged successful_opt += 1 elif line.startswith("VIBRATIONAL FREQUENCIES"): #### a frequency job was completed successful_freq += 1 elif line.startswith("Maximum memory used throughout the entire EPRNMR-calculation:"): #### an EPR NMR job was completed successful_NMR_EPR += 1 elif line.strip().startswith("* Relaxed Surface Scan *"): #### this is a scan job is_scan_job = True elif line.startswith("Sum of individual times ..."): #### the job was completed (note the '...' is key) fields = line.split() assert len(fields) == 9 or len(fields) == 10, f"unexpected number of fields on elapsed time line:\n{line}" elapsed_time = float(fields[5]) # different than G16 "successful termination" success = 0 if successful_opt > 0: success += 1 if successful_freq > 0: success += 1 if successful_NMR_EPR > 0: success += 1 if successful_scf_convergence > 0: success += 1 energies, iters = parse.read_energies(lines) if len(energies) == 0: return None atomic_numbers, geometries = parse.read_geometries(lines, num_to_find=len(energies)) assert len(geometries) >= len(energies), "can't have an energy without a geometry (cf. pigeonhole principle)" # this approach does not work with the option miniprint charge = lines.find_parameter("Total Charge Charge ....", 5, 4)[0] multip = lines.find_parameter("Multiplicity Mult ....", 4, 3)[0] #### TODO # detect Mayer bond orders f = OrcaFile(job_types, header=header, variables=variables, blocks=blocks) f.elapsed_time = elapsed_time f.successful_terminations = success molecules = [None] * len(geometries) properties = [{} for _ in range(len(geometries))] for idx, geom in enumerate(geometries): molecules[idx] = Molecule(atomic_numbers, geom, charge=charge, multiplicity=multip, bonds=None) if idx < len(energies): properties[idx]["energy"] = energies[idx] properties[idx]["filename"] = filename properties[idx]["iteration"] = idx properties[idx]["scf_iterations"] = iters[idx] if multip > 1: s2 = lines.find_parameter("Expectation value of", 6, 5) for idx, spin_contam in enumerate(s2): properties[idx]["S**2"] = spin_contam if OrcaJobType.OPT in job_types: rms_grad, max_grad, rms_step, max_step = parse.read_gradients(lines, len(properties)) for idx in range(len(rms_grad)): if idx < len(rms_grad): properties[idx]["rms_gradient"] = rms_grad[idx] if idx < len(max_grad): properties[idx]["max_gradient"] = max_grad[idx] if idx < len(rms_step): properties[idx]["rms_step"] = rms_step[idx] if idx < len(max_step): properties[idx]["max_step"] = max_step[idx] if OrcaJobType.FREQ in job_types: properties[-1]["frequencies"] = sorted(parse.read_freqs(lines, successful_freq)) enthalpies = lines.find_parameter("Total Enthalpy", expected_length=5, which_field=3) try: properties[-1]["enthalpy"] = enthalpies[-1] except Exception: pass gibbs = lines.find_parameter("Final Gibbs free", expected_length=7, which_field=5) try: properties[-1]["gibbs_free_energy"] = gibbs[-1] except Exception: pass try: temperature = lines.find_parameter("Temperature", expected_length=4, which_field=2) if len(temperature) > 0 and len(gibbs) > 0: properties[-1]["temperature"] = temperature[-1] corrected_free_energy = get_corrected_free_energy(gibbs[-1], properties[-1]["frequencies"], frequency_cutoff=100.0, temperature=temperature[-1]) properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy) except Exception: pass if OrcaJobType.NMR in job_types: nmr_shifts = parse.read_nmr_shifts(lines, molecules[0].num_atoms()) if nmr_shifts is not None: properties[-1]["isotropic_shielding"] = nmr_shifts try: charges = parse.read_mulliken_charges(lines, successful_opt, is_scan_job) assert len(charges) == len(atomic_numbers) properties[-1]["mulliken_charges"] = charges except Exception: pass try: charges = parse.read_loewdin_charges(lines, successful_opt, is_scan_job) assert len(charges) == len(atomic_numbers) properties[-1]["lowdin_charges"] = charges except Exception: pass try: dipole = lines.find_parameter("Magnitude \(Debye\)", 4, 3) properties[-1]["dipole_moment"] = dipole[0] except Exception: pass for mol, prop in zip(molecules, properties): f.ensemble.add_molecule(mol, properties=prop) f.check_has_properties() files.append(f) if len(files) == 1: return files[0] else: return files
@classmethod def _read_inp_file(cls, filename): print("reading ``.inp`` files is not currently supported :(") return None
[docs] def write_file(self, filename, molecule=None, header=None, variables=None, blocks=None): """ Write a ``.inp`` file, using object attributes. If no header is specified, the object's header will be used. Args: filename (str): path to the new file molecule (int): which molecule to use -- passed to ``self.get_molecule()``. Default is -1 (e.g. the last molecule), but positive integers will select from self.ensemble.molecules (0-indexed). A ``Molecule`` object can also be passed, in which case that molecule will be written to the file. header (str): header for new file """ if molecule is None: molecule = -1 if not isinstance(molecule, Molecule): molecule = self.ensemble.molecules[molecule] if header is None: header = self.header if variables is None: variables = self.variables if blocks is None: blocks = self.blocks self.write_molecule_to_file(filename, molecule, header, variables, blocks)
[docs] @classmethod def write_molecule_to_file(cls, filename, molecule, header, variables={"maxcore": 2000}, blocks={"pal": ["nproc 16"]}, print_symbol=False): """ Write an ``.inp`` file using the given molecule. Args: filename (str): path to the new file molecule (Molecule): which molecule to use -- a ``Molecule`` object. header (str): header for new file print_symbol (Bool): if atomic symbols should be printed instead of atomic numbers """ assert isinstance(molecule, Molecule), "need a valid molecule to write a file!" assert isinstance(header, str), "can't write a file without a header" text = f"{header.strip()}\n" if variables is not None: assert isinstance(variables, dict), "blocks must be a dictionary" for k, v in variables.items(): text += f"%{k} {v}\n" if blocks is not None: assert isinstance(blocks, dict), "blocks must be a dictionary" for k, v in blocks.items(): text += f"%{k}\n" for line in v: text += f"\t{line}\n" text += "end\n" text +="\n" text += f"* xyz {int(molecule.charge)} {int(molecule.multiplicity)}\n" for index, Z in enumerate(molecule.atomic_numbers, start=1): line = molecule.get_vector(index) if print_symbol: Z = get_symbol(Z) text += f"{Z:>2} {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n" else: text += f"{Z:2d} {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n" text += "*\n" text += "\n" #### write the file super().write_file(filename, text)
[docs] def get_molecule(self, num=None): """ Returns the last molecule (from an optimization job or other multi-molecule jobs) or the only molecule (from other jobs). If ``num`` is specified, returns that job (1-indexed for positive numbers). So ``job.get_molecule(3)`` will return the 3rd element of ``job.molecules``, not the 4th. """ # some methods pass num=None, which overrides setting the default above if num is None: num = -1 if not isinstance(num, int): raise TypeError("num must be int") return self.ensemble.molecule_list()[num]
[docs] def num_imaginaries(self): """ Returns the number of imaginary frequencies. """ return len(self.imaginaries())
[docs] def imaginaries(self): """ Returns the imaginary frequencies, rounded to the nearest integer. """ if (OrcaJobType.FREQ in self.job_types) and (self.ensemble[-1:,"frequencies"] is not None): freqs = self.ensemble[-1:,"frequencies"] if not isinstance(freqs, list) or len(freqs) == 0: return list() else: return list(map(int, np.array(freqs)[np.array(freqs) < 0])) else: return list()
@classmethod def _assign_job_types(cls, header): """ Assigns ``OrcaJobType`` objects from header. ``OrcaJobType.SP`` is assigned by default. Args: header (str): Orca header Returns: list of ``OrcaJobType`` objects """ job_types = [] for job_type in OrcaJobType: if re.search(f"{job_type.value}", str(header), re.IGNORECASE): job_types.append(job_type) if OrcaJobType.SP not in job_types: job_types.append(OrcaJobType.SP) # include SP in all jobs, whether specified in header or not if re.search("ScanTs", str(header), re.IGNORECASE): job_types.append(OrcaJobType.OPT) #ScanTs is an optimization job that does not contain the string "opt" in it's keyword return job_types
[docs] def check_has_properties(self): """ Checks that the file has all the appropriate properties for its job types, and raises ``ValueError`` if not. This only checks the last molecule in ``self.ensemble``, for now. """ if self.successful_terminations > 0: if self.successful_terminations == 2 and ((OrcaJobType.OPT in self.job_types) and (OrcaJobType.FREQ in self.job_types)): return # opt and freq jobs should have three terminations for job_type in self.job_types: for prop in job_type.expected_properties: if not self.ensemble.has_property(-1, prop): raise ValueError(f"expected property {prop} for job type {job_type}, but it's not there!") else: return