import re
import warnings
import numpy as np
import cctk
from cctk.helper_functions import get_symbol, get_number
[docs]
class XYZFile(cctk.File):
"""
Class representing plain ``.xyz`` files.
Attributes:
titles (list of str): the title or titles from the file
ensemble (Ensemble): `Ensemble` instance
molecule (Molecule): `Molecule` instance representing the first molecule in the file. deprecated, but present for backwards compatibility.
"""
[docs]
def __init__(self, ensemble, titles):
assert isinstance(ensemble, cctk.Ensemble), "ensemble must be cctk.Ensemble"
self.ensemble = ensemble
# backwards compatibility
self.molecule = ensemble.molecule_list()[0]
assert isinstance(titles, list), "title must be list"
self.titles = titles
def __getattribute__(self, name):
if name == "molecule":
warnings.warn("XYZFile attribute ``molecule`` will be removed in upcoming releases of cctk. Use ``ensemble`` attribute instead!", DeprecationWarning, stacklevel=2)
return cctk.File.__getattribute__(self, name)
[docs]
@classmethod
def read_file(cls, filename, charge=0, multiplicity=1, conformational=False):
"""
Factory method to create new XYZFile instances.
Arguments:
filename (str): path to ``.xyz`` file
charge (int): charge of resultant molecule
multiplicity (int): multiplicity of resultant molecule
conformational (bool): whether or not it's a conformational ensemble
"""
assert isinstance(charge, int), "charge must be integer"
assert isinstance(multiplicity, int), "multiplicity must be integer"
assert multiplicity > 0, "multiplicity must be a positive integer"
ensemble = cctk.Ensemble()
if conformational:
ensemble = cctk.ConformationalEnsemble()
titles = list()
lines = super().read_file(filename)
current_lines = list()
for line in lines:
if re.search(r"^\s*\d+$", line) and len(current_lines) > 2:
if len(current_lines) > 0:
t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity)
ensemble.add_molecule(m)
titles.append(t)
current_lines = list()
current_lines.append(line)
# catch the last molecule
if len(current_lines) > 0:
t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity)
ensemble.add_molecule(m)
titles.append(t)
return XYZFile(ensemble, titles)
[docs]
@classmethod
def mol_from_lines(cls, lines, charge=0, multiplicity=1):
num_atoms = 0
try:
num_atoms = int(lines[0])
except Exception:
raise ValueError("can't get the number of atoms from the first line!")
title = lines[1]
atomic_numbers = np.zeros(shape=num_atoms, dtype=np.int8)
geometry = np.zeros(shape=(num_atoms, 3))
for index, line in enumerate(lines[2:]):
# ignore blank lines
if len(line.strip()) == 0:
continue
pieces = list(filter(None, line.split(" ")))
try:
if re.match("[0-9]", pieces[0]):
atomic_numbers[index] = int(pieces[0])
elif re.match("([A-Za-z])+([0-9])+", pieces[0]):
# mdtraj writes in this format, for some reason
m = re.match("([A-Za-z])+([0-9])+", pieces[0])
atomic_numbers[index] = int(get_number(m.group(1)))
else:
atomic_numbers[index] = int(get_number(pieces[0]))
geometry[index][0] = float(pieces[1])
geometry[index][1] = float(pieces[2])
geometry[index][2] = float(pieces[3])
except Exception:
raise ValueError(f"can't parse line {index+2}: {line}")
assert num_atoms == len(atomic_numbers), "wrong number of atoms!"
molecule = cctk.Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multiplicity)
return title, molecule
[docs]
@classmethod
def write_molecule_to_file(cls, filename, molecule, title="title", append=False):
"""
Write an ``.xyz`` file, using object attributes.
Args:
filename (str): path to the new file
molecule (Molecule): molecule to write
title (str): title of file
append (Bool): whether or not to append to file
"""
assert isinstance(molecule, cctk.Molecule), "molecule is not a valid Molecule object!"
text = f"{molecule.num_atoms()}\n"
text += f"{title}\n"
for index, Z in enumerate(molecule.atomic_numbers, start=1):
line = molecule.get_vector(index)
text += f"{get_symbol(Z):>2} {line[0]:>13.8f} {line[1]:>13.8f} {line[2]:>13.8f}\n"
if append:
super().append_to_file(filename, text)
else:
super().write_file(filename, text)
[docs]
def write_file(self, filename, idx=-1):
"""
Write an ``.xyz`` file, using object attributes.
Args:
idx (int): the index of the molecule to write
"""
assert isinstance(idx, int), "idx must be int"
self.write_molecule_to_file(filename, self.get_molecule(idx), title=self.titles[idx])
[docs]
@classmethod
def read_trajectory(cls, filename, **kwargs):
"""
Post refactoring, just an alias for ``XYZFile.read_file()``.
"""
return cls.read_file(filename, **kwargs)
[docs]
@classmethod
def read_ensemble(cls, filename, **kwargs):
"""
Post refactoring, just an alias for ``XYZFile.read_file()``.
"""
return cls.read_file(filename, **kwargs)
[docs]
@classmethod
def write_ensemble_to_file(cls, filename, ensemble, titles=None):
"""
Write a ``cctk.Ensemble`` to a single ``.xyz`` file. Can be viewed in MOLDEN.
Arguments:
filename (str): path to ``.xyz`` file
ensemble (Ensemble): the collection of structures to write
titles (None, str, or list of str): if None, the titles of the Molecules will be used;
if one str, then the same name will be used for all
molecules; if iterable, then the titles are assumed
to parallel the indexing of the list
"""
assert isinstance(filename, str), f"got {type(filename)} for filename but expected str"
assert isinstance(ensemble, cctk.Ensemble), f"ensemble {ensemble} is not a cctk.Ensemble"
assert len(ensemble)>0, "can't write empty Ensemble to xyz file"
if titles is None:
pass
elif isinstance(titles, str):
assert len(titles) > 0, "zero length title not allowed"
titles = [titles] * len(ensemble)
elif isinstance(titles, (list,np.ndarray)):
assert len(titles) == len(ensemble)
for i, title in enumerate(titles):
assert isinstance(title, str), f"got {type(filename)} at index {i} of titles, but expected str"
else:
raise ValueError(f"got {type(titles)} for title but expected None, str, or iterable")
for idx,molecule in enumerate(ensemble._items):
append = idx > 0
if titles is None:
title = molecule.name
if not (isinstance(title, str) and len(title)>0):
title = "title"
else:
title = titles[idx]
cls.write_molecule_to_file(filename, molecule, title=title, append=append)
[docs]
def get_molecule(self, num=None):
"""
Returns a given molecule.
If ``num`` is specified, returns ``self.ensemble.molecule_list()[num]``
"""
# some methods pass num=None, which overrides setting the default above
if num is None:
num = -1
assert isinstance(num, int), "num must be int"
return self.ensemble.molecule_list()[num]