Upload 8 files
Browse files- __init__.py +8 -13
- cp2k.py +65 -0
- i_pi.py +47 -0
- mdtraj.py +77 -0
- n2p2.py +65 -0
- runner.py +134 -0
- utilities.py +336 -0
- xyz.py +94 -0
__init__.py
CHANGED
@@ -1,14 +1,9 @@
|
|
1 |
-
|
2 |
|
3 |
-
from
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
__version__ = "1.0.0"
|
11 |
-
|
12 |
-
# Read URL of the feed from config file
|
13 |
-
_cfg = tomllib.loads(resources.read_text("reader", "config.toml"))
|
14 |
-
URL = _cfg["feed"]["url"]
|
|
|
1 |
+
"""Input and output functionality to serve the rest of the package."""
|
2 |
|
3 |
+
from .utilities import *
|
4 |
+
from .cp2k import *
|
5 |
+
from .mdtraj import *
|
6 |
+
from .i_pi import *
|
7 |
+
from . import xyz
|
8 |
+
from . import runner
|
9 |
+
from . import n2p2
|
|
|
|
|
|
|
|
|
|
cp2k.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Functions for CP2K-specific input/output."""
|
2 |
+
|
3 |
+
__all__ = ['add_energy_cp2k_comment', 'read_frames_cp2k']
|
4 |
+
|
5 |
+
from itertools import repeat
|
6 |
+
|
7 |
+
from .utilities import Frame, merge_frames, read_frames
|
8 |
+
|
9 |
+
|
10 |
+
def add_energy_cp2k_comment(frames):
    """Parse CP2K energy and inject it into frames.

    For each frame in `frames`, extract a CP2K-formatted potential energy from
    the comment string and store it in the frame. The comment is parsed as a
    comma-separated list of `key = value` pairs and the energy is taken from the
    pair whose key is `E`. Energy from CP2K is in Hartree, so no conversion is
    needed.

    Arguments:
        frames: iterable of frame objects with `comment` and `energy` attributes

    Yields:
        the same frame objects, with `energy` set

    Raises:
        ValueError: if a frame already has an energy, or if its comment does
            not contain a parsable `E` entry
    """

    for frame in frames:

        if frame.energy is not None:
            raise ValueError('Energy already present.')

        # a missing comment cannot contain an energy
        pairs = [] if frame.comment is None else frame.comment.split(',')

        for pair in pairs:
            items = pair.split('=')
            if items[0].strip() != 'E':
                continue
            try:
                frame.energy = float(items[1])
            except (IndexError, ValueError):
                raise ValueError('No CP2K energy found in comment line.') from None
            break
        else:
            # the loop ran to completion without meeting an `E` entry
            raise ValueError('No CP2K energy found in comment line.')

        yield frame
|
33 |
+
|
34 |
+
|
35 |
+
def read_frames_cp2k(fn_positions, cell=None, fn_forces=None, read_energy: bool = True, force_unit=1.0):
    """Read data specifically produced by CP2K.

    Positions are read from an XYZ trajectory (the XYZ reader's default unit is
    used), optionally complemented by energies parsed from the XYZ comment lines,
    a constant cell and forces from a second XYZ file.

    Arguments:
        fn_positions: position trajectory file name, XYZ format
        cell: a constant cell to use in all frames, optional
        fn_forces: forces file name, XYZ format, optional
        read_energy: whether to read energies from comments in `fn_positions`
        force_unit: passed as `unit` to the XYZ reader of `fn_forces`

    Yields:
        merged `Frame` objects
    """

    # the position trajectory is the primary source, optionally with energies from comments
    sources = [read_frames(fn_positions, fformat='xyz')]
    if read_energy:
        sources[0] = add_energy_cp2k_comment(sources[0])

    # an endless supply of the same constant cell, if there is one
    if cell is not None:
        sources.append(repeat(Frame(cell=cell)))

    # forces come from their own XYZ file, scaled by `force_unit`
    if fn_forces is not None:
        frames_forces = read_frames(fn_forces, fformat='xyz', name_data='forces', unit=force_unit)
        sources.append(frames_forces)

    # combine everything frame by frame
    yield from merge_frames(*sources)
|
i_pi.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Read data specifically produced by i-PI"""
|
2 |
+
|
3 |
+
__all__ = ['read_frames_i_pi']
|
4 |
+
|
5 |
+
import itertools
|
6 |
+
|
7 |
+
from .utilities import Frame, merge_frames, read_frames
|
8 |
+
|
9 |
+
|
10 |
+
def read_frames_i_pi(fn_positions, cell=None, fn_forces=None, fn_energies=None, column_energy=4):
    """Read data specifically produced by i-PI.

    We assume typically used units - angstrom for positions, atomic units for forces and energies.
    i-PI can save data in any units, but we do not attempt to be fully general here. The strides
    of all files are assumed to be the same. For other units or strides, compose the frames by hand
    or write a custom reader function.

    Arguments:
        fn_positions: position trajectory file name, XYZ format
        cell: a constant cell to use in all frames, optional
        fn_forces: forces file name, XYZ format, optional
        fn_energies: energies file name, read with the `N2P2_E` reader, optional
        column_energy: passed as `column` to the reader of `fn_energies`

    Yields:
        merged `Frame` objects
    """

    # the position trajectory is the primary source
    sources = [read_frames(fn_positions, fformat='xyz')]

    # an endless supply of the same constant cell, if there is one
    if cell is not None:
        sources.append(itertools.repeat(Frame(cell=cell)))

    # forces from their own XYZ file, taken as they are (unit factor 1.0)
    if fn_forces is not None:
        frames_forces = read_frames(fn_forces, fformat='xyz', name_data='forces', unit=1.0)
        sources.append(frames_forces)

    # energies from a column of a text file
    if fn_energies is not None:
        frames_energies = read_frames(fn_energies, fformat='N2P2_E', column=column_energy)
        sources.append(frames_energies)

    # combine everything frame by frame
    yield from merge_frames(*sources)
|
mdtraj.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Read data using MDTraj."""
|
2 |
+
|
3 |
+
__all__ = [
|
4 |
+
'read_frames_mdtraj',
|
5 |
+
]
|
6 |
+
|
7 |
+
try:
|
8 |
+
import mdtraj
|
9 |
+
except ImportError:
|
10 |
+
mdtraj = None
|
11 |
+
|
12 |
+
from .utilities import Frame
|
13 |
+
from ..constants import nm
|
14 |
+
|
15 |
+
|
16 |
+
def read_frames_mdtraj(fn_in, top=None, names_atoms='type', name_data='positions', unit=nm, unit_cell=nm, chunk=100):
    """Read data from a file using the MDTraj package.

    Arguments:
        fn_in: name of trajectory file to read, passed to `mdtraj.iterload`
        top: MDTraj topology, passed to `mdtraj.iterload`
        names_atoms: which atom names to use, 'type' or 'element'
        name_data: what quantity to take the data as, 'positions' or 'forces'
        unit: multiplicative factor applied to the atomic data
        unit_cell: multiplicative factor applied to the cell vectors
        chunk: size of one trajectory chunk, passed to `mdtraj.iterload`

    Yields:
        One AML `Frame` object at a time

    Raises:
        ValueError: for an unsupported `name_data` or `names_atoms`
    """

    # open the trajectory for iteration
    trajectory = mdtraj.iterload(fn_in, top=top, chunk=chunk)

    if name_data not in ('positions', 'forces'):
        raise ValueError(f'Unsupported `name_data`: {name_data}. Expected "positions" or "forces".')

    # atom names are collected from the first chunk and reused afterwards
    names = None

    for piece in trajectory:

        if names is None:
            if names_atoms == 'type':
                names = [atom.name for atom in piece.topology.atoms]
            elif names_atoms == 'element':
                names = [atom.element.symbol for atom in piece.topology.atoms]
            else:
                raise ValueError(f'Expected "type" or "element" for `name_atoms`, got {names_atoms}.')

        for i in range(len(piece)):

            # atomic data of frame `i` of this chunk
            data = piece.xyz[i, :, :] * unit

            # cell of frame `i`, if the trajectory carries cell information
            cell = None if piece.unitcell_vectors is None else piece.unitcell_vectors[i, ...] * unit_cell

            # the data goes under the attribute selected by `name_data`
            yield Frame(names=names, cell=cell, **{name_data: data})
|
73 |
+
|
74 |
+
|
75 |
+
# MDTraj is optional: without it, withdraw the reader from the public names and from the module
if mdtraj is None:
    __all__.remove('read_frames_mdtraj')
    del read_frames_mdtraj
|
n2p2.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Functions to read and write n2p2 data files."""
|
2 |
+
|
3 |
+
__all__ = [
|
4 |
+
'read_epre_n2p2',
|
5 |
+
'read_fpre_n2p2'
|
6 |
+
]
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
from .utilities import Frame, register_io
|
11 |
+
|
12 |
+
|
13 |
+
@register_io('N2P2_E','read')
def read_epre_n2p2(f_in, column=3):
    """Read one predicted energy from an open energy prediction file.

    Lines containing `#` are treated as comments and skipped, as are blank lines.

    Arguments:
        f_in: open text file with one energy record per line
        column: zero-based index of the whitespace-separated column holding the energy

    Returns:
        `Frame` with `energy` set, or `None` if there is no more data
    """

    while True:
        line = f_in.readline()
        # end of file, possibly reached only after trailing comments or blank lines
        if not line:
            return None
        if ('#' not in line) and line.strip():
            break

    energy = float(line.split()[column])
    return Frame(energy=energy)
|
28 |
+
|
29 |
+
|
30 |
+
@register_io('N2P2_F','read')
def read_fpre_n2p2(f_in):
    """Read the predicted forces of one configuration from an open force prediction file.

    Every data line contributes one force component, taken from its fourth
    whitespace-separated column; the first column identifies the configuration.
    Consecutive lines with the same configuration are gathered into one frame.
    Lines containing `#` are treated as comments; comments and blank lines
    before the first data line are skipped.

    Arguments:
        f_in: open, seekable text file

    Returns:
        `Frame` with `forces` of shape (number of components // 3, 3),
        or `None` if there is no more data
    """

    # find the first data line of the configuration
    while True:
        line = f_in.readline()
        # end of file, possibly reached only after trailing comments or blank lines
        if not line:
            return None
        if ('#' not in line) and line.strip():
            break

    items = line.split()
    config = items[0]
    forces = [float(items[3])]

    while True:
        # remember where the next line starts so that it can be handed back
        last_pos = f_in.tell()
        line = f_in.readline()

        # no more data in the file
        if not line:
            break

        items = line.split()

        # tolerate blank lines, e.g. at the end of the file
        if not items:
            continue

        # the next configuration begins: un-read the line and stop
        if items[0] != config:
            f_in.seek(last_pos)
            break

        forces.append(float(items[3]))

    forces = np.array(forces)
    forces = forces.reshape((len(forces)//3, 3))
    return Frame(forces=forces)
|
runner.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Functions to read and write RuNNer data files."""
|
2 |
+
|
3 |
+
__all__ = [
|
4 |
+
'write_frame_runner',
|
5 |
+
'read_frame_runner',
|
6 |
+
]
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
from .utilities import Frame, register_io
|
11 |
+
|
12 |
+
|
13 |
+
@register_io('RuNNer', 'read', 'data')  # noqa: C901
def read_frame_runner(f_in):
    """Read one frame of the RuNNer format from an open file.

    A frame starts with a `begin` line and is read up to its `end` line. Blank
    lines before and inside the frame are ignored.

    Arguments:
        f_in: open file in the RuNNer format

    Returns:
        `Frame` instance or `None` if there is no more data

    Raises:
        ValueError: if the frame does not start with `begin`, contains an
            unknown tag, has no atoms, or has cell data that is not 3x3
    """

    # For reference, in n2p2, this is implemented in `Structure::readFromFile`, found somewhere here:
    # https://github.com/CompPhysVienna/n2p2/blob/master/src/libnnp/Structure.cpp#L84

    # find the line that opens the frame, tolerating blank lines before it
    line_begin = f_in.readline()
    while line_begin and not line_begin.strip():
        line_begin = f_in.readline()

    # no more data in the file
    if not line_begin:
        return None

    # there is some data, frame should start with 'begin'
    if line_begin.strip() != 'begin':
        raise ValueError(f'Expected "begin" at the start of a frame, got: {line_begin.strip()!r}')

    comment = None
    cell = []
    names = []
    positions = []
    forces = []
    energy = None

    for line in f_in:
        items = line.split()

        # nothing to interpret on a blank line
        if not items:
            continue

        tag = items[0]

        if tag == 'comment':
            comment = " ".join(items[1:])

        elif tag == 'lattice':
            cell.append([float(item) for item in items[1:]])

        elif tag == 'atom':
            positions.append([float(item) for item in items[1:4]])
            names.append(items[4])
            forces.append([float(item) for item in items[7:10]])
            # items[5] is atomic charge, only RuNNer itself (potentially) deals with that
            # items[6] is atomic energy - not really used by anyone

        elif tag == 'energy':
            energy = float(items[1])

        elif tag == 'charge':
            # total charge is not stored in the frame
            pass

        elif tag == 'end':
            break

        else:
            raise ValueError('Unexpected data in file.')

    if len(names) == 0:
        raise ValueError('No atomic data.')

    # the cell is either absent or consists of exactly three lattice vectors
    cell = np.array(cell)
    if cell.shape != (3, 3) and len(cell) != 0:
        raise ValueError('Wrong cell data.')
    if len(cell) == 0:
        cell = None

    positions = np.array(positions)
    forces = np.array(forces)

    return Frame(names=names, positions=positions, comment=comment, cell=cell, energy=energy, forces=forces)
|
88 |
+
|
89 |
+
|
90 |
+
@register_io('RuNNer', 'write', 'data')
def write_frame_runner(f_out, frame):
    """Write one frame in the RuNNer format to an open file.

    "Cell" and "lattice" is the same data, the terminology of the file format is
    used in the file. Atomic charges, atomic energies and the total charge are
    currently not supported and zeros are written for these. Zeros are also
    written for missing forces and for a missing energy.

    Arguments:
        f_out: open file to write to
        frame: frame with `names` and `positions`, optionally `comment`, `cell`, `energy` and `forces`

    Raises:
        ValueError: if the frame has no atom names or no positions
    """

    # names and positions are the minimum needed to write a frame
    if (frame.names is None) or (frame.positions is None):
        raise ValueError('Frame does not contain required properties - atom names and positions.')

    number = '{:13.6f}'
    line_lattice = 'lattice ' + 3*'{:16.6f}' + '\n'
    line_atom = 'atom ' + 3*number + '{:^6s}' + 5*number + '\n'
    line_energy = 'energy ' + number + '\n'
    line_charge = 'charge ' + number + '\n'

    f_out.write('begin\n')

    if frame.comment is not None:
        f_out.write('comment ' + frame.comment + '\n')

    if frame.cell is not None:
        for vector in frame.cell:
            f_out.write(line_lattice.format(*vector))

    # one line per atom: position, name, zero charge, zero atomic energy, force
    no_forces = frame.forces is None
    for i, name in enumerate(frame.names):
        position = frame.positions[i]
        force = (0.0, 0.0, 0.0) if no_forces else frame.forces[i]
        f_out.write(line_atom.format(*position, name, 0.0, 0.0, *force))

    energy = 0.0 if frame.energy is None else frame.energy
    f_out.write(line_energy.format(energy))

    f_out.write(line_charge.format(0.0))

    f_out.write('end\n')
|
utilities.py
ADDED
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Input and output utilities.
|
2 |
+
|
3 |
+
The central concept in the I/O infrastructure is a "frame" - a dataclass that represents one atomic
|
4 |
+
configuration that contains data of different kinds. Atomic units are used in the frame object itself,
|
5 |
+
unless explicitly stated otherwise. Units specified by the file format are used in the files themselves.
|
6 |
+
"""
|
7 |
+
|
8 |
+
__all__ = [
|
9 |
+
'AnyPath',
|
10 |
+
'get_fn_test',
|
11 |
+
'Frame',
|
12 |
+
'open_safe',
|
13 |
+
'working_directory',
|
14 |
+
'temporary_directory',
|
15 |
+
'to_file',
|
16 |
+
'from_file',
|
17 |
+
'read_frames',
|
18 |
+
'write_frames',
|
19 |
+
'merge_frames',
|
20 |
+
]
|
21 |
+
|
22 |
+
import os
|
23 |
+
import shutil
|
24 |
+
from collections import defaultdict
|
25 |
+
from contextlib import contextmanager
|
26 |
+
from dataclasses import dataclass
|
27 |
+
from pathlib import Path
|
28 |
+
from typing import Optional, Sequence, Union
|
29 |
+
|
30 |
+
import numpy as np
|
31 |
+
|
32 |
+
from ..utilities import AMLIOError
|
33 |
+
|
34 |
+
|
35 |
+
# registry of frame I/O functions, one dictionary per file format, filled by `register_io`
formats = defaultdict(dict)


# lookup table from file name extension to file format name
ext2fmt = {}


# anything accepted as a file system path
AnyPath = Union[str, Path]
|
44 |
+
|
45 |
+
|
46 |
+
def get_fn_test(filename):
    """Return the absolute file name of a test data file.

    The test data directory is `../tests/data`, relative to the directory two
    levels above this module file.

    Arguments:
        filename: name of file in the test data directory, no path

    Returns:
        resolved `Path` of the file
    """
    dir_base = Path(__file__).parent.parent
    dir_data = dir_base / '../tests/data'
    return (dir_data / filename).resolve()
|
54 |
+
|
55 |
+
|
56 |
+
def register_io(fformat: str, operation: str, extension: Union[str, None] = None):
    """Decorator to register an I/O operation for a specific file format.

    Optionally, a file name extension can be registered as well, to enable
    automatic detection of the file format from a file name.

    Arguments:
        fformat: name of file format
        operation: I/O operation - "read" or "write"
        extension: file name extension or `None`

    Returns:
        decorator which registers the function and returns it unchanged

    Raises:
        ValueError: when the decorator is applied, if `operation` is not
            recognized or `extension` is already registered for another format
    """
    def decorator(function):
        if operation not in ('read', 'write'):
            raise ValueError('Unrecognized operation. Allowed values: "read", "write".')

        # refuse a conflicting extension before touching any of the registries
        if (extension is not None) and (ext2fmt.get(extension, fformat) != fformat):
            raise ValueError(f'Attempted to register the same file extension ({extension}) twice.')

        formats[fformat][operation] = function
        if extension is not None:
            formats[fformat]['extension'] = extension
            ext2fmt[extension] = fformat

        # hand the function back, otherwise the decorated name would be bound to `None`
        return function
    return decorator
|
77 |
+
|
78 |
+
|
79 |
+
@dataclass(eq=False)
class Frame:
    """Container for all data of a single frame.

    Serves to pass data between data structures and I/O routines. Every field
    defaults to `None`, meaning that this kind of data is not set/available.
    No comparison operator is generated (`eq=False`), at least for now, as
    comparing NumPy arrays is more involved.
    """

    # `__slots__` is not used, as slots do not work correctly with dataclass.
    # An alternative: https://pypi.org/project/dataslots/
    # Some context: https://github.com/ericvsmith/dataclasses/issues/28

    # atom names
    names: Optional[Sequence] = None
    # atomic positions
    positions: Optional[np.ndarray] = None
    # simulation cell
    cell: Optional[np.ndarray] = None
    # free-form comment
    comment: Optional[str] = None
    # total energy
    energy: Optional[float] = None
    # atomic forces
    forces: Optional[np.ndarray] = None

    def update(self, other: 'Frame', force: bool = False):
        """Take over data from another frame.

        Fields of `other` that are `None` are ignored. The remaining ones fill
        the fields of this frame that are still `None`, or all of them if
        `force` is true. Atom names are only checked, never copied.

        Arguments:
            other: another frame
            force: whether to overwrite data

        Raises:
            ValueError: if `other` has atom names that differ from ours
        """

        # atom names of the other frame, if it has any, must match ours
        names_other = other.names
        if (names_other is not None) and (self.names != names_other):
            raise ValueError('Inconsistent atom names.')

        for attr in ('positions', 'cell', 'comment', 'energy', 'forces'):
            value = getattr(other, attr)
            # nothing to take over
            if value is None:
                continue
            if force or (getattr(self, attr) is None):
                setattr(self, attr, value)
|
119 |
+
|
120 |
+
|
121 |
+
def open_safe(filename, mode='r', buffering=-1, verbose=False):
    """Open a file like `open` does, without ever overwriting an existing file.

    When opening for writing and `filename` exists, the existing file is first
    renamed to `#<name>#<i>#` in the same directory, with the lowest `i` (from 0)
    that is not taken yet.

    Arguments:
        filename: name of file to open
        mode: file open mode, must start with 'r', 'a' or 'w'
        buffering: passed through to `open`
        verbose: whether to print to standard output what backup was performed

    Returns:
        an open file

    Raises:
        NotImplementedError: for any other file open mode
    """

    kind = mode[0]

    # only these modes are understood, anything else needs more work
    if kind not in ('w', 'r', 'a'):
        raise NotImplementedError(f'Unsupported file open mode: {mode:s}.')

    # reading and appending cannot overwrite anything, only writing needs care
    if kind == 'w':
        target = Path(filename)

        # walk through the backup names until a free one is found
        candidate = target
        counter = 0
        while candidate.exists():
            candidate = candidate.with_name(f'#{target.name:s}#{counter:d}#')
            counter += 1

        # the candidate differs from the target only if the target exists
        if candidate != target:
            target.rename(candidate)
            if verbose:
                print(f'Backup performed: {target} -> {candidate}\n')

        filename = target

    return open(filename, mode, buffering)
|
163 |
+
|
164 |
+
|
165 |
+
@contextmanager
|
166 |
+
def working_directory(directory):
|
167 |
+
"""Change working directory within the context.
|
168 |
+
|
169 |
+
This is not available in the standard library [1] but can be useful, especially for testing.
|
170 |
+
The old fixture in pytest (`tmpdir`) used py.path [2] which has `as_cwd`, but this is legacy
|
171 |
+
code now and not recommended [3].
|
172 |
+
|
173 |
+
[1] https://bugs.python.org/issue25625
|
174 |
+
[2] https://py.readthedocs.io/en/latest/path.html
|
175 |
+
[3] https://docs.pytest.org/en/latest/how-to/tmpdir.html
|
176 |
+
|
177 |
+
Arguments:
|
178 |
+
directory: directory to change to
|
179 |
+
"""
|
180 |
+
|
181 |
+
# store the current working directory
|
182 |
+
dir_original = Path().absolute()
|
183 |
+
|
184 |
+
# try to change to the new one and then back
|
185 |
+
try:
|
186 |
+
os.chdir(directory)
|
187 |
+
yield
|
188 |
+
finally:
|
189 |
+
os.chdir(dir_original)
|
190 |
+
|
191 |
+
|
192 |
+
@contextmanager
|
193 |
+
def temporary_directory(directory: AnyPath, parents: bool = False, keep: bool = False):
|
194 |
+
"""Create a temporary directory.
|
195 |
+
|
196 |
+
The directory is removed upon exiting the context, unless the users asks to keep it.
|
197 |
+
|
198 |
+
Arguments:
|
199 |
+
directory: directory to create
|
200 |
+
parents: whether to create parents as well
|
201 |
+
keep: whether to keep directory after exiting context
|
202 |
+
"""
|
203 |
+
|
204 |
+
directory = Path(directory)
|
205 |
+
|
206 |
+
# catch conflict early, a nicer error message
|
207 |
+
if directory.exists():
|
208 |
+
raise AMLIOError(f'Unable to create directory, already exists: {directory.absolute()}')
|
209 |
+
|
210 |
+
# actually make the directory
|
211 |
+
directory.mkdir(parents=parents)
|
212 |
+
|
213 |
+
# create context, clean up if needed
|
214 |
+
try:
|
215 |
+
yield directory
|
216 |
+
finally:
|
217 |
+
if not keep:
|
218 |
+
shutil.rmtree(directory)
|
219 |
+
|
220 |
+
|
221 |
+
def from_file(fn_in, binary=False):
    """Read the whole contents of a file.

    By default, the file is read as a text file, resulting in a string.
    If `binary` is true, it is read as a binary file, resulting in bytes.

    Arguments:
        fn_in: name of file to read
        binary: whether to read the file in binary mode

    Returns:
        contents of the file
    """

    mode = 'rb' if binary else 'r'
    with open(fn_in, mode) as f_in:
        return f_in.read()
|
234 |
+
|
235 |
+
|
236 |
+
def to_file(data, fn_out, binary=False, verbose=False):
    """Write data to a file.

    The provided `data` would typically be a string, or bytes if `binary` is true.
    The file is opened with `open_safe`, so an existing file of the same name is
    backed up rather than overwritten, and if `verbose` is true, backup file
    creation will be reported.

    Arguments:
        data: data to write
        fn_out: name of file to write to
        binary: whether to write the file in binary mode
        verbose: passed to `open_safe`
    """

    mode = 'wb' if binary else 'w'
    with open_safe(fn_out, mode, verbose=verbose) as f_out:
        f_out.write(data)
|
249 |
+
|
250 |
+
|
251 |
+
def get_io_operation(fn, fformat, operation):
    """Select I/O function for given file format.

    Arguments:
        fn: name of file to operate on, only used to detect the format from
            its extension if `fformat` is `None`
        fformat: name of file format, or `None`
        operation: I/O operation - "read" or "write"

    Returns:
        function to read or write one frame

    Raises:
        ValueError: if `operation` is not recognized or not registered for the file format
        KeyError: if the format is to be detected and the file extension is not registered
    """

    if operation not in ('read', 'write'):
        raise ValueError('Unrecognized operation. Allowed values: "read", "write".')

    # automatically pick a file format
    if fformat is None:
        extension = Path(fn).suffix[1:]
        try:
            fformat = ext2fmt[extension]
        except KeyError:
            msg = f'Extension "{extension:s}" not registered for file format detection.'
            raise KeyError(msg) from None

    # `formats` is a `defaultdict`: plain indexing would insert an empty entry for an unknown format
    try:
        return formats.get(fformat, {})[operation]
    except KeyError:
        msg = f'File format "{fformat:s}" not supported for operation "{operation:s}".'
        raise ValueError(msg) from None
|
280 |
+
|
281 |
+
|
282 |
+
def read_frames(fn_in, fformat=None, **kwargs):
    """Iterate over a trajectory file, yielding all data of one frame at a time.

    Arguments:
        fn_in: name of file to read
        fformat: format of the file, or `None` to detect it from the file extension
        **kwargs: passed on to the function that reads one frame

    Yields:
        whatever the single-frame reader returns, until it returns `None`
    """

    reader = get_io_operation(fn_in, fformat, 'read')

    # File formats read using MDTraj must be opened differently.
    # Maybe there is a more elegant way to do that though.
    with open(fn_in) as f_in:
        # a reader signals the end of the data by returning `None`
        frame = reader(f_in, **kwargs)
        while frame is not None:
            yield frame
            frame = reader(f_in, **kwargs)
|
295 |
+
|
296 |
+
|
297 |
+
def write_frames(fn_out, frames, fformat=None):
    """Write frames to file.

    The format of the file is given by `fformat` or inferred from the file
    extension if `fformat` is `None`. The file is opened with `open_safe`, so an
    existing file of the same name is backed up rather than overwritten.

    Arguments:
        fn_out: name of output file
        frames: iterator over `Frame` objects
        fformat: format of the file, or `None`
    """

    writer = get_io_operation(fn_out, fformat, 'write')

    # one call of the single-frame writer per frame
    with open_safe(fn_out, 'w') as f_out:
        for frame in frames:
            writer(f_out, frame)
|
316 |
+
|
317 |
+
|
318 |
+
def merge_frames(frames, *frames_others, force: bool = False):
    """Merge frames from multiple sources.

    The length of the result is determined by the length of `frames`,
    the other sources must be at least as long as that. Each frame of
    `frames` is updated in place with the corresponding frame of every
    other source.

    Arguments:
        frames: iterable over `Frame` objects
        frames_others: more iterables over `Frame` instances
        force: whether to overwrite data

    Yields:
        `Frame` objects

    Raises:
        ValueError: if one of `frames_others` runs out of frames before `frames` does
    """

    # `iter` leaves iterators untouched and makes plain sequences acceptable as well
    sources = [iter(frames_extra) for frames_extra in frames_others]

    for frame in frames:
        for source in sources:
            try:
                frame_extra = next(source)
            except StopIteration:
                # a `StopIteration` leaking out of a generator body would surface
                # as an opaque `RuntimeError`, so report the actual problem instead
                raise ValueError('Unable to merge frames, a secondary source ran out of frames.') from None
            frame.update(frame_extra, force=force)
        yield frame
|
xyz.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Functions to read and write XYZ files."""
|
2 |
+
|
3 |
+
__all__ = [
|
4 |
+
'write_frame_xyz',
|
5 |
+
'read_frame_xyz',
|
6 |
+
]
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
from ..constants import angstrom
|
11 |
+
from .utilities import Frame, register_io
|
12 |
+
|
13 |
+
|
14 |
+
@register_io('xyz', 'read', 'xyz')
def read_frame_xyz(f_in, name_data='positions', unit=angstrom):
    """Read one frame of XYZ format from an open file.

    Blank lines before the frame, such as those left at the end of a file,
    are skipped.

    Arguments:
        f_in: open file in XYZ format
        name_data: what quantity to take the XYZ data as, 'positions' or 'forces'
        unit: unit to scale data by, multiplicative factor in atomic units

    Returns:
        `Frame` object or `None` if there is no more data

    Raises:
        ValueError: if the atom count cannot be parsed, an atom line is missing
            or incomplete, or `name_data` is not supported
    """

    # find the first line of the frame, tolerating blank lines before it
    line_begin = f_in.readline()
    while line_begin and not line_begin.strip():
        line_begin = f_in.readline()

    # no more data in the file
    if not line_begin:
        return None

    # there is some data, frame should begin with natoms
    natoms = int(line_begin)

    # read comment line
    comment = f_in.readline().rstrip()

    names = []
    data = []
    for _ in range(natoms):
        items = f_in.readline().split()
        # catches a truncated file, a blank line and an atom line with missing columns
        if len(items) < 4:
            raise ValueError('Unexpected data in file.')
        names.append(items[0])
        data.append([float(item) for item in items[1:4]])
    data = np.array(data) * unit

    # file the data under the requested quantity
    if name_data == 'positions':
        positions = data
        forces = None
    elif name_data == 'forces':
        positions = None
        forces = data
    else:
        raise ValueError(f'Unsupported `name_data`: {name_data}. Expected "positions" or "forces".')

    return Frame(names=names, positions=positions, comment=comment, energy=None, forces=forces)
|
66 |
+
|
67 |
+
|
68 |
+
@register_io('xyz', 'write', 'xyz')
def write_frame_xyz(f_out, frame, unit=angstrom):
    """Write a single frame into an open XYZ file.

    This is currently hard-coded to write positions, if we ever need to write
    forces or something else, it needs generalizing.

    Arguments:
        f_out: open file to write to
        frame: frame with `names` and `positions`, optionally `comment`
        unit: unit of the data in the file, positions are divided by it

    Raises:
        ValueError: if the frame has no atom names or no positions
    """

    # names and positions are the minimum needed to write a frame
    if (frame.names is None) or (frame.positions is None):
        raise ValueError('Frame does not contain required properties.')

    line_atom = '{:6s} ' + 3*'{:13.6f}' + '\n'

    # header: number of atoms, then the comment line, empty if there is no comment
    f_out.write(f'{len(frame.names):d}\n')
    comment = '' if frame.comment is None else f'{frame.comment:s}'
    f_out.write(comment + '\n')

    # convert to the unit of the file
    scaled = frame.positions / unit

    # one line per atom
    for i, name in enumerate(frame.names):
        f_out.write(line_atom.format(name, *scaled[i]))
|