Skip to content

Commit 97ab237

Browse files
authored
Merge pull request #49 from hejung/more_api_consistency
- `asyncmd.utils` methods now all require a MDEngine (class) to dispatch to the correct engine submodule - take care of some pylint warnings
2 parents 233d847 + 4b06d58 commit 97ab237

11 files changed

Lines changed: 124 additions & 52 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
### Changed
1515

16+
- all `asyncmd.utils` methods now require a MDEngine (class) to dispatch to the correct engine submodule methods
1617
- GmxEngine `apply_constraints` and `generate_velocities` methods: rename `wdir` argument to `workdir` to make it consistent with `prepare` and `prepare_from_files` (also add the `workdir` argument to the MDEngine ABC).
1718

1819
### Fixed

docs/source/conf.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,10 @@
8282
{
8383
"name": "PyPI",
8484
"url": "https://pypi.org/project/asyncmd/",
85-
# download stats are broken? (August 2025)
86-
#"icon": "https://img.shields.io/pypi/dm/asyncmd",
87-
"icon": "https://img.shields.io/pypi/v/asyncmd",
85+
"icon": "https://img.shields.io/pypi/dm/asyncmd?label=pypi%20downloads",
86+
# keep the replacement icon for PyPi link, but commented out
87+
# (for the next time download stats are broken)
88+
#"icon": "https://img.shields.io/pypi/v/asyncmd",
8889
"type": "url",
8990
},
9091
{

src/asyncmd/config.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def set_max_process(num: int | None = None, max_num: int | None = None) -> None:
6464
spawning hundreds of processes.
6565
"""
6666
# NOTE: I think we should use a conservative default, e.g. 0.25*cpu_count()
67-
# pylint: disable-next=global-variable-not-assigned
67+
# pylint: disable-next=global-statement
6868
global _SEMAPHORES
6969
if num is None:
7070
if (logical_cpu_count := os.cpu_count()) is not None:
@@ -107,7 +107,7 @@ def set_max_files_open(num: int | None = None, margin: int = 30) -> None:
107107
# semaphores from non-async code, but sometimes use the sync subprocess.run
108108
# and subprocess.check_call [which also need files/pipes to work])
109109
# also maybe we need other open files like a storage :)
110-
# pylint: disable-next=global-variable-not-assigned
110+
# pylint: disable-next=global-statement
111111
global _SEMAPHORES
112112
rlim_soft = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
113113
if num is None:
@@ -157,7 +157,7 @@ def set_slurm_max_jobs(num: int | None) -> None:
157157
The maximum number of simultaneous SLURM jobs for this invocation of
158158
python/asyncmd. `None` means do not limit the maximum number of jobs.
159159
"""
160-
# pylint: disable-next=global-variable-not-assigned
160+
# pylint: disable-next=global-statement
161161
global _OPT_SEMAPHORES
162162
if num is None:
163163
_OPT_SEMAPHORES[_OPT_SEMAPHORES_KEYS.SLURM_MAX_JOB] = None
@@ -195,7 +195,7 @@ def set_trajectory_cache_type(cache_type: str,
195195
ValueError
196196
Raised if ``cache_type`` is not one of the allowed values.
197197
"""
198-
# pylint: disable-next=global-variable-not-assigned
198+
# pylint: disable-next=global-statement
199199
global _GLOBALS
200200
allowed_values = ["h5py", "npz", "memory"]
201201
if (cache_type := cache_type.lower()) not in allowed_values:
@@ -252,7 +252,7 @@ def register_h5py_cache(h5py_group: "h5py.Group | h5py.File", copy_h5py: bool =
252252
clear_old_cache : bool, optional
253253
Whether to clear the old/previously set cache, by default False.
254254
"""
255-
# pylint: disable-next=global-variable-not-assigned
255+
# pylint: disable-next=global-statement
256256
global _GLOBALS
257257
if _GLOBALS.get(_GLOBALS_KEYS.TRAJECTORY_FUNCTION_CACHE_TYPE, "not_set") != "h5py":
258258
# nothing to copy as h5py was not the old cache type
@@ -315,6 +315,9 @@ def deregister_h5py_cache(h5py_group: "h5py.Group | h5py.File"):
315315
_deregister_h5py_cache_for_all_trajectories(h5py_group=h5py_group)
316316
if _GLOBALS.get(_GLOBALS_KEYS.H5PY_CACHE, None) is h5py_group:
317317
_GLOBALS[_GLOBALS_KEYS.H5PY_CACHE] = None
318+
logger.warning("Deregistered global writeable h5py cache. No TrajectoryFunction"
319+
" values will be cached until a new h5py cache has been registered."
320+
)
318321
if h5py_group in _GLOBALS.get(_GLOBALS_KEYS.H5PY_CACHE_READ_ONLY_FALLBACKS, []):
319322
_GLOBALS[_GLOBALS_KEYS.H5PY_CACHE_READ_ONLY_FALLBACKS].remove(h5py_group)
320323

src/asyncmd/gromacs/mdengine.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -508,8 +508,8 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
508508
k=6,
509509
)
510510
)
511-
swdir = os.path.join(wdir, run_name)
512-
await aiofiles.os.mkdir(swdir)
511+
wdir = os.path.join(wdir, run_name)
512+
await aiofiles.os.mkdir(wdir)
513513
constraints_mdp = copy.deepcopy(self.mdp)
514514
constraints_mdp["continuation"] = "no" if constraints else "yes"
515515
constraints_mdp["gen-vel"] = "yes" if generate_velocities else "no"
@@ -523,12 +523,12 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
523523
# make sure we have draw a new/different random number for gen-vel
524524
constraints_mdp["gen-seed"] = -1
525525
constraints_mdp["nsteps"] = 0
526-
await self._run_grompp(workdir=swdir, deffnm=run_name,
526+
await self._run_grompp(workdir=wdir, deffnm=run_name,
527527
trr_in=conf_in.trajectory_files[0],
528-
tpr_out=os.path.join(swdir, f"{run_name}.tpr"),
528+
tpr_out=os.path.join(wdir, f"{run_name}.tpr"),
529529
mdp_obj=constraints_mdp)
530-
cmd_str = self._mdrun_cmd(tpr=os.path.join(swdir, f"{run_name}.tpr"),
531-
workdir=swdir,
530+
cmd_str = self._mdrun_cmd(tpr=os.path.join(wdir, f"{run_name}.tpr"),
531+
workdir=wdir,
532532
deffnm=run_name)
533533
logger.debug("About to execute gmx mdrun command for constraints and"
534534
"/or velocity generation: %s",
@@ -537,7 +537,7 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
537537
stdout = bytes()
538538
await self._acquire_resources_gmx_mdrun()
539539
mdrun_proc = await self._start_gmx_mdrun(
540-
cmd_str=cmd_str, workdir=swdir,
540+
cmd_str=cmd_str, workdir=wdir,
541541
run_name=run_name,
542542
# TODO: we hardcode that the 0step MD runs can not be longer than 15 min
543543
# (but i think this should be fine for randomizing velocities and/or
@@ -563,25 +563,25 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
563563
# the FrameExtractor (i.e. MDAnalysis) handle any potential conversions
564564
engine_traj = Trajectory(
565565
trajectory_files=os.path.join(
566-
swdir, f"{run_name}{self._num_suffix(1)}.trr"
566+
wdir, f"{run_name}{self._num_suffix(1)}.trr"
567567
),
568568
structure_file=conf_in.structure_file,
569569
)
570570
extractor = NoModificationFrameExtractor()
571571
# Note: we use extract (and not extract_async) because otherwise
572572
# it can happen in super-rare circumstances that the Trajectory
573573
# we just instantiated is "replaced" by a Trajectory with the
574-
# same hash but a different filename/path, then the extraction
574+
# same hash but a different filename/path. If then in addition
575+
# this trajectory is removed before extracting, the extraction
575576
# fails. If we dont await this can not happen since we do not
576577
# give up control in between.
577-
out_traj = extractor.extract(outfile=conf_out_name,
578-
traj_in=engine_traj,
579-
idx=len(engine_traj) - 1,
580-
)
581-
return out_traj
578+
return extractor.extract(outfile=conf_out_name,
579+
traj_in=engine_traj,
580+
idx=len(engine_traj) - 1,
581+
)
582582
finally:
583-
await self._cleanup_gmx_mdrun(workdir=swdir, run_name=run_name)
584-
shutil.rmtree(swdir) # remove the whole directory we used as wdir
583+
await self._cleanup_gmx_mdrun(workdir=wdir, run_name=run_name)
584+
shutil.rmtree(wdir) # remove the whole directory we used as wdir
585585

586586
async def prepare(self, starting_configuration: Trajectory | None | str,
587587
workdir: str, deffnm: str) -> None:

src/asyncmd/gromacs/utils.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
The general variants of these functions can be found in asyncmd.utils.
1919
"""
2020
import os
21+
import math
2122
import logging
2223
import aiofiles.os
2324

@@ -58,12 +59,12 @@ def get_value_from_mdp(k):
5859
v = mdp[k]
5960
except KeyError:
6061
# not set, defaults to 0
61-
v = float("inf")
62+
v = math.inf
6263
else:
63-
# need to check for 0 (== no output!) in case somone puts the
64+
# need to check for 0 (== no output!) in case someone puts the
6465
# defaults (or reads an mdout.mdp where gmx lists all the defaults)
6566
if not v:
66-
v = float("inf")
67+
v = math.inf
6768
return v
6869

6970
if traj_type.upper() == "TRR":
@@ -75,15 +76,15 @@ def get_value_from_mdp(k):
7576
vals = []
7677
for k in keys:
7778
vals += [get_value_from_mdp(k=k)]
78-
if (nstout := min(vals)) == float("inf"):
79+
if (nstout := min(vals)) == math.inf:
7980
raise ValueError(f"The MDP you passed results in no {traj_type} "
8081
+ "trajectory output.")
8182
if traj_type.upper == "TRR":
8283
# additional checks that nstvout and nstfout are multiples of nstxout
8384
# (if they are defined)
8485
additional_keys = ["nstvout", "nstfout"]
8586
for k in additional_keys:
86-
if (v := get_value_from_mdp(k=k)) != float("inf"):
87+
if (v := get_value_from_mdp(k=k)) != math.inf:
8788
if v % nstout:
8889
logger.warning("%s trajectory output is not a multiple of "
8990
"the nstxout frequency (%s=%d, nstxout=%d).",
@@ -136,7 +137,7 @@ async def get_all_file_parts(folder: str, deffnm: str, file_ending: str) -> "lis
136137
deffnm : str
137138
deffnm (prefix of filenames) used in the simulation.
138139
file_ending : str
139-
File ending of the requested filetype (with or without preceeding ".").
140+
File ending of the requested filetype (with or without preceding ".").
140141
141142
Returns
142143
-------

src/asyncmd/slurm/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def set_all_slurm_settings(*, sinfo_executable: str = "sinfo",
6969
List of nodes to exclude in job submissions, by default None, which
7070
results in no excluded nodes.
7171
"""
72-
# pylint: disable-next=global-variable-not-assigned
72+
# pylint: disable-next=global-statement
7373
global SlurmProcess
7474
SlurmProcess.slurm_cluster_mediator = SlurmClusterMediator(
7575
sinfo_executable=sinfo_executable,
@@ -124,7 +124,7 @@ def set_slurm_settings(*, sinfo_executable: str | None = None,
124124
List of nodes to exclude in job submissions, by default None, which
125125
results in no excluded nodes.
126126
"""
127-
# pylint: disable-next=global-variable-not-assigned
127+
# pylint: disable-next=global-statement
128128
global SlurmProcess
129129
# collect options for slurm cluster mediator
130130
mediator_options: dict[str, str | int | list[str]] = {}

src/asyncmd/trajectory/propagate.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ async def remove_parts(self, workdir: str, deffnm: str, *,
253253
folder=workdir,
254254
deffnm=deffnm,
255255
file_ending=ending.lower(),
256+
engine=self.engine_cls,
256257
)
257258
# make sure we dont miss anything because we have different
258259
# capitalization
@@ -261,6 +262,7 @@ async def remove_parts(self, workdir: str, deffnm: str, *,
261262
folder=workdir,
262263
deffnm=deffnm,
263264
file_ending=ending.upper(),
265+
engine=self.engine_cls,
264266
)
265267
await asyncio.gather(*(aiofiles.os.unlink(f)
266268
for f in parts_to_remove

src/asyncmd/trajectory/trajectory_cache.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -500,9 +500,13 @@ def __init__(self, traj_hash: int, traj_files: list[str]) -> None:
500500
)
501501
# setup (writeable) main cache if we have it
502502
if writeable_h5py_cache is None:
503-
logger.warning("Initializing a Trajectory cache in h5py with only "
504-
"read-only h5py.Groups associated. Newly calculated "
505-
"function values will not be cached!")
503+
# This can spam the log, because it happens once per trajectory that is
504+
# read from a read-only storage, e.g. for analysis
505+
logger.debug("Initializing h5py Trajectory cache with only "
506+
"read-only h5py.Groups associated. Newly calculated "
507+
"function values will not be cached! (trajectory files=%s)",
508+
traj_files,
509+
)
506510
self._main_cache = None
507511
else:
508512
self._main_cache = OneH5PYGroupTrajectoryFunctionValueCache(
@@ -542,10 +546,16 @@ def deregister_h5py_cache(self, h5py_cache: "h5py.File | h5py.Group") -> None:
542546
"""
543547
if self._main_cache is not None:
544548
if self._main_cache.h5py_cache is h5py_cache:
545-
logger.warning(
546-
"Deregistering the writeable (main) cache (%s). "
547-
"Newly calculated function values will not be cached!",
548-
h5py_cache
549+
# This can spam the log, because it happens once per trajectory
550+
# in existence when closing/deregistering a h5py cache.
551+
# We warn once when deregistering the h5py cache (in central config.py)
552+
# and use debug here for now
553+
logger.debug(
554+
"Deregistering the writeable (main) cache (%s) for Trajectory "
555+
"consisting of files %s."
556+
"Newly calculated function values will not be cached until "
557+
"a new h5py cache group is registered or the cache type changed!",
558+
h5py_cache, self._traj_files,
549559
)
550560
self._main_cache = None
551561
# found it so it can not be a fallback_cache, so get out of here

src/asyncmd/utils.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
It also includes various functions to retrieve or ensure important parameters from
2121
MDConfig/MDEngine combinations, such as nstout_from_mdconfig and ensure_mdconfig_options.
2222
"""
23+
import logging
24+
2325
from .mdengine import MDEngine
2426
from .mdconfig import MDConfig
2527
from .trajectory.trajectory import Trajectory
@@ -28,7 +30,11 @@
2830
from .gromacs import mdconfig as gmx_config
2931

3032

31-
async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list[Trajectory]:
33+
logger = logging.getLogger(__name__)
34+
35+
36+
async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine | type[MDEngine],
37+
) -> list[Trajectory]:
3238
"""
3339
List all trajectories in folder by given engine class with given deffnm.
3440
@@ -37,10 +43,12 @@ async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list
3743
folder : str
3844
Absolute or relative path to a folder.
3945
deffnm : str
40-
deffnm used by the engines simulation run from which we want the trajs.
41-
engine : MDEngine
42-
The engine that produced the trajectories
43-
(or one from the same class and with similar init args)
46+
deffnm used by the engines simulation run from which we want the trajectories.
47+
engine : MDEngine | type[MDEngine]
48+
The engine that produced the trajectories (or one from the same class
49+
and with similar init args). Note that it is also possible to pass an
50+
uninitialized engine class, but then the default trajectory output type
51+
will be returned.
4452
4553
Returns
4654
-------
@@ -52,7 +60,17 @@ async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list
5260
ValueError
5361
Raised when the engine class is unknown.
5462
"""
55-
if isinstance(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine)):
63+
# test for uninitialized engine classes, we warn but return the default traj type
64+
if isinstance(engine, type) and issubclass(engine, MDEngine):
65+
logger.warning("Engine %s is not initialized, i.e. it is an engine class. "
66+
"Returning the default output trajectory type for this "
67+
"engine class.", engine)
68+
if (
69+
isinstance(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine))
70+
or (isinstance(engine, type) # check that it is a type otherwise issubclass might not work
71+
and issubclass(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine))
72+
)
73+
):
5674
return await gmx_utils.get_all_traj_parts(folder=folder, deffnm=deffnm,
5775
traj_type=engine.output_traj_type,
5876
)
@@ -61,6 +79,7 @@ async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list
6179

6280

6381
async def get_all_file_parts(folder: str, deffnm: str, file_ending: str,
82+
engine: MDEngine | type[MDEngine],
6483
) -> list[str]:
6584
"""
6685
Find and return all files with given ending produced by a `MDEngine`.
@@ -75,16 +94,24 @@ async def get_all_file_parts(folder: str, deffnm: str, file_ending: str,
7594
deffnm (prefix of filenames) used in the simulation.
7695
file_ending : str
7796
File ending of the requested filetype (with or without preceding ".").
97+
engine : MDEngine | type[MDEngine]
98+
The engine or engine class that produced the file parts.
7899
79100
Returns
80101
-------
81102
list[str]
82103
Ordered list of filepaths for files with given ending.
83104
"""
84-
# TODO: we just use the function from the gromacs engines for now, i.e. we
85-
# assume that the filename scheme will be the same for other engines
86-
return await gmx_utils.get_all_file_parts(folder=folder, deffnm=deffnm,
87-
file_ending=file_ending)
105+
if (
106+
isinstance(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine))
107+
or (isinstance(engine, type) # check that it is a type otherwise issubclass might not work
108+
and issubclass(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine))
109+
)
110+
):
111+
return await gmx_utils.get_all_file_parts(folder=folder, deffnm=deffnm,
112+
file_ending=file_ending)
113+
raise ValueError(f"Engine {engine} is not a known MDEngine (class)."
114+
+ " Maybe someone just forgot to add the function?")
88115

89116

90117
def nstout_from_mdconfig(mdconfig: MDConfig, output_traj_type: str) -> int:

tests/helper_classes.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@ class NoOpMDEngine(MDEngine):
2828
current_trajectory = None
2929
output_traj_type = "TEST"
3030
steps_done = 0
31-
async def apply_constraints(self, conf_in: Trajectory, conf_out_name: str) -> Trajectory:
31+
async def apply_constraints(self, conf_in: Trajectory, conf_out_name: str,
32+
*, workdir: str = ".") -> Trajectory:
3233
pass
3334
async def prepare(self, starting_configuration: Trajectory, workdir: str, deffnm: str) -> None:
3435
pass
3536
async def prepare_from_files(self, workdir: str, deffnm: str) -> None:
3637
pass
37-
async def run_walltime(self, walltime: float) -> Trajectory:
38+
async def run_walltime(self, walltime: float, max_steps: int | None = None) -> Trajectory:
3839
pass
3940
async def run_steps(self, nsteps: int, steps_per_part: bool = False) -> Trajectory:
4041
pass

0 commit comments

Comments
 (0)