Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions arc/scripts/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@ def parse_command_line_arguments(command_line_args=None):

Returns:
The parsed command-line arguments by keywords.
``args.file`` is the input YAML path (positional).
``args.output`` is an optional output path (``-o``/``--output``); when omitted,
callers should default to overwriting ``args.file`` to preserve historical behavior.
"""
parser = argparse.ArgumentParser(description='Automatic Rate Calculator (ARC)')
parser.add_argument('file', metavar='FILE', type=str, nargs=1, help='a file with input information')
parser.add_argument('-o', '--output', type=str, default=None,
help='optional output YAML path; if omitted, the input file is overwritten')
args = parser.parse_args(command_line_args)
args.file = args.file[0]
return args
Expand Down
84 changes: 75 additions & 9 deletions arc/scripts/rmg_kinetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,33 @@
# encoding: utf-8

"""
A standalone script to run RMG
and get kinetic rate coefficients for reactions
A standalone script to run RMG and get kinetic rate coefficients for reactions.

Output units (per entry returned by ``get_kinetics_from_reactions``):
- ``A``: cm^3/(mol*s) for bimolecular reactions, s^-1 for unimolecular
(3-body: cm^6/(mol^2*s)). Reported in the units stored on the
Arrhenius object after the SI->cm conversion below.
Comment on lines +8 to +10

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed in 342f518. Replaced the binary unimolecular/bimolecular flag with get_a_factor_si_to_cm(num_reactants) ({1: 1.0, 2: 1e6, 3: 1e12}), so termolecular reactions now get the correct m^6→cm^6 (×1e12, cm^6/(mol^2*s)) scaling instead of being mislabeled s^-1. The code now matches the documented 3-body units.

- ``n``: dimensionless temperature exponent.
- ``Ea``: kJ/mol (converted from SI J/mol).
- ``T_min``, ``T_max``: K.
"""

import copy
import os
import sys
from typing import Dict, List, Optional, Tuple

# Make ``from common import ...`` work no matter how this script is invoked
# (e.g. ``python /abs/path/to/rmg_kinetics.py``, ``cd elsewhere && python ...``).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from common import parse_command_line_arguments, read_yaml_file, save_yaml_file

from rmgpy.data.kinetics.common import find_degenerate_reactions
from rmgpy.data.kinetics.family import KineticsFamily
from rmgpy.data.rmg import RMGDatabase
from rmgpy import settings as rmg_settings
from rmgpy.kinetics import Arrhenius, ArrheniusEP, KineticsModel
from rmgpy.reaction import same_species_lists, Reaction
from rmgpy.species import Species

Expand All @@ -35,11 +49,12 @@ def main():
"""
args = parse_command_line_arguments()
input_file = args.file
output_file = args.output or input_file
reaction_list = read_yaml_file(path=input_file)
if not isinstance(reaction_list, list):
raise ValueError(f'The content of {input_file} must be a list, got {reaction_list} which is a {type(reaction_list)}')
result = get_rate_coefficients(reaction_list)
save_yaml_file(path=input_file, content=result)
save_yaml_file(path=output_file, content=result)


def get_rate_coefficients(reaction_list: List[Dict]) -> List[Dict]:
Expand All @@ -62,6 +77,42 @@ def get_rate_coefficients(reaction_list: List[Dict]) -> List[Dict]:
return reaction_list


def get_a_factor_si_to_cm(num_reactants: int) -> float:
"""
Get the factor that converts an Arrhenius A-factor from SI (m-based) units to
cm-based units, based on the reaction molecularity.

Args:
num_reactants (int): The number of reactants.

Returns:
float: 1.0 for unimolecular (s^-1), 1e6 for bimolecular (m^3->cm^3),
1e12 for termolecular (m^6->cm^6). Defaults to 1.0 otherwise.
"""
return {1: 1.0, 2: 1e6, 3: 1e12}.get(num_reactants, 1.0)


def scale_kinetics_by_degeneracy(kinetics: KineticsModel,
degeneracy: float,
) -> KineticsModel:
"""
Scale Arrhenius-type kinetics in place by the reaction-path degeneracy.

Non-Arrhenius forms (e.g. Chebyshev, PDepArrhenius) are returned unchanged,
since ``change_rate`` would otherwise corrupt their parameters.

Args:
kinetics (KineticsModel): An RMG kinetics object.
degeneracy (float): The reaction-path degeneracy to scale by.

Returns:
KineticsModel: The (possibly scaled) kinetics object.
"""
if isinstance(kinetics, (Arrhenius, ArrheniusEP)):
kinetics.change_rate(degeneracy)
return kinetics


def determine_rmg_kinetics(rmgdb: RMGDatabase,
reaction: Reaction,
dh_rxn298: Optional[float] = None,
Expand All @@ -71,6 +122,13 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase,
Determine kinetics for `reaction` (an RMG Reaction object) from RMG's database, if possible.
Assigns a list of all matching entries from both libraries and families.

Note:
Family entries originating from the training set are intentionally filtered out
(an empty returned list therefore means "no matching libraries and only training-set
family hits", not necessarily "no match at all"). Database kinetics are deep-copied
before any in-place mutation (degeneracy scaling, unit conversion) so the loaded
``rmgdb`` instance remains unchanged across calls.

Args:
rmgdb (RMGDatabase): The RMG database instance.
reaction (Reaction): The RMG Reaction object.
Expand All @@ -79,6 +137,7 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase,

Returns: list[dict]
All matching RMG reactions kinetics (both libraries and families) as a dict of parameters.
Empty list if nothing matched (or only training-set entries matched).
"""
rmg_reactions = list()
# Libraries:
Expand All @@ -89,19 +148,22 @@ def determine_rmg_kinetics(rmgdb: RMGDatabase,
library_reaction.comment = f'Library: {library.label}'
rmg_reactions.append(library_reaction)
break
# # Families:
A_units = "cm^3/(mol*s)" if len(reaction.reactants) == 2 else "s^-1"
# Families:
a_factor = get_a_factor_si_to_cm(len(reaction.reactants))
fam_list = loop_families(rmgdb, reaction)
dh_rxn298 = dh_rxn298 or get_dh_rxn298(rmgdb=rmgdb, reaction=reaction) # J/mol
for family, degenerate_reactions in fam_list:
for deg_rxn in degenerate_reactions:
kinetics_list = family.get_kinetics(reaction=deg_rxn, template_labels=deg_rxn.template, degeneracy=deg_rxn.degeneracy)
for kinetics_detailes in kinetics_list:
kinetics = kinetics_detailes[0]
kinetics.change_rate(deg_rxn.degeneracy)
# Deep-copy before mutating so the database object isn't double-scaled
# if the same family rule is queried again for another reaction.
kinetics = copy.deepcopy(kinetics_detailes[0])
kinetics = scale_kinetics_by_degeneracy(kinetics, deg_rxn.degeneracy)
if hasattr(kinetics, 'to_arrhenius'):
kinetics = kinetics.to_arrhenius(dh_rxn298) # Convert ArrheniusEP to Arrhenius
kinetics.A.value_si = kinetics.A.value_si * (1e6 if A_units == "cm^3/(mol*s)" else 1)
if a_factor != 1.0 and isinstance(kinetics, Arrhenius):
kinetics.A.value_si = kinetics.A.value_si * a_factor
deg_rxn.kinetics = kinetics
deg_rxn.comment = f'Family: {deg_rxn.family}'
if 'training' in deg_rxn.kinetics.comment:
Expand Down Expand Up @@ -150,7 +212,11 @@ def get_kinetics_from_reactions(reactions: List[Reaction]) -> List[Dict]:
"""
kinetics_list = list()
for rxn in reactions:
print(f'rxn: {rxn}, kinetics: {rxn.kinetics}, comment: {rxn.comment}')
try:
rxn_repr = str(rxn)
except (TypeError, AttributeError):
rxn_repr = '<reaction without reactants/products labels>'
print(f'rxn: {rxn_repr}, kinetics: {rxn.kinetics}, comment: {rxn.comment}', file=sys.stderr)
kinetics_list.append({
'kinetics': rxn.kinetics.__repr__(),
'comment': rxn.comment,
Expand Down
15 changes: 12 additions & 3 deletions arc/scripts/rmg_thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@
# encoding: utf-8

"""
A standalone script to run RMG
and get thermodynamic properties for species
A standalone script to run RMG and get thermodynamic properties for species.

Output units (per entry returned by ``get_thermo``):
- ``h298``: kJ/mol (converted from SI J/mol).
- ``s298``: J/(mol*K).
- ``comment``: source / estimation comment from RMG.
"""

import os
import sys

# Make ``from common import ...`` work no matter how this script is invoked.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from common import parse_command_line_arguments, read_yaml_file, save_yaml_file

Expand All @@ -33,11 +41,12 @@ def main():
"""
args = parse_command_line_arguments()
input_file = args.file
output_file = args.output or input_file
species_list = read_yaml_file(path=input_file)
if not isinstance(species_list, list):
raise ValueError(f'The content of {input_file} must be a list, got {species_list} which is a {type(species_list)}')
result = get_thermo(species_list)
save_yaml_file(path=input_file, content=result)
save_yaml_file(path=output_file, content=result)


def get_thermo(species_list: list[dict]) -> list[dict]:
Expand Down
152 changes: 151 additions & 1 deletion arc/scripts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
import shutil
import subprocess
import tempfile
import textwrap
import unittest

from arc.common import ARC_PATH, ARC_TESTING_PATH, read_yaml_file
from arc.common import ARC_PATH, ARC_TESTING_PATH, read_yaml_file, save_yaml_file
from arc.scripts.common import parse_command_line_arguments


def _rmg_env_available() -> bool:
Expand Down Expand Up @@ -117,5 +119,153 @@ def test_cp_data_present(self):
self.assertIn('cp_j_mol_k', cp[0])


class TestCommonArgparse(unittest.TestCase):
"""Test the shared CLI parser used by the standalone scripts."""

def test_positional_file_only(self):
"""Without ``--output`` the parser exposes ``args.output is None``."""
args = parse_command_line_arguments(['/tmp/in.yml'])
self.assertEqual(args.file, '/tmp/in.yml')
self.assertIsNone(args.output)

def test_output_long_form(self):
"""``--output`` populates ``args.output`` so callers can avoid overwriting input."""
args = parse_command_line_arguments(['/tmp/in.yml', '--output', '/tmp/out.yml'])
self.assertEqual(args.file, '/tmp/in.yml')
self.assertEqual(args.output, '/tmp/out.yml')

def test_output_short_form(self):
"""``-o`` is an accepted short form."""
args = parse_command_line_arguments(['/tmp/in.yml', '-o', '/tmp/out.yml'])
self.assertEqual(args.output, '/tmp/out.yml')


@unittest.skipUnless(RMG_ENV, 'rmg_env not available')
class TestRmgKineticsHelpers(unittest.TestCase):
"""
Unit tests for ``rmg_kinetics.py`` helpers that don't need a full RMG database load.

Each test runs a tiny ``python -c`` snippet inside ``rmg_env`` so we can import
rmgpy and the script module directly. Stdout is parsed as JSON.
"""

SCRIPT_DIR = os.path.join(ARC_PATH, 'arc', 'scripts')

def _run_in_rmg_env(self, snippet: str) -> str:
"""Execute ``snippet`` inside rmg_env and return stripped stdout."""
result = subprocess.run(
['conda', 'run', '-n', 'rmg_env', 'python', '-c', snippet],
capture_output=True, text=True, timeout=120,
)
self.assertEqual(result.returncode, 0,
f'snippet failed: stderr={result.stderr}\nstdout={result.stdout}')
return result.stdout.strip()

def test_get_kinetics_from_reactions_arrhenius(self):
"""``get_kinetics_from_reactions`` reports A/n/Ea (Ea in kJ/mol) for an Arrhenius rxn."""
snippet = textwrap.dedent(f"""
import sys, json
sys.path.insert(0, {self.SCRIPT_DIR!r})
from rmg_kinetics import get_kinetics_from_reactions
from rmgpy.kinetics import Arrhenius
from rmgpy.reaction import Reaction
rxn = Reaction()
rxn.kinetics = Arrhenius(A=(1.5e13, 'cm^3/(mol*s)'), n=0.0, Ea=(20.0, 'kJ/mol'),
Tmin=(300.0, 'K'), Tmax=(2500.0, 'K'))
rxn.comment = 'unit-test'
out = get_kinetics_from_reactions([rxn])
print(json.dumps(out[0]))
""")
import json
entry = json.loads(self._run_in_rmg_env(snippet))
self.assertEqual(entry['comment'], 'unit-test')
self.assertAlmostEqual(entry['A'], 1.5e13, delta=1e7)
self.assertEqual(entry['n'], 0.0)
self.assertAlmostEqual(entry['Ea'], 20.0, places=6) # kJ/mol
self.assertEqual(entry['T_min'], 300.0)
self.assertEqual(entry['T_max'], 2500.0)

def test_get_kinetics_from_reactions_handles_missing_T_bounds(self):
"""Tmin/Tmax may be absent; helper should yield None rather than crashing."""
snippet = textwrap.dedent(f"""
import sys, json
sys.path.insert(0, {self.SCRIPT_DIR!r})
from rmg_kinetics import get_kinetics_from_reactions
from rmgpy.kinetics import Arrhenius
from rmgpy.reaction import Reaction
rxn = Reaction()
rxn.kinetics = Arrhenius(A=(1.0, 's^-1'), n=1.0, Ea=(0.0, 'J/mol'))
rxn.comment = 'no-T-bounds'
print(json.dumps(get_kinetics_from_reactions([rxn])[0]))
""")
import json
entry = json.loads(self._run_in_rmg_env(snippet))
self.assertIsNone(entry['T_min'])
self.assertIsNone(entry['T_max'])

def test_scale_kinetics_by_degeneracy_skips_non_arrhenius(self):
"""``scale_kinetics_by_degeneracy`` scales Arrhenius A by the degeneracy but
leaves non-Arrhenius forms (e.g. Chebyshev) untouched. Dropping the guard would
let ``change_rate`` corrupt the Chebyshev coefficients and fail this test."""
snippet = textwrap.dedent(f"""
import sys
sys.path.insert(0, {self.SCRIPT_DIR!r})
from rmg_kinetics import scale_kinetics_by_degeneracy
from rmgpy.kinetics import Arrhenius, Chebyshev
arr = Arrhenius(A=(1.0, 's^-1'), n=0.0, Ea=(0.0, 'J/mol'))
scale_kinetics_by_degeneracy(arr, 2)
assert abs(arr.A.value_si - 2.0) < 1e-9, arr.A.value_si
cheb = Chebyshev(coeffs=[[1.0, 0.0], [0.0, 0.0]],
kunits='cm^3/(mol*s)',
Tmin=(300.0, 'K'), Tmax=(2000.0, 'K'),
Pmin=(0.01, 'bar'), Pmax=(100.0, 'bar'))
before = cheb.coeffs.value_si.tolist()
scale_kinetics_by_degeneracy(cheb, 2)
assert cheb.coeffs.value_si.tolist() == before, 'Chebyshev coeffs were mutated'
print('ok')
""")
self.assertEqual(self._run_in_rmg_env(snippet), 'ok')


@unittest.skipUnless(RMG_ENV, 'rmg_env not available')
class TestRmgScriptsOutputFlag(unittest.TestCase):
"""Verify ``--output`` writes to a fresh path and leaves the input file untouched."""

def setUp(self):
self.tmp_dir = tempfile.mkdtemp(prefix='rmg_scripts_test_')

def tearDown(self):
shutil.rmtree(self.tmp_dir, ignore_errors=True)

def _h2_adjlist(self) -> str:
return '1 H u0 p0 c0 {2,S}\n2 H u0 p0 c0 {1,S}\n'

def test_rmg_thermo_output_does_not_overwrite_input(self):
"""The thermo script writes the augmented YAML to ``--output`` and preserves input."""
input_path = os.path.join(self.tmp_dir, 'in.yml')
output_path = os.path.join(self.tmp_dir, 'out.yml')
original = [{'label': 'H2', 'adjlist': self._h2_adjlist()}]
save_yaml_file(path=input_path, content=original)
with open(input_path, 'rb') as f:
input_bytes_before = f.read()

script = os.path.join(ARC_PATH, 'arc', 'scripts', 'rmg_thermo.py')
result = subprocess.run(
['conda', 'run', '-n', 'rmg_env', 'python', script, input_path, '--output', output_path],
capture_output=True, text=True, timeout=300,
)
self.assertEqual(result.returncode, 0, f'thermo script failed: {result.stderr}')

# Input must be byte-identical (the script must not overwrite it).
with open(input_path, 'rb') as f:
self.assertEqual(f.read(), input_bytes_before)
# Output must contain the new keys.
out = read_yaml_file(output_path)
self.assertEqual(len(out), 1)
self.assertIn('h298', out[0])
self.assertIn('s298', out[0])
self.assertIn('comment', out[0])


if __name__ == '__main__':
unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))