From 1a5b9345f21d96c1b8b3c15c390c8a7a7d5a08c7 Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:14:43 +0000 Subject: [PATCH 01/11] feat: add opt-in support for workflows - add --allow-ref to run/simplify/init_reaction\n- add allow_ref parameter in normalize path\n- keep disabled by default for security\n- bump minimum dargs to >=0.5.0\n- document usage in README\n\nAuthored by OpenClaw (model: gpt-5.3-codex) --- README.md | 8 ++++++++ dpgen/data/reaction.py | 2 +- dpgen/generator/run.py | 8 +++++--- dpgen/main.py | 15 +++++++++++++++ dpgen/simplify/simplify.py | 6 +++--- dpgen/util.py | 11 ++++++++--- pyproject.toml | 2 +- 7 files changed, 41 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index bffce5220..4be144906 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,14 @@ DP-GEN contains the following workflows: For detailed usage and parameters, read [DP-GEN documentation](https://docs.deepmodeling.com/projects/dpgen/). +### `$ref` support (secure by default) + +DP-GEN now supports loading external JSON/YAML snippets through `"$ref"` in relevant workflows (via `dargs>=0.5.0`). +For security reasons, it is **disabled by default** and must be enabled explicitly. + +- CLI: add `--allow-ref` to `dpgen run`, `dpgen simplify`, and `dpgen init_reaction` +- Python API path (`dpgen.util.normalize`): pass `allow_ref=True` + ## Tutorials and examples * [Tutorials](https://tutorials.deepmodeling.com/en/latest/Tutorials/DP-GEN/): basic tutorials for DP-GEN. diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 46cde2ff8..754f1a425 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -212,7 +212,7 @@ def gen_init_reaction(args): mdata = load_file(args.MACHINE) jdata_arginfo = init_reaction_jdata_arginfo() - jdata = normalize(jdata_arginfo, jdata) + jdata = normalize(jdata_arginfo, jdata, allow_ref=args.allow_ref) mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 46ec6e1ce..e1735afbe 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -4972,12 +4972,14 @@ def set_version(mdata): return mdata -def run_iter(param_file, machine_file): +def run_iter(param_file, machine_file, allow_ref: bool = False): jdata = load_file(param_file) mdata = load_file(machine_file) jdata_arginfo = run_jdata_arginfo() - jdata = normalize(jdata_arginfo, jdata, strict_check=False) + jdata = normalize( + jdata_arginfo, jdata, strict_check=False, allow_ref=allow_ref + ) update_mass_map(jdata) @@ -5718,7 +5720,7 @@ def gen_run(args): if args.debug: dlog.setLevel(logging.DEBUG) dlog.info("start running") - run_iter(args.PARAM, args.MACHINE) + run_iter(args.PARAM, args.MACHINE, allow_ref=args.allow_ref) dlog.info("finished") diff --git a/dpgen/main.py b/dpgen/main.py index a0ac004e5..ecef58bd5 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -101,6 +101,11 @@ def main_parser() -> argparse.ArgumentParser: nargs="?", help="machine file, json/yaml format", ) + parser_init_reaction.add_argument( + "--allow-ref", + action="store_true", + help="Allow loading external JSON/YAML snippets through `$ref`. Disabled by default for security.", + ) parser_init_reaction.set_defaults(func=gen_init_reaction) # run @@ -110,6 +115,11 @@ def main_parser() -> argparse.ArgumentParser: parser_run.add_argument("PARAM", type=str, help="parameter file, json/yaml format") parser_run.add_argument("MACHINE", type=str, help="machine file, json/yaml format") parser_run.add_argument("-d", "--debug", action="store_true", help="log debug info") + parser_run.add_argument( + "--allow-ref", + action="store_true", + help="Allow loading external JSON/YAML snippets through `$ref`. Disabled by default for security.", + ) parser_run.set_defaults(func=gen_run) # run/report @@ -181,6 +191,11 @@ def main_parser() -> argparse.ArgumentParser: parser_run.add_argument("PARAM", type=str, help="parameter file, json/yaml format") parser_run.add_argument("MACHINE", type=str, help="machine file, json/yaml format") parser_run.add_argument("-d", "--debug", action="store_true", help="log debug info") + parser_run.add_argument( + "--allow-ref", + action="store_true", + help="Allow loading external JSON/YAML snippets through `$ref`. Disabled by default for security.", + ) parser_run.set_defaults(func=gen_simplify) # test diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 2bc88e1f8..90fa300fc 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -508,7 +508,7 @@ def make_fp(iter_index, jdata, mdata): make_fp_calculation(iter_index, jdata, mdata) -def run_iter(param_file, machine_file): +def run_iter(param_file, machine_file, allow_ref: bool = False): """Init (iter 0): init_pick. tasks (iter > 0): @@ -526,7 +526,7 @@ def run_iter(param_file, machine_file): mdata = load_file(machine_file) jdata_arginfo = simplify_jdata_arginfo() - jdata = normalize(jdata_arginfo, jdata) + jdata = normalize(jdata_arginfo, jdata, allow_ref=allow_ref) # set up electron temperature use_ele_temp = jdata.get("use_ele_temp", 0) @@ -626,5 +626,5 @@ def gen_simplify(args): if args.debug: dlog.setLevel(logging.DEBUG) dlog.info("start simplifying") - run_iter(args.PARAM, args.MACHINE) + run_iter(args.PARAM, args.MACHINE, allow_ref=args.allow_ref) dlog.info("finished") diff --git a/dpgen/util.py b/dpgen/util.py index 73453d74b..aba75f7a3 100644 --- a/dpgen/util.py +++ b/dpgen/util.py @@ -78,7 +78,12 @@ def expand_sys_str(root_dir: Union[str, Path]) -> list[str]: return matches -def normalize(arginfo: Argument, data: dict, strict_check: bool = True) -> dict: +def normalize( + arginfo: Argument, + data: dict, + strict_check: bool = True, + allow_ref: bool = False, +) -> dict: """Normalize and check input data. Parameters @@ -95,8 +100,8 @@ def normalize(arginfo: Argument, data: dict, strict_check: bool = True) -> dict: dict normalized data """ - data = arginfo.normalize_value(data, trim_pattern="_*") - arginfo.check_value(data, strict=strict_check) + data = arginfo.normalize_value(data, trim_pattern="_*", allow_ref=allow_ref) + arginfo.check_value(data, strict=strict_check, allow_ref=allow_ref) return data diff --git a/pyproject.toml b/pyproject.toml index 36c4575e3..b3ae59d80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ 'GromacsWrapper>=0.9.0; python_version >= "3.12"', 'dpdispatcher>=0.3.11', 'netCDF4', - 'dargs>=0.4.0', + 'dargs>=0.5.0', 'h5py', 'pymatgen-analysis-defects', 'openbabel-wheel', From 8aa129446202ded5f5d33c7503b6ada027838aa1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:16:02 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpgen/generator/run.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index e1735afbe..1dde3841f 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -4977,9 +4977,7 @@ def run_iter(param_file, machine_file, allow_ref: bool = False): mdata = load_file(machine_file) jdata_arginfo = run_jdata_arginfo() - jdata = normalize( - jdata_arginfo, jdata, strict_check=False, allow_ref=allow_ref - ) + jdata = normalize(jdata_arginfo, jdata, strict_check=False, allow_ref=allow_ref) update_mass_map(jdata) From 1d1298ae2c07316c65e8b29c40eed0f7b58b62ec Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:30:24 +0000 Subject: [PATCH 03/11] docs: move guidance to docs and add function docs - document --allow-ref in run/simplify/init_reaction docs\n- add allow_ref parameter docs in dpgen.util.normalize\n\nAuthored by OpenClaw (model: gpt-5.3-codex) --- doc/init/init-reaction.md | 6 ++++++ doc/run/overview-of-the-run-process.md | 6 ++++++ doc/simplify/simplify.md | 6 ++++++ dpgen/util.py | 3 +++ 4 files changed, 21 insertions(+) diff --git a/doc/init/init-reaction.md b/doc/init/init-reaction.md index c7366dcbf..c6627b2e9 100644 --- a/doc/init/init-reaction.md +++ b/doc/init/init-reaction.md @@ -18,4 +18,10 @@ An example of `reaction.json` is given below: For detailed parameters, see [parametes](init-reaction-jdata.rst) and [machine parameters](init-reaction-mdata.rst). +To enable loading external JSON/YAML snippets via `$ref` in the reaction parameter file, add `--allow-ref` explicitly (disabled by default for security): + +```bash +dpgen init_reaction reaction.json machine.json --allow-ref +``` + The genereated data can be used to continue DP-GEN concurrent learning workflow. Read [Energy & Fuels, 2021, 35 (1), 762–769](https://10.1021/acs.energyfuels.0c03211) for details. diff --git a/doc/run/overview-of-the-run-process.md b/doc/run/overview-of-the-run-process.md index 590312ff3..7fb63b356 100644 --- a/doc/run/overview-of-the-run-process.md +++ b/doc/run/overview-of-the-run-process.md @@ -16,6 +16,12 @@ Here, we give a general description of the run process. We can execute the run p dpgen run param.json machine.json ``` +To enable loading external JSON/YAML snippets via `$ref` in `param.json`, add `--allow-ref` explicitly (disabled by default for security): + +```sh +dpgen run param.json machine.json --allow-ref +``` + The following files or folders will be created and upgraded by codes: - iter.00000x contains the main results that DP-GEN generates in the first iteration. diff --git a/doc/simplify/simplify.md b/doc/simplify/simplify.md index 4dd07a3eb..d8798ea57 100644 --- a/doc/simplify/simplify.md +++ b/doc/simplify/simplify.md @@ -6,6 +6,12 @@ Use the following script to start the workflow: dpgen simplify param.json machine.json ``` +To enable loading external JSON/YAML snippets via `$ref`, add `--allow-ref` explicitly (disabled by default for security): + +```bash +dpgen simplify param.json machine.json --allow-ref +``` + Here is an example of `param.json` for QM7 dataset: ```json { diff --git a/dpgen/util.py b/dpgen/util.py index aba75f7a3..c222a5539 100644 --- a/dpgen/util.py +++ b/dpgen/util.py @@ -94,6 +94,9 @@ def normalize( input data strict_check : bool, default=True strict check data or not + allow_ref : bool, default=False + Whether to allow loading external JSON/YAML snippets via ``$ref``. + Disabled by default for security. Returns ------- From 4b56c41134f06bc8a167d3c664a42913f3e5ccab Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:03:10 +0000 Subject: [PATCH 04/11] fix: address review comments for allow_ref compatibility - use getattr(args, 'allow_ref', False) in init_reaction\n- add allow_ref docs in simplify.run_iter docstring\n\nAuthored by OpenClaw (model: gpt-5.3-codex) --- dpgen/data/reaction.py | 4 +++- dpgen/simplify/simplify.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 754f1a425..f493e41c4 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -212,7 +212,9 @@ def gen_init_reaction(args): mdata = load_file(args.MACHINE) jdata_arginfo = init_reaction_jdata_arginfo() - jdata = normalize(jdata_arginfo, jdata, allow_ref=args.allow_ref) + jdata = normalize( + jdata_arginfo, jdata, allow_ref=getattr(args, "allow_ref", False) + ) mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 90fa300fc..ec56f28cb 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -521,6 +521,16 @@ def run_iter(param_file, machine_file, allow_ref: bool = False): 06 make_fp 07 run_fp (same as generator) 08 post_fp (same as generator) + + Parameters + ---------- + param_file : str + parameter file (json/yaml) + machine_file : str + machine file (json/yaml) + allow_ref : bool, default=False + Whether to allow loading external JSON/YAML snippets via ``$ref``. + Disabled by default for security. """ jdata = load_file(param_file) mdata = load_file(machine_file) From 25c7af624510a6f8a4d6cd9a36832c03137cf220 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:03:26 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpgen/data/reaction.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index f493e41c4..38f78ad24 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -212,9 +212,7 @@ def gen_init_reaction(args): mdata = load_file(args.MACHINE) jdata_arginfo = init_reaction_jdata_arginfo() - jdata = normalize( - jdata_arginfo, jdata, allow_ref=getattr(args, "allow_ref", False) - ) + jdata = normalize(jdata_arginfo, jdata, allow_ref=getattr(args, "allow_ref", False)) mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" From 3bf2d41ef7e61e2df2f2af4d4520f57101f98762 Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:23:29 +0000 Subject: [PATCH 06/11] fix: address review comments and add CLI coverage - guard allow_ref for programmatic callers in init_reaction\n- complete docstrings for allow_ref path\n- add CLI test assertions for --allow-ref\n\nAuthored by OpenClaw (model: gpt-5.3-codex) --- tests/test_cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0c2125d2a..20603ed82 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -13,4 +13,6 @@ def test_cli(self): "init_reaction", "autotest", ): - sp.check_output(["dpgen", subcommand, "-h"]) + output = sp.check_output(["dpgen", subcommand, "-h"]) + if subcommand in ("run", "simplify", "init_reaction"): + self.assertIn(b"--allow-ref", output) \ No newline at end of file From 0b663f649dfbfeed853aa1feb46c73837ee7f190 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:25:45 +0000 Subject: [PATCH 07/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 20603ed82..29b7edd0a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -15,4 +15,4 @@ def test_cli(self): ): output = sp.check_output(["dpgen", subcommand, "-h"]) if subcommand in ("run", "simplify", "init_reaction"): - self.assertIn(b"--allow-ref", output) \ No newline at end of file + self.assertIn(b"--allow-ref", output) From d4d044b8480f6d57c119f42f44a526f3bd3bd651 Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:42:15 +0000 Subject: [PATCH 08/11] docs: drop README changes for feature Keep documentation updates in doc/ only. --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index 4be144906..bffce5220 100644 --- a/README.md +++ b/README.md @@ -49,14 +49,6 @@ DP-GEN contains the following workflows: For detailed usage and parameters, read [DP-GEN documentation](https://docs.deepmodeling.com/projects/dpgen/). -### `$ref` support (secure by default) - -DP-GEN now supports loading external JSON/YAML snippets through `"$ref"` in relevant workflows (via `dargs>=0.5.0`). -For security reasons, it is **disabled by default** and must be enabled explicitly. - -- CLI: add `--allow-ref` to `dpgen run`, `dpgen simplify`, and `dpgen init_reaction` -- Python API path (`dpgen.util.normalize`): pass `allow_ref=True` - ## Tutorials and examples * [Tutorials](https://tutorials.deepmodeling.com/en/latest/Tutorials/DP-GEN/): basic tutorials for DP-GEN. From bbabb981497c69d3cb260445bb7aee7cc49a1362 Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:57:37 +0000 Subject: [PATCH 09/11] refactor(init_reaction): remove getattr fallback for allow_ref Use explicit args.allow_ref consistent with CLI contract.\n\nAuthored by OpenClaw (model: gpt-5.3-codex) --- dpgen/data/reaction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 38f78ad24..754f1a425 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -212,7 +212,7 @@ def gen_init_reaction(args): mdata = load_file(args.MACHINE) jdata_arginfo = init_reaction_jdata_arginfo() - jdata = normalize(jdata_arginfo, jdata, allow_ref=getattr(args, "allow_ref", False)) + jdata = normalize(jdata_arginfo, jdata, allow_ref=args.allow_ref) mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" From 496f4119fb6624a647e9b25377f256b248a11b12 Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 22:42:08 +0000 Subject: [PATCH 10/11] test(cli): use python -m dpgen to satisfy Ruff S603/S607 Authored by OpenClaw (model: gpt-5.3-codex) --- tests/test_cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 29b7edd0a..f111e08d1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,10 +1,11 @@ import subprocess as sp +import sys import unittest class TestCLI(unittest.TestCase): def test_cli(self): - sp.check_output(["dpgen", "-h"]) + sp.check_output([sys.executable, "-m", "dpgen", "-h"]) for subcommand in ( "run", "simplify", @@ -13,6 +14,6 @@ def test_cli(self): "init_reaction", "autotest", ): - output = sp.check_output(["dpgen", subcommand, "-h"]) + output = sp.check_output([sys.executable, "-m", "dpgen", subcommand, "-h"]) if subcommand in ("run", "simplify", "init_reaction"): self.assertIn(b"--allow-ref", output) From f259aa7f07f02a9a9535c2a1e6cf17c99b820d43 Mon Sep 17 00:00:00 2001 From: njzjz-bot <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 24 Feb 2026 22:47:50 +0000 Subject: [PATCH 11/11] test(cli): invoke module entrypoint as dpgen.main Use sys.executable -m dpgen.main for lint-safe invocation and working module path in CI.\n\nAuthored by OpenClaw (model: gpt-5.3-codex) --- tests/test_cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index f111e08d1..da7709e9c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -5,7 +5,7 @@ class TestCLI(unittest.TestCase): def test_cli(self): - sp.check_output([sys.executable, "-m", "dpgen", "-h"]) + sp.check_output([sys.executable, "-m", "dpgen.main", "-h"]) for subcommand in ( "run", "simplify", @@ -14,6 +14,8 @@ def test_cli(self): "init_reaction", "autotest", ): - output = sp.check_output([sys.executable, "-m", "dpgen", subcommand, "-h"]) + output = sp.check_output( + [sys.executable, "-m", "dpgen.main", subcommand, "-h"] + ) if subcommand in ("run", "simplify", "init_reaction"): self.assertIn(b"--allow-ref", output)