From 253fec4bd633c49c5b3e6904de9b65842bbe173c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 3 Feb 2025 00:49:51 +0100 Subject: [PATCH 01/15] chore: Update package_info.py (#498) --- nemo_aligner/package_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_aligner/package_info.py b/nemo_aligner/package_info.py index e219e397d..e8145ca6f 100644 --- a/nemo_aligner/package_info.py +++ b/nemo_aligner/package_info.py @@ -14,7 +14,7 @@ MAJOR = 0 -MINOR = 6 +MINOR = 7 PATCH = 0 PRE_RELEASE = "rc0" DEV = "dev0" From 673ef7a429183b882688739c093e6656fd2478d1 Mon Sep 17 00:00:00 2001 From: jgerh <163925524+jgerh@users.noreply.github.com> Date: Mon, 10 Feb 2025 16:41:29 -0800 Subject: [PATCH 02/15] docs: fix code block for install (#503) Signed-off-by: Jennifer Gerhold <163925524+jgerh@users.noreply.github.com> --- docs/user-guide/nemoaligner.rsts | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/nemoaligner.rsts b/docs/user-guide/nemoaligner.rsts index 1e54ed676..4a3ac6b99 100644 --- a/docs/user-guide/nemoaligner.rsts +++ b/docs/user-guide/nemoaligner.rsts @@ -45,6 +45,7 @@ NeMo-Aligner also provides its own `dockerfile Date: Wed, 12 Feb 2025 23:58:05 +0100 Subject: [PATCH 03/15] ci: fix code freeze (#506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/release-freeze.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml index 10ae3386e..c015728e6 100644 --- a/.github/workflows/release-freeze.yml +++ b/.github/workflows/release-freeze.yml @@ -16,7 +16,7 @@ on: default: main jobs: code-freeze: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.17.3 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.21.6 with: library-name: NeMo-Aligner python-package: nemo_aligner From 
93d221ec8d9eae282bba82864b4c71635d517600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Thu, 13 Feb 2025 00:16:07 +0100 Subject: [PATCH 04/15] chore: version bump (#507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- nemo_aligner/package_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_aligner/package_info.py b/nemo_aligner/package_info.py index e8145ca6f..2dd79d2ef 100644 --- a/nemo_aligner/package_info.py +++ b/nemo_aligner/package_info.py @@ -14,7 +14,7 @@ MAJOR = 0 -MINOR = 7 +MINOR = 8 PATCH = 0 PRE_RELEASE = "rc0" DEV = "dev0" From a367debfce4326b140d498f062417ff94ecb5aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 17 Feb 2025 16:06:38 +0100 Subject: [PATCH 05/15] ci: Add code-freeze dry-run (#509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/release-freeze.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml index c015728e6..1afe5681d 100644 --- a/.github/workflows/release-freeze.yml +++ b/.github/workflows/release-freeze.yml @@ -14,14 +14,20 @@ on: description: Commit SHA to use for cut-off required: false default: main + dry-run: + type: boolean + description: Dry-run of code-freeze + required: false + default: true jobs: code-freeze: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.21.6 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.0 with: library-name: NeMo-Aligner python-package: nemo_aligner release-type: ${{ inputs.release-type }} freeze-commit: ${{ inputs.freeze-commit }} + dry-run: ${{ inputs.dry-run }} secrets: SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }} SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} From 
203b669893eea55571dde8c1f49739b2ab56993f Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Wed, 19 Feb 2025 09:24:28 -0800 Subject: [PATCH 06/15] fix: remove spammy warning about the old dpo dataset format (#513) Signed-off-by: Terry Kong --- nemo_aligner/data/nlp/datasets.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nemo_aligner/data/nlp/datasets.py b/nemo_aligner/data/nlp/datasets.py index b1b6e2d6c..f6ba43d41 100644 --- a/nemo_aligner/data/nlp/datasets.py +++ b/nemo_aligner/data/nlp/datasets.py @@ -445,9 +445,6 @@ def __getitem__(self, idx): prompt_fmtd = payload["prompt"] chosen_fmtd = payload["prompt"] + payload["chosen_response"] rejected_fmtd = payload["prompt"] + payload["rejected_response"] - logging.warning( - "Pre-formatting chat conversation as string with hardcoded chat tokens will be deprecated." - ) # (@adithyare) this will spam the console for now. else: prompt_fmtd = self.convert(payload["prompt"]) # (@adithyare) read var as "prompt formatted" chosen_fmtd = self.convert(payload["prompt"] + [payload["chosen_response"]]) From 3b51a3cb439c799ed441feb3d8effef8989f7d11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 19 Feb 2025 20:22:24 +0100 Subject: [PATCH 07/15] ci: Cherry-pick from `dev` (#515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cherry-pick-release-commit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cherry-pick-release-commit.yml b/.github/workflows/cherry-pick-release-commit.yml index a1d300ca4..124dfaf94 100644 --- a/.github/workflows/cherry-pick-release-commit.yml +++ b/.github/workflows/cherry-pick-release-commit.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - dev jobs: cherry-pick: From 5ed6491726f731a1d9f1287d34819e5ffe570bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 19 Feb 2025 20:46:22 +0100 Subject: [PATCH 08/15] ci: update workflows (#514) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/build-test-publish-wheel.yml | 18 +++++------------- .github/workflows/release-freeze.yml | 4 ++-- .github/workflows/release.yaml | 15 ++++----------- nemo_aligner/__init__.py | 13 +++++++++---- setup/requirements.txt | 10 +++++----- 5 files changed, 25 insertions(+), 35 deletions(-) diff --git a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml index d109b746c..b3625e525 100644 --- a/.github/workflows/build-test-publish-wheel.yml +++ b/.github/workflows/build-test-publish-wheel.yml @@ -19,31 +19,23 @@ on: branches: - main - 'r**' - + pull_request: + defaults: run: shell: bash -x -e -u -o pipefail {0} jobs: build-test-publish-wheel: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.12.2 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.22.3 with: - image-name: nemo_container - dockerfile: Dockerfile - image-label: nemo-aligner - build-args: | - IMAGE_LABEL=nemo-aligner - MAX_JOBS=32 - ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }} - prune-filter-timerange: 24h dry-run: true python-package: nemo_aligner - container-workdir: /opt/NeMo-Aligner - environment: public + python-version: "3.11" secrets: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} \ No newline at end of file diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml index 1afe5681d..85d7db004 100644 --- a/.github/workflows/release-freeze.yml +++ b/.github/workflows/release-freeze.yml @@ -21,7 +21,7 @@ on: default: true jobs: code-freeze: - uses: 
NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.0 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.5 with: library-name: NeMo-Aligner python-package: nemo_aligner @@ -29,5 +29,5 @@ jobs: freeze-commit: ${{ inputs.freeze-commit }} dry-run: ${{ inputs.dry-run }} secrets: - SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }} + SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9678d8db5..e474fa72f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -32,24 +32,17 @@ on: jobs: release: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.18.4 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.22.6 with: release-ref: ${{ inputs.release-ref }} - image-name: nemo_aligner_container - dockerfile: Dockerfile - image-label: nemo-aligner - build-args: | - MAX_JOBS=32 - ALIGNER_COMMIT=${{ inputs.release-ref }} - prune-filter-timerange: 24h python-package: nemo_aligner - container-workdir: /opt/NeMo-Aligner + python-version: "3.11" library-name: NeMo-Aligner dry-run: ${{ inputs.dry-run }} version-bump-branch: ${{ inputs.version-bump-branch }} secrets: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} - SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} + SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} PAT: ${{ secrets.PAT }} diff --git a/nemo_aligner/__init__.py b/nemo_aligner/__init__.py index 4df0c0dc8..f3639afdd 100644 --- a/nemo_aligner/__init__.py +++ b/nemo_aligner/__init__.py @@ -13,12 +13,17 @@ # limitations under the License. 
import os -from nemo.utils import logging os.environ["DISABLE_TORCH_DEVICE_SET"] = "1" -logging.info( - f"Importing NeMo-Aligner sets DISABLE_TORCH_DEVICE_SET=1 to disable device reassignment within TensorRT-LLM" -) + +try: + from nemo.utils import logging + + logging.info( + f"Importing NeMo-Aligner sets DISABLE_TORCH_DEVICE_SET=1 to disable device reassignment within TensorRT-LLM" + ) +except ImportError: + pass from nemo_aligner.package_info import ( diff --git a/setup/requirements.txt b/setup/requirements.txt index 4aa22afa1..80758d8ad 100644 --- a/setup/requirements.txt +++ b/setup/requirements.txt @@ -1,8 +1,8 @@ Jinja2~=3.1.4 jsonlines -megatron_core>=0.8 -nemo_toolkit[nlp] -nvidia-pytriton +# megatron_core>=0.8 +# nemo_toolkit[nlp] +# nvidia-pytriton # pynvml pin is needed for TRTLLM v0.13.0 since 12.0.0 contains a breaking change. -pynvml==11.5.3 -tensorrt-llm==0.13.0 +# pynvml==11.5.3 +# tensorrt-llm==0.13.0 From dd66927112bd86d36abd4492c4e16ba8e26518c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 24 Feb 2025 18:12:58 +0100 Subject: [PATCH 09/15] ci: Fix cherry-pick workflow (#520) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cherry-pick-release-commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cherry-pick-release-commit.yml b/.github/workflows/cherry-pick-release-commit.yml index 124dfaf94..9ba66b1db 100644 --- a/.github/workflows/cherry-pick-release-commit.yml +++ b/.github/workflows/cherry-pick-release-commit.yml @@ -8,7 +8,7 @@ on: jobs: cherry-pick: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.6.0 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.22.7 secrets: PAT: ${{ secrets.PAT }} SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} From 61a51c355238d8cf2093884e0626efaf5bdea7f9 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 24 Feb 2025 23:06:46 +0100 Subject: [PATCH 10/15] ci: Bump build workflow (#521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/cicd-main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 728f3b6bf..5b67f3065 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -92,8 +92,9 @@ jobs: build-container: if: ${{ needs.pre-flight.outputs.run_ci == 'true' }} needs: [pre-flight] - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.1.0 + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.22.7 with: + build-ref: ${{ github.sha }} image-name: nemo_aligner_container dockerfile: Dockerfile image-label: nemo-aligner From 8630dbf08462ec739bc081438a16cb0634ac67da Mon Sep 17 00:00:00 2001 From: Julien Veron Vialard <50602890+jveronvialard@users.noreply.github.com> Date: Fri, 14 Mar 2025 16:32:34 -0700 Subject: [PATCH 11/15] feat: enable reward model training with multiple validation datasets (#528) Signed-off-by: Julien Veron Vialard Signed-off-by: NeMo-Aligner CI Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 2 ++ examples/nlp/gpt/conf/training_rm.yaml | 4 +++ examples/nlp/gpt/train_reward_model.py | 36 ++++++++++++++++------- nemo_aligner/algorithms/supervised.py | 29 +++++++++++++------ nemo_aligner/data/nlp/builders.py | 40 +++++++++++++++++--------- 5 files changed, 80 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca8bc3b37..8f1f31197 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,12 +38,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ``` - Add code and instructions for replicating Reward Modeling training in HelpSteer2 and 
HelpSteer2-Preference - Implement REINFORCE algorithm. +- Add support for multiple validation sets when training a Reward Model. ### Breaking Changes - Upgrade TRTLLM dependency from v0.10.0 to v0.12.0 and migrate from `GPTSession` cpp runtime to `ModelRunner` python runtime. Please use the latest Dockerfile. - Using latest TransformerEngine versions may require `++model.dist_ckpt_load_strictness=log_all` when loading from a older pre-existing checkpoint to not error out. - NeMo-Aligner now requires Megatron-LM==0.9.0 for the APIs to calculate the microbatch sizes (API introduced `megatron.core.num_microbatches_calculator.reconfigure_num_microbatch_calculator`). - NeMo-Aligner now requires a version of NeMo with this change to how the MoE spec is handled: https://github.com/NVIDIA/NeMo/pull/9035 . +- Validation metrics of the SupervisedTrainer (used by SFT and Reward Model) are now logged in Weights&Biases as / instead of val/. ### Bug Fixes - It is now required, for stability, to add `export NCCL_ALGO=...` to scripts launching PPO training loop. Please see the [RLHF docs](./docs/user-guide/rlhf.rst) for information. 
diff --git a/examples/nlp/gpt/conf/training_rm.yaml b/examples/nlp/gpt/conf/training_rm.yaml index afe927423..e38d2b9ae 100644 --- a/examples/nlp/gpt/conf/training_rm.yaml +++ b/examples/nlp/gpt/conf/training_rm.yaml @@ -113,6 +113,10 @@ model: reset_attention_mask: False # Reset attention mask after end-of-document token eod_mask_loss: False # Mask loss for the end of document tokens index_mapping_dir: null # path to save index mapping .npy files, by default will save in the same location as data_prefix + + # For multiple validation sets, use a dictionary that includes one key named `validation` (main + # validation set) and extra keys of the form `validation*` (secondary validation sets), for instance: + # model.data.data_prefix="{train:[train.jsonl],validation:[valid.jsonl],validation_set2:[valid2.jsonl],test:[test.jsonl]}" data_prefix: null precision: ${trainer.precision} diff --git a/examples/nlp/gpt/train_reward_model.py b/examples/nlp/gpt/train_reward_model.py index 5bf507d67..fbc46d31b 100644 --- a/examples/nlp/gpt/train_reward_model.py +++ b/examples/nlp/gpt/train_reward_model.py @@ -24,6 +24,7 @@ build_train_valid_test_regression_rm_datasets, build_train_valid_test_rm_datasets, ) +from nemo_aligner.data.nlp.datasets import RewardModelDataset from nemo_aligner.models.nlp.gpt.reward_model_classes import REWARD_MODEL_CLASS_DICT, RewardModelType from nemo_aligner.utils.distributed import Timer from nemo_aligner.utils.train_script_utils import ( @@ -124,16 +125,31 @@ def main(cfg) -> None: load_gbs=True, use_random_sampler=cfg.trainer.rm.train_random_sampler, ) - - val_dataloader = build_dataloader( - cfg=cfg, - dataset=validation_ds, - consumed_samples=0, - mbs=cfg.model.micro_batch_size, - gbs=cfg.model.global_batch_size, - load_gbs=True, - use_random_sampler=cfg.trainer.rm.val_random_sampler, - ) + if isinstance(validation_ds, RewardModelDataset): + val_dataloader = build_dataloader( + cfg=cfg, + dataset=validation_ds, + consumed_samples=0, + 
mbs=cfg.model.micro_batch_size, + gbs=cfg.model.global_batch_size, + load_gbs=True, + use_random_sampler=cfg.trainer.rm.val_random_sampler, + ) + elif isinstance(validation_ds, dict): + val_dataloader = { + key: build_dataloader( + cfg=cfg, + dataset=dataset, + consumed_samples=0, + mbs=cfg.model.micro_batch_size, + gbs=cfg.model.global_batch_size, + load_gbs=True, + use_random_sampler=False, + ) + for key, dataset in validation_ds.items() + } + else: + raise NotImplementedError("Unsupported validation dataset type") init_using_ptl(trainer, ptl_model, train_dataloader, train_ds) optimizer, scheduler = extract_optimizer_scheduler_from_ptl_model(ptl_model) diff --git a/nemo_aligner/algorithms/supervised.py b/nemo_aligner/algorithms/supervised.py index 3f2f67c61..623a1c7bd 100644 --- a/nemo_aligner/algorithms/supervised.py +++ b/nemo_aligner/algorithms/supervised.py @@ -52,7 +52,7 @@ def __init__( ): self.model = model self.train_dataloader = train_dataloader - self.val_dataloader = val_dataloader + self.val_dataloaders = val_dataloader if isinstance(val_dataloader, dict) else {"validation": val_dataloader} self.test_dataloader = test_dataloader self.logger = logger self.cfg = cfg @@ -72,8 +72,10 @@ def __init__( self.num_steps_per_epoch = compute_num_steps_per_epoch( self.train_dataloader.batch_sampler, self.cfg.get("limit_train_batches", 1.0) ) - - self.limit_val_batches = compute_limit_batches(len(val_dataloader), self.cfg.limit_val_batches) + self.dict_limit_val_batches = { + key: compute_limit_batches(len(val_dataloader), self.cfg.limit_val_batches) + for key, val_dataloader in self.val_dataloaders.items() + } self.val_check_interval = ( int(self.cfg.val_check_interval * self.num_steps_per_epoch) if isinstance(self.cfg.val_check_interval, float) @@ -98,12 +100,23 @@ def validation_step(self, batch): @torch.no_grad() def run_validation(self): + val_loss, val_metrics = None, None + + for key in self.val_dataloaders: + loss, metrics = 
self.run_validation_one_dataset(key) + if key == "validation": + val_loss, val_metrics = loss, metrics + + return val_loss, val_metrics + + @torch.no_grad() + def run_validation_one_dataset(self, key: str): loss_means = [] val_metrics = defaultdict(list) val_pbar = tqdm( - zip(range(self.limit_val_batches), self.val_dataloader), - total=self.limit_val_batches, + zip(range(self.dict_limit_val_batches[key]), self.val_dataloaders[key]), + total=self.dict_limit_val_batches[key], leave=True, desc="Validation steps", ) @@ -134,6 +147,8 @@ def run_validation(self): val_metrics.update(self.inference_metrics_handler.compute()) self.inference_metrics_handler.reset() + self.logger.log_metrics(val_metrics, step=self.step, prefix=f"{key}/") + return mean(loss_means), val_metrics def train_single_step(self, batch): @@ -188,7 +203,6 @@ def fit(self): val_loss, val_metrics = self.run_validation() # validation is done on the UPDATED weights # so we use the incremented self.step - self.logger.log_metrics(val_metrics, step=self.step, prefix="val/") logging.info("Initial validation metrics logged.") for _ in epoch_iter: @@ -229,7 +243,7 @@ def fit(self): self.max_steps, self.val_check_interval, self.cfg.save_interval, - self.limit_val_batches, + self.dict_limit_val_batches["validation"], run_time_exceeded=run_time_exceeded, ) @@ -237,7 +251,6 @@ def fit(self): val_loss, val_metrics = self.run_validation() # validation is done on the UPDATED weights # so we use the incremented self.step - self.logger.log_metrics(val_metrics, step=self.step, prefix="val/") val_metrics = {f"val_{k}": v for k, v in val_metrics.items()} metrics.update(val_metrics) diff --git a/nemo_aligner/data/nlp/builders.py b/nemo_aligner/data/nlp/builders.py index 85ecac1e1..663e94d74 100644 --- a/nemo_aligner/data/nlp/builders.py +++ b/nemo_aligner/data/nlp/builders.py @@ -197,6 +197,12 @@ def build_train_valid_test_datasets( validation_n_chunks = n_chunks test_n_chunks = n_chunks + for key in data_prefix.keys(): + if 
key not in ["train", "test"] and not key.startswith("validation"): + raise NotImplementedError( + f"Unsupported key '{key}' found in data_prefix. Supported keys are: 'train', 'validation*', and 'test'." + ) + train_ds = build_dataset_generic( cls=cls, cfg=cfg, @@ -210,19 +216,27 @@ def build_train_valid_test_datasets( n_chunks=train_n_chunks, n_examples_per_chunk=train_examples_per_chunk, ) - validation_ds = build_dataset_generic( - cls=cls, - cfg=cfg, - data_prefix=data_prefix["validation"], - data_impl=data_impl, - num_samples=int(train_valid_test_num_samples[1]), - seq_length=seq_length, - seed=seed, - tokenizer=tokenizer, - name="validation", - n_chunks=validation_n_chunks, - n_examples_per_chunk=validation_examples_per_chunk, - ) + validation_keys = [key for key in data_prefix if key.startswith("validation")] + validation_ds = { + key: build_dataset_generic( + cls=cls, + cfg=cfg, + data_prefix=data_prefix[key], + data_impl=data_impl, + num_samples=int(train_valid_test_num_samples[1]), + seq_length=seq_length, + seed=seed, + tokenizer=tokenizer, + name=key, + n_chunks=validation_n_chunks, + n_examples_per_chunk=validation_examples_per_chunk, + ) + for key in validation_keys + } + if len(validation_ds) <= 1: + assert len(validation_ds) == 1, "No validation key found in data_prefix" + validation_ds = next(iter(validation_ds.values())) # Use validation dataset directly instead of a dict + test_ds = build_dataset_generic( cls=cls, cfg=cfg, From a2286e75b3cd28866b2731d7dad724820b90d4de Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Sat, 15 Mar 2025 10:57:00 -0500 Subject: [PATCH 12/15] ci: Disable changed-files github action (#534) Signed-off-by: Charlie Truong --- .github/workflows/cicd-main.yml | 40 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 5b67f3065..a9bf76daf 100644 --- a/.github/workflows/cicd-main.yml +++ 
b/.github/workflows/cicd-main.yml @@ -55,39 +55,39 @@ jobs: run: | echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT" - - name: Get changed files - id: changed-files - if: github.event_name == 'pull_request' - uses: tj-actions/changed-files@v44 - with: - files_yaml: | - doc: - - '**.md' - - docs/** - src: - - '!**.md' - - '!docs/**' + # - name: Get changed files + # id: changed-files + # if: github.event_name == 'pull_request' + # uses: tj-actions/changed-files@v44 + # with: + # files_yaml: | + # doc: + # - '**.md' + # - docs/** + # src: + # - '!**.md' + # - '!docs/**' - name: Evaluate conditions id: evaluate env: - DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }} - CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }} - CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }} + # DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }} + # CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }} + # CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }} IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }} LABEL: ${{ github.event.label.name == 'Run CICD' }} MERGE_GROUP: ${{ github.event_name == 'merge_group' }} run: | # Some output that's helpful for debugging - echo "Docs changed: $CHANGED_DOCS" - echo "Src changed: $CHANGED_SRC" + # echo "Docs changed: $CHANGED_DOCS" + # echo "Src changed: $CHANGED_SRC" - echo "DOCS_ONLY: $DOCS_ONLY" + # echo "DOCS_ONLY: $DOCS_ONLY" echo "LABEL: $LABEL" echo "IS_PULLREQUEST: $IS_PULLREQUEST" # Run CI only (on main or if label is attached) and if it's not only docs - echo run_ci=$([[ ("$LABEL" = "true" || "$IS_PULLREQUEST" = "false" || "$MERGE_GROUP" = "true") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT" + echo run_ci=$([[ 
("$LABEL" = "true" || "$IS_PULLREQUEST" = "false" || "$MERGE_GROUP" = "true") ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT" build-container: if: ${{ needs.pre-flight.outputs.run_ci == 'true' }} @@ -161,4 +161,4 @@ jobs: echo '🤖: CICD Result' >> $GITHUB_STEP_SUMMARY echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY - test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true" \ No newline at end of file + test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true" From 9faab404f21994a7eb1d6ed5890b76152b941636 Mon Sep 17 00:00:00 2001 From: Julien Veron Vialard <50602890+jveronvialard@users.noreply.github.com> Date: Thu, 10 Apr 2025 19:59:35 -0700 Subject: [PATCH 13/15] feat: enable reward model training without dropping the last validation batch (#537) Signed-off-by: Julien Veron Vialard Signed-off-by: NeMo-Aligner CI Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 1 + examples/nlp/gpt/train_reward_model.py | 3 + nemo_aligner/algorithms/supervised.py | 8 +- nemo_aligner/data/nlp/builders.py | 2 +- .../nlp/gpt/megatron_gpt_reward_model.py | 109 +++++++++--------- 5 files changed, 69 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f1f31197..077f4bfbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Add code and instructions for replicating Reward Modeling training in HelpSteer2 and HelpSteer2-Preference - Implement REINFORCE algorithm. - Add support for multiple validation sets when training a Reward Model. +- Add support for `validation_drop_last=False` when training a Reward Model. ### Breaking Changes - Upgrade TRTLLM dependency from v0.10.0 to v0.12.0 and migrate from `GPTSession` cpp runtime to `ModelRunner` python runtime. Please use the latest Dockerfile. 
diff --git a/examples/nlp/gpt/train_reward_model.py b/examples/nlp/gpt/train_reward_model.py index fbc46d31b..42cf5bde2 100644 --- a/examples/nlp/gpt/train_reward_model.py +++ b/examples/nlp/gpt/train_reward_model.py @@ -136,6 +136,7 @@ def main(cfg) -> None: use_random_sampler=cfg.trainer.rm.val_random_sampler, ) elif isinstance(validation_ds, dict): + drop_last = cfg.model.data.get("validation_drop_last", True) val_dataloader = { key: build_dataloader( cfg=cfg, @@ -145,6 +146,8 @@ def main(cfg) -> None: gbs=cfg.model.global_batch_size, load_gbs=True, use_random_sampler=False, + drop_last=drop_last, + pad_samples_to_global_batch_size=not drop_last, ) for key, dataset in validation_ds.items() } diff --git a/nemo_aligner/algorithms/supervised.py b/nemo_aligner/algorithms/supervised.py index 623a1c7bd..d607d1fd3 100644 --- a/nemo_aligner/algorithms/supervised.py +++ b/nemo_aligner/algorithms/supervised.py @@ -143,7 +143,13 @@ def run_validation_one_dataset(self, key: str): log_val_metrics = {f"val_{k}": v for k, v in metrics.items()} val_pbar.set_postfix(log_val_metrics) - val_metrics = {k: mean(v) for k, v in val_metrics.items()} + if "weights" in val_metrics: + w = val_metrics.pop("weights") + val_metrics = { + k: sum([value * weight for value, weight in zip(v, w)]) / sum(w) for k, v in val_metrics.items() + } + else: + val_metrics = {k: mean(v) for k, v in val_metrics.items()} val_metrics.update(self.inference_metrics_handler.compute()) self.inference_metrics_handler.reset() diff --git a/nemo_aligner/data/nlp/builders.py b/nemo_aligner/data/nlp/builders.py index 663e94d74..c87ee5bb0 100644 --- a/nemo_aligner/data/nlp/builders.py +++ b/nemo_aligner/data/nlp/builders.py @@ -129,7 +129,7 @@ def _build_dataset(current_data_prefix, current_num_samples): logging.info(" Total {} documents is : {} ".format(name, total_num_of_documents)) drop_last = True - if name == "valid": + if name.startswith("validation"): drop_last = cfg.data.get("validation_drop_last", True) dataset 
= cls( diff --git a/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py b/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py index 596436c67..0a667cf0e 100644 --- a/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py +++ b/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py @@ -184,49 +184,54 @@ def gather_and_split_rewards(rewards_out): def loss_func(output_tensor): # Loss per micro batch (ub). - loss_for_ub, acc_chosen = self.loss_func(output_tensor) + loss_for_ub = self.loss_func(output_tensor) + # Number of valid pairs in the micro batch. + mask_valid_pairs = batch["loss_mask"].sum(dim=1) > 0 + num_valid_pairs = mask_valid_pairs.sum() + # Compute loss average over valid pairs only. + loss_for_ub = loss_for_ub[mask_valid_pairs].mean() + # Compute accuracy over valid pairs only. + out_chosen, out_rejected = self.split_output_tensor(output_tensor) + comp = out_chosen > out_rejected + num_correct_pairs = comp[mask_valid_pairs].sum() + acc_chosen = num_correct_pairs / max(1, num_valid_pairs) if validation_step and not self.cfg.data.get("validation_drop_last", True): - num_valid_tokens_in_ub = batch["loss_mask"].sum() + if loss_for_ub.isnan(): - assert batch["loss_mask"].count_nonzero() == 0, "Got NaN loss with non-empty input" - loss_sum_for_ub = torch.zeros_like(num_valid_tokens_in_ub) + assert num_valid_pairs == 0, "Got NaN loss with non-empty input" + loss_sum_for_ub = torch.zeros_like(num_valid_pairs, dtype=loss_for_ub.dtype) else: - loss_sum_for_ub = num_valid_tokens_in_ub * loss_for_ub - - loss_sum_and_ub_size_all_gpu = torch.cat( - [ - loss_sum_for_ub.clone().detach().view(1), - torch.tensor([num_valid_tokens_in_ub]).cuda().clone().detach(), - ] - ) - torch.distributed.all_reduce( - loss_sum_and_ub_size_all_gpu, group=parallel_state.get_data_parallel_group() - ) - out_chosen, out_rejected = gather_and_split_rewards(output_tensor) - - return ( - loss_for_ub, - { - "loss_sum_and_ub_size": loss_sum_and_ub_size_all_gpu, - "out_chosen": 
out_chosen, - "out_rejected": out_rejected, - }, - ) + loss_sum_for_ub = num_valid_pairs * loss_for_ub + + tensor_to_reduce = torch.stack([loss_sum_for_ub, num_valid_pairs, num_correct_pairs,]) + torch.distributed.all_reduce(tensor_to_reduce, group=parallel_state.get_data_parallel_group()) + loss_sum_for_ub, num_valid_pairs, num_correct_pairs = tensor_to_reduce + + if num_valid_pairs > 0: + reduced_loss = loss_sum_for_ub / num_valid_pairs + reduced_acc = num_correct_pairs / num_valid_pairs + else: + reduced_loss = torch.zeros_like(loss_sum_for_ub) + reduced_acc = torch.zeros_like(num_correct_pairs) + else: reduced_loss = average_losses_across_data_parallel_group([loss_for_ub]) reduced_acc = average_losses_across_data_parallel_group([acc_chosen]) - - out_chosen, out_rejected = gather_and_split_rewards(output_tensor) - - return ( - loss_for_ub, - { - "avg": reduced_loss, - "acc": reduced_acc, - "out_chosen": out_chosen, - "out_rejected": out_rejected, - }, - ) + # This assumes `drop_last=True` -- which is normally the case during training. 
+ num_valid_pairs = num_valid_pairs * parallel_state.get_data_parallel_world_size() + + out_chosen, out_rejected = gather_and_split_rewards(output_tensor) + + return ( + loss_for_ub, + { + "num_valid_pairs": num_valid_pairs, + "avg": reduced_loss, + "acc": reduced_acc, + "out_chosen": out_chosen, + "out_rejected": out_rejected, + }, + ) return output_tensor, loss_func @@ -238,10 +243,8 @@ def split_output_tensor(self, output_tensor): def loss_func(self, output_tensor): out_chosen, out_rejected = self.split_output_tensor(output_tensor) - comp = out_chosen > out_rejected - acc_chosen = torch.sum(comp) / comp.shape[0] - loss = -torch.nn.functional.logsigmoid(out_chosen - out_rejected).mean() - return loss, acc_chosen + loss = -torch.nn.functional.logsigmoid(out_chosen - out_rejected) + return loss def get_loss_and_metrics(self, batch, forward_only): data_iter = get_iterator_k_split(batch, get_num_microbatches()) @@ -272,21 +275,21 @@ def get_loss_and_metrics(self, batch, forward_only): rewards_all_mean = rewards_all.mean() rewards_all_std = rewards_all.std() - # average loss across micro batches - loss_tensors_list = [loss_reduced["avg"] for loss_reduced in losses_reduced_per_micro_batch] - loss_tensor = torch.concat(loss_tensors_list) - loss_mean = loss_tensor.mean() - acc_tensors_list = [loss_reduced["acc"] for loss_reduced in losses_reduced_per_micro_batch] - - if len(acc_tensors_list) == 1: - acc_tensor = acc_tensors_list[0] - elif len(acc_tensors_list) > 1: - acc_tensor = torch.concat(acc_tensors_list) - acc_mean = acc_tensor.mean() + num_valid_pairs = torch.stack( + [loss_reduced["num_valid_pairs"] for loss_reduced in losses_reduced_per_micro_batch] + ) + loss_tensor = torch.cat([loss_reduced["avg"].view(1) for loss_reduced in losses_reduced_per_micro_batch]) + acc_tensor = torch.cat([loss_reduced["acc"].view(1) for loss_reduced in losses_reduced_per_micro_batch]) + + weights = num_valid_pairs.sum().float() + loss_mean = (loss_tensor * num_valid_pairs).sum() / 
weights + acc_mean = (acc_tensor * num_valid_pairs).sum() / weights + else: loss_mean = torch.tensor(0.0, device=torch.cuda.current_device()) acc_mean = torch.tensor(0.0, device=torch.cuda.current_device()) + weights = torch.tensor(0.0, device=torch.cuda.current_device()) rewards_chosen_mean = torch.tensor(0.0, device=torch.cuda.current_device()) rewards_rejected_mean = torch.tensor(0.0, device=torch.cuda.current_device()) @@ -296,6 +299,7 @@ def get_loss_and_metrics(self, batch, forward_only): # we can only log on one rank if it is rank zero so we broadcast from last rank torch.distributed.broadcast(loss_mean, get_last_rank()) torch.distributed.broadcast(acc_mean, get_last_rank()) + torch.distributed.broadcast(weights, get_last_rank()) torch.distributed.broadcast(rewards_chosen_mean, get_last_rank()) torch.distributed.broadcast(rewards_rejected_mean, get_last_rank()) @@ -305,6 +309,7 @@ def get_loss_and_metrics(self, batch, forward_only): metrics = { "loss": loss_mean, "acc": acc_mean, + "weights": weights, "rewards_chosen_mean": rewards_chosen_mean, "rewards_rejected_mean": rewards_rejected_mean, "rewards_all_mean": rewards_all_mean, From 2fbaed1aedbb89f9e4fc1ffd861f21bc13812e8d Mon Sep 17 00:00:00 2001 From: Julien Veron Vialard <50602890+jveronvialard@users.noreply.github.com> Date: Mon, 28 Apr 2025 09:49:54 -0700 Subject: [PATCH 14/15] feat: minor adjustments to the reward model training (#540) Signed-off-by: Julien Veron Vialard --- examples/nlp/gpt/train_reward_model.py | 5 ++++- nemo_aligner/algorithms/supervised.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/nlp/gpt/train_reward_model.py b/examples/nlp/gpt/train_reward_model.py index 42cf5bde2..5fe04c3c6 100644 --- a/examples/nlp/gpt/train_reward_model.py +++ b/examples/nlp/gpt/train_reward_model.py @@ -126,6 +126,7 @@ def main(cfg) -> None: use_random_sampler=cfg.trainer.rm.train_random_sampler, ) if isinstance(validation_ds, RewardModelDataset): + drop_last = 
cfg.model.data.get("validation_drop_last", True) val_dataloader = build_dataloader( cfg=cfg, dataset=validation_ds, @@ -134,6 +135,8 @@ def main(cfg) -> None: gbs=cfg.model.global_batch_size, load_gbs=True, use_random_sampler=cfg.trainer.rm.val_random_sampler, + drop_last=drop_last, + pad_samples_to_global_batch_size=not drop_last, ) elif isinstance(validation_ds, dict): drop_last = cfg.model.data.get("validation_drop_last", True) @@ -145,7 +148,7 @@ def main(cfg) -> None: mbs=cfg.model.micro_batch_size, gbs=cfg.model.global_batch_size, load_gbs=True, - use_random_sampler=False, + use_random_sampler=cfg.trainer.rm.val_random_sampler, drop_last=drop_last, pad_samples_to_global_batch_size=not drop_last, ) diff --git a/nemo_aligner/algorithms/supervised.py b/nemo_aligner/algorithms/supervised.py index d607d1fd3..8cf0e6455 100644 --- a/nemo_aligner/algorithms/supervised.py +++ b/nemo_aligner/algorithms/supervised.py @@ -145,17 +145,19 @@ def run_validation_one_dataset(self, key: str): if "weights" in val_metrics: w = val_metrics.pop("weights") + val_loss = sum([value * weight for value, weight in zip(loss_means, w)]) / sum(w) val_metrics = { k: sum([value * weight for value, weight in zip(v, w)]) / sum(w) for k, v in val_metrics.items() } else: + val_loss = mean(loss_means) val_metrics = {k: mean(v) for k, v in val_metrics.items()} val_metrics.update(self.inference_metrics_handler.compute()) self.inference_metrics_handler.reset() self.logger.log_metrics(val_metrics, step=self.step, prefix=f"{key}/") - return mean(loss_means), val_metrics + return val_loss, val_metrics def train_single_step(self, batch): self.optimizer.zero_grad() From af2b3bd96ad78190f6ebfb552b9e599e313ce027 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Fri, 16 May 2025 14:09:40 -0700 Subject: [PATCH 15/15] docs: deprecation notice (#543) Signed-off-by: Terry Kong --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a2a500e26..8c942f875 100644 --- a/README.md 
+++ b/README.md @@ -1,5 +1,7 @@ # NVIDIA NeMo-Aligner +⚠️ **As of 5/15/2025, this repository is no longer actively maintained. We recommend switching to [NeMo RL](https://github.com/NVIDIA/NeMo-RL), a scalable and modular post-training library with seamless Hugging Face integration, Megatron Core optimizations, and Ray as the scheduling backbone.** ⚠️ + # Latest News - We released Nemotron-4-340B [Base](https://huggingface.co/nvidia/Nemotron-4-340B-Base), [Instruct](https://huggingface.co/nvidia/Nemotron-4-340B-Instruct), [Reward](https://huggingface.co/nvidia/Nemotron-4-340B-Reward). The Instruct and Reward variants are trained in Nemo-Aligner. Please see the [Helpsteer2](https://arxiv.org/abs/2406.08673) paper for more details on the reward model training. - We are excited to announce the release of accelerated generation support in our RLHF pipeline using [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). For more information, please refer to our [RLHF documentation](https://docs.nvidia.com/nemo-framework/user-guide/latest/modelalignment/rlhf.html).