From bc7f81d5b4ca5dc039470cd937dedb16bb758866 Mon Sep 17 00:00:00 2001 From: leo-amd Date: Thu, 19 Mar 2026 16:44:06 +0100 Subject: [PATCH] Find errors in halo exchange test if any --- .github/workflows/rocm-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index b5aa06faf..570168865 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -172,6 +172,7 @@ jobs: export HSA_FORCE_FINE_GRAIN_PCIE=1 export HSA_ENABLE_SDMA=0 torchrun --nproc_per_node 8 apex/contrib/peer_memory/peer_halo_exchange_module_tests.py 2>&1 | tee halo_results.log + ! grep -q 'FAILURE :' halo_results.log " - name: Run Distributed Synced BatchNorm tests