From b12db5d3c7fe15e7d95a3c62642cabbdea27665a Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 15 Jan 2026 11:58:25 -0500 Subject: [PATCH 01/53] mpi4py CI with ASAN in separate workflow Address sanitizer helps us catch memory bugs even if they don't manifest into faults right away. The instrumentation incurs some overhead so this is run on a reduced set of mpi4py runs. Also tests `ompi_info` and `mpicc`. Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py_asan.yaml | 159 ++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 .github/workflows/ompi_mpi4py_asan.yaml diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml new file mode 100644 index 00000000000..95a519604b4 --- /dev/null +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -0,0 +1,159 @@ +name: mpi4py (ASAN) + +on: + pull_request: + workflow_dispatch: + inputs: + repository: + description: 'mpi4py repository' + default: 'mpi4py/mpi4py' + required: false + type: string + ref: + description: 'mpi4py branch/tag/SHA' + default: 'master' + required: false + type: string + +permissions: + contents: read + +jobs: + test: + # We need Unbuntu 24.04 (over 22.04) due to a kernel bug, + # see https://github.com/google/sanitizers/issues/856. 
+ runs-on: ubuntu-24.04 + timeout-minutes: 30 + env: + MPI4PY_TEST_SPAWN: true + # disable ASAN while building + ASAN_OPTIONS: verify_asan_link_order=0,detect_odr_violation=0,abort_on_error=0 + # disable leak detection + LSAN_OPTIONS: detect_leaks=0,exitcode=0 + + steps: + - name: Configure hostname + run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null + if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} + + - name: Install depencencies + run: sudo apt-get install -y -q + libnuma-dev libasan8 + if: ${{ runner.os == 'Linux' }} + + - name: Checkout Open MPI + uses: actions/checkout@v4 + with: + path: mpi-build + submodules: recursive + + - name: Bootstrap Open MPI + run: ./autogen.pl + working-directory: mpi-build + + # Install into a separate directory (/opt/openmpi) so that we can + # bundle up that tree into an artifact to share with other jobs in + # this github action. Specifically don't use /usr/local, because + # there's a bunch of other stuff already installed in /usr/local, + # and we don't need to include that in our artifact. 
+ - name: Configure Open MPI + run: ./configure + --disable-dependency-tracking + --disable-sphinx + --disable-mpi-fortran + --disable-oshmem + --disable-silent-rules + --prefix=/opt/openmpi + CFLAGS="-O2 -fno-omit-frame-pointer -g -fsanitize=address" + LDFLAGS="-Wl,-rpath,/opt/openmpi/lib -fsanitize=address" + working-directory: mpi-build + + - name: Build MPI + run: make -j $(nproc) + working-directory: mpi-build + + - name: Install MPI + run: sudo make install + working-directory: mpi-build + + - name: Add Open MPI to PATH + run: echo /opt/openmpi/bin >> $GITHUB_PATH + + - name: Tweak MPI + run: | + # Tweak MPI + mca_params="$HOME/.openmpi/mca-params.conf" + mkdir -p "$(dirname "$mca_params")" + echo mpi_param_check = true >> "$mca_params" + echo mpi_show_handle_leaks = true >> "$mca_params" + mca_params="$HOME/.prte/mca-params.conf" + mkdir -p "$(dirname "$mca_params")" + echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" + + - name: Show MPI + run: ompi_info + + - name: Show MPICC + run: mpicc -show + + - name: Use Python + uses: actions/setup-python@v5 + with: + python-version: 3 + architecture: x64 + + - name: Install Python packages (build) + run: python -m pip install --upgrade + setuptools pip wheel + + - name: Install Python packages (test) + run: python -m pip install --upgrade + numpy cffi pyyaml + + - name: Checkout mpi4py + uses: actions/checkout@v4 + with: + repository: ${{ inputs.repository || 'mpi4py/mpi4py' }} + ref: ${{ inputs.ref }} + + - name: Install mpi4py + run: python -m pip install . + env: + CFLAGS: "-O0" + + - name: Setting up ASAN environment + # LD_PRELOAD is needed to make sure ASAN is the first thing loaded + # as it will otherwise complain. + # Leak detection is currently disabled because of the size of the report. + # The patcher is disabled because ASAN fails if code mmaps data at fixed + # memory addresses, see https://github.com/open-mpi/ompi/issues/12819. 
+ # ODR violation detection is disabled until #13469 is fixed + # Disabling stack use after return detection to reduce slowdown, per + # https://github.com/llvm/llvm-project/issues/64190. + run: | + echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV + echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1,detect_stack_use_after_return=0 >> $GITHUB_ENV + echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV + echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV + + - name: Test mpi4py (singleton) + run: python test/main.py -v -x TestExcErrhandlerNull + if: ${{ true }} + timeout-minutes: 10 + + - name: Test mpi4py (np=1) + run: mpiexec -n 1 python test/main.py -v -x TestExcErrhandlerNull + if: ${{ true }} + timeout-minutes: 10 + + - name: Test mpi4py (np=4) + run: mpiexec -n 4 python test/main.py -v -f -x TestExcErrhandlerNull + if: ${{ true }} + timeout-minutes: 10 + + - name: Show MPI (ASAN) + run: ompi_info + + - name: Show MPICC (ASAN) + run: mpicc -show + From d4ff6ff0cc62d53ed3f900b7eab89b407684893a Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 15 Jan 2026 12:37:58 -0500 Subject: [PATCH 02/53] Reduce optimization levels and enable ASAN when building mpi4py Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py_asan.yaml | 28 ++++++++----------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml index 95a519604b4..ad859fb1e71 100644 --- a/.github/workflows/ompi_mpi4py_asan.yaml +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -64,7 +64,7 @@ jobs: --disable-oshmem --disable-silent-rules --prefix=/opt/openmpi - CFLAGS="-O2 -fno-omit-frame-pointer -g -fsanitize=address" + CFLAGS="-O1 -fno-omit-frame-pointer -g -fsanitize=address" LDFLAGS="-Wl,-rpath,/opt/openmpi/lib -fsanitize=address" working-directory: mpi-build @@ -90,12 +90,6 @@ jobs: mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe 
>> "$mca_params" - - name: Show MPI - run: ompi_info - - - name: Show MPICC - run: mpicc -show - - name: Use Python uses: actions/setup-python@v5 with: @@ -116,11 +110,6 @@ jobs: repository: ${{ inputs.repository || 'mpi4py/mpi4py' }} ref: ${{ inputs.ref }} - - name: Install mpi4py - run: python -m pip install . - env: - CFLAGS: "-O0" - - name: Setting up ASAN environment # LD_PRELOAD is needed to make sure ASAN is the first thing loaded # as it will otherwise complain. @@ -136,6 +125,14 @@ jobs: echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV + - name: Show MPI + run: ompi_info + + - name: Install mpi4py + run: python -m pip install . + env: + CFLAGS: "-O0" + - name: Test mpi4py (singleton) run: python test/main.py -v -x TestExcErrhandlerNull if: ${{ true }} @@ -150,10 +147,3 @@ jobs: run: mpiexec -n 4 python test/main.py -v -f -x TestExcErrhandlerNull if: ${{ true }} timeout-minutes: 10 - - - name: Show MPI (ASAN) - run: ompi_info - - - name: Show MPICC (ASAN) - run: mpicc -show - From 6b8dfeb0f2e67aedc9a68b63988ce5709aaa3076 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 15 Jan 2026 12:51:49 -0500 Subject: [PATCH 03/53] Enable leak check, for sanity checking Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py_asan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml index ad859fb1e71..e5bfc17e970 100644 --- a/.github/workflows/ompi_mpi4py_asan.yaml +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -122,7 +122,7 @@ jobs: run: | echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1,detect_stack_use_after_return=0 >> $GITHUB_ENV - echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV + echo LSAN_OPTIONS=detect_leaks=1,exitcode=0 >> $GITHUB_ENV echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV - name: Show MPI From 
3fbedbf31cf8e10e796be83cdea7ade39afa3799 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 17 Jan 2026 08:45:51 +0900 Subject: [PATCH 04/53] Disable LSAN and enable stack-use-after-return checks Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py_asan.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml index e5bfc17e970..5204dba825e 100644 --- a/.github/workflows/ompi_mpi4py_asan.yaml +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -121,8 +121,8 @@ jobs: # https://github.com/llvm/llvm-project/issues/64190. run: | echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV - echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1,detect_stack_use_after_return=0 >> $GITHUB_ENV - echo LSAN_OPTIONS=detect_leaks=1,exitcode=0 >> $GITHUB_ENV + echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1 >> $GITHUB_ENV + echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV - name: Show MPI From e12e230968e47fbf338f359a663c532fb5f961b0 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 17 Jan 2026 09:10:47 +0900 Subject: [PATCH 05/53] ASAN: Configure Open MPI with --enable-debug Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py_asan.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml index 5204dba825e..3f20cd675df 100644 --- a/.github/workflows/ompi_mpi4py_asan.yaml +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -58,6 +58,7 @@ jobs: # and we don't need to include that in our artifact. - name: Configure Open MPI run: ./configure + --enable-debug --disable-dependency-tracking --disable-sphinx --disable-mpi-fortran @@ -121,7 +122,7 @@ jobs: # https://github.com/llvm/llvm-project/issues/64190. 
run: | echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV - echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1 >> $GITHUB_ENV + echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1,detect_stack_use_after_return=1 >> $GITHUB_ENV echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV From c3f1a510a6ff4ebd987eb8935f06b4b8215dcc1c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 16 Jan 2026 17:55:14 -0700 Subject: [PATCH 06/53] Fix usage of PMIx_Fence_nb to conform with PMIx Standard The PMIx_Fence_nb function can return PMIX_OPERATION_SUCCEEDED to indicate that the function was executed atomically and the callback function will therefore not be called. The PMIx Standard lists a few reasons why this can happen, but the point here was to fix usage to properly handle that possibility. Signed-off-by: Ralph Castain --- ompi/instance/instance.c | 54 ++++++++++++++++++++--------- ompi/runtime/ompi_mpi_finalize.c | 20 ++++++++--- ompi/runtime/ompi_mpi_init.c | 59 ++++++++++++++++++++++---------- 3 files changed, 94 insertions(+), 39 deletions(-) diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c index bd686d2bab2..6d50d32ffb2 100644 --- a/ompi/instance/instance.c +++ b/ompi/instance/instance.c @@ -8,6 +8,7 @@ * reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2026 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -586,11 +587,16 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + } } } } else { @@ -602,12 +608,19 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); - if( PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence() failed", ret); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence() failed", ret); + } + } else { + /* cannot just wait on thread as we need to call opal_progress */ + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - /* cannot just wait on thread as we need to call opal_progress */ - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } @@ -748,7 +761,9 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) * we have to wait here for it to complete. However, there * is no reason to do two barriers! 
*/ if (background_fence) { - OMPI_LAZY_WAIT_FOR_COMPLETION(active); + if (active) { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } } else if (!ompi_async_mpi_init) { /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -757,12 +772,19 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, - fence_release, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence() failed", ret); + } + } else { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index ad8a328dc55..08c6efaa616 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -24,6 +24,7 @@ * reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. + * Copyright (c) 2026 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -281,14 +282,25 @@ int ompi_mpi_finalize(void) * communications/actions to complete. See * https://github.com/open-mpi/ompi/issues/1576 for the * original bug report. 
*/ - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - OMPI_ERROR_LOG(ret); + rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void*)&active); + if (PMIX_SUCCESS != rc) { /* Reset the active flag to false, to avoid waiting for * completion when the fence was failed. */ active = false; + // can return operation_succeeded if atomically completed + if (PMIX_OPERATION_SUCCEEDED == rc) { + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + OMPI_ERROR_LOG(ret); + } + } else { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + /* NOTE: we lose the fence return status here. This can be + * a problem as the fence CAN fail. Might consider retrieving + * the returned status so you can respond if it doesn't + * successfully complete? */ } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } ompi_mpi_instance_finalize (&ompi_mpi_instance_default); diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index c7e61c5bf94..deea53cb02e 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -26,7 +26,7 @@ * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. * Copyright (c) 2021-2022 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
@@ -464,12 +464,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence_nb() failed"; - goto error; + rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } } } } else { @@ -482,12 +487,19 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); if( PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence() failed"; - goto error; + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } + } else { + /* cannot just wait on thread as we need to call opal_progress */ + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - /* cannot just wait on thread as we need to call opal_progress */ - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } @@ -537,7 +549,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, * we have to wait here for it to complete. However, there * is no reason to do two barriers! 
*/ if (background_fence) { - OMPI_LAZY_WAIT_FOR_COMPLETION(active); + if (active) { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } } else if (!ompi_async_mpi_init) { /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -546,13 +560,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, - fence_release, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence_nb() failed"; - goto error; + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } + } else { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } From f71172c966f371ea6cfaa77e2c351cd7670ae3c3 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 17 Jan 2026 10:30:41 +0900 Subject: [PATCH 07/53] Have ompi_info print all variables Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py_asan.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml index 3f20cd675df..a14cb084b0b 100644 --- a/.github/workflows/ompi_mpi4py_asan.yaml +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -127,7 +127,7 @@ jobs: echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV - name: Show MPI - run: ompi_info + run: ompi_info --all --all - name: Install mpi4py run: python -m pip install . 
From 09a2d300a02ab7c9238c696ce264c1557be65d8a Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 27 Jan 2026 16:37:39 +0900 Subject: [PATCH 08/53] Fix local mca variable reference leakage in ompi_mpi_register_params The variable will go out of scope and ASAN flags this in ompi_info. Signed-off-by: Joseph Schuchart --- ompi/runtime/ompi_mpi_params.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index c747d55ee7d..7b5d1f3c55e 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -104,11 +104,12 @@ bool ompi_ftmpi_enabled = false; #endif /* OPAL_ENABLE_FT_MPI */ static int ompi_stream_buffering_mode = -1; +static int ompi_mpi_ft_verbose = 0; int ompi_comm_verbose_level = 0; int ompi_mpi_register_params(void) { - int value; + int value = 0; #if OPAL_ENABLE_FT_MPI mca_base_var_scope_t ftscope = MCA_BASE_VAR_SCOPE_READONLY; @@ -121,15 +122,14 @@ int ompi_mpi_register_params(void) "Enable UFLM MPI Fault Tolerance framework", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_4, ftscope, &ompi_ftmpi_enabled); - value = 0; (void) mca_base_var_register ("ompi", "mpi", "ft", "verbose", "Verbosity level of the ULFM MPI Fault Tolerance framework", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, &value); + OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, &ompi_mpi_ft_verbose); #if OPAL_ENABLE_FT_MPI - if( 0 < value ) { + if( 0 < ompi_mpi_ft_verbose ) { ompi_ftmpi_output_handle = opal_output_open(NULL); - opal_output_set_verbosity(ompi_ftmpi_output_handle, value); + opal_output_set_verbosity(ompi_ftmpi_output_handle, ompi_mpi_ft_verbose); } (void) ompi_comm_rbcast_register_params(); From b0f20e1d4385ae1a0f5339fac1227118c3e20ad0 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 27 Jan 2026 16:41:03 +0900 Subject: [PATCH 09/53] Fixes suggested by copilot Signed-off-by: Joseph Schuchart --- 
.github/workflows/ompi_mpi4py_asan.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ompi_mpi4py_asan.yaml b/.github/workflows/ompi_mpi4py_asan.yaml index a14cb084b0b..240e3d2f101 100644 --- a/.github/workflows/ompi_mpi4py_asan.yaml +++ b/.github/workflows/ompi_mpi4py_asan.yaml @@ -20,7 +20,7 @@ permissions: jobs: test: - # We need Unbuntu 24.04 (over 22.04) due to a kernel bug, + # We need Ubuntu 24.04 (over 22.04) due to a kernel bug, # see https://github.com/google/sanitizers/issues/856. runs-on: ubuntu-24.04 timeout-minutes: 30 @@ -36,7 +36,7 @@ jobs: run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} - - name: Install depencencies + - name: Install dependencies run: sudo apt-get install -y -q libnuma-dev libasan8 if: ${{ runner.os == 'Linux' }} @@ -117,9 +117,7 @@ jobs: # Leak detection is currently disabled because of the size of the report. # The patcher is disabled because ASAN fails if code mmaps data at fixed # memory addresses, see https://github.com/open-mpi/ompi/issues/12819. - # ODR violation detection is disabled until #13469 is fixed - # Disabling stack use after return detection to reduce slowdown, per - # https://github.com/llvm/llvm-project/issues/64190. + # ODR violation detection is disabled until #13469 is fixed. 
run: | echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1,detect_stack_use_after_return=1 >> $GITHUB_ENV From 019c605ef9455e15d09cb1e78f155b8dc9eeb383 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 28 Jan 2026 10:39:54 +0900 Subject: [PATCH 10/53] Fix local variable reference leak in coll/tuned Signed-off-by: Joseph Schuchart --- ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c | 3 ++- ompi/mca/coll/tuned/coll_tuned_component.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c index e3482116c84..9dca14bcc55 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c @@ -34,6 +34,8 @@ static int coll_tuned_alltoall_segment_size = 0; static int coll_tuned_alltoall_tree_fanout; static int coll_tuned_alltoall_chain_fanout; +static int deprecated_mca_params = -1; + /* valid values for coll_tuned_alltoall_forced_algorithm */ static const mca_base_var_enum_value_t alltoall_algorithms[] = { {0, "ignore"}, @@ -119,7 +121,6 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm MCA_BASE_VAR_SCOPE_ALL, &coll_tuned_alltoall_chain_fanout); - int deprecated_mca_params = -1; (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "alltoall_large_msg", "use pairwise exchange algorithm for messages larger than this value", diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index d8dbb7959e4..6f5a8c57987 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -71,6 +71,8 @@ int ompi_coll_tuned_scatter_large_msg = 0; int ompi_coll_tuned_scatter_min_procs = 0; int ompi_coll_tuned_scatter_blocking_send_ratio = 0; +static int deprecated_mca_params = -1; + /* 
forced algorithm variables */ /* indices for the MCA parameters */ coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}}; @@ -161,7 +163,6 @@ static int tuned_register(void) MCA_BASE_VAR_SCOPE_ALL, &ompi_coll_tuned_init_chain_fanout); - int deprecated_mca_params = -1; (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "alltoall_small_msg", "threshold (if supported) to decide if small MSGs alltoall algorithm will be used", From 97f7a936b556f1970f2f3ca48f60886b36c66419 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 28 Jan 2026 12:10:41 +0900 Subject: [PATCH 11/53] Fix local variable reference leak in coll/ft Signed-off-by: Joseph Schuchart --- ompi/mca/coll/ftagree/coll_ftagree_component.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ompi/mca/coll/ftagree/coll_ftagree_component.c b/ompi/mca/coll/ftagree/coll_ftagree_component.c index 97e9ca4cee7..8a733ad3357 100644 --- a/ompi/mca/coll/ftagree/coll_ftagree_component.c +++ b/ompi/mca/coll/ftagree/coll_ftagree_component.c @@ -38,6 +38,8 @@ int mca_coll_ftagree_era_rebuild = 0; double mca_coll_ftagree_debug_inject_proba = 0.0; #endif +static int mca_coll_ft_agreement; + /* * Local function */ @@ -92,8 +94,6 @@ ftagree_close(void) static int ftagree_register(void) { - int value; - /* Use a low priority, but allow other components to be lower */ mca_coll_ftagree_priority = 30; (void) mca_base_component_var_register(&mca_coll_ftagree_component.collm_version, @@ -103,15 +103,15 @@ ftagree_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_coll_ftagree_priority); - if( ompi_ftmpi_enabled ) value = 1; - else value = 0; /* NOFT: do not initialize ERA */ + if( ompi_ftmpi_enabled ) mca_coll_ft_agreement = 1; + else mca_coll_ft_agreement = 0; /* NOFT: do not initialize ERA */ (void) mca_base_component_var_register(&mca_coll_ftagree_component.collm_version, "agreement", "Agreement algorithm 0: Allreduce (NOT 
FAULT TOLERANT); 1: Early Returning Consensus (era); 2: Early Terminating Consensus (eta)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY, - &value); - switch(value) { + &mca_coll_ft_agreement); + switch(mca_coll_ft_agreement) { case 0: mca_coll_ftagree_algorithm = COLL_FTAGREE_NOFT; opal_output_verbose(6, ompi_ftmpi_output_handle, From 4f87669edf9db4e28ee95276dff55eda4bac426f Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Mon, 2 Feb 2026 12:18:33 -0700 Subject: [PATCH 12/53] OSHMEM: squash compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit without this patch I keep seeing this compiler warning when building with newer gcc's. shmem_put_nb.c:230:6: warning: no previous prototype for ‘shmemx_alltoall_global_nb’ [-Wmissing-prototypes] void shmemx_alltoall_global_nb(void *dest, ^~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Howard Pritchard --- oshmem/shmem/c/shmem_put_nb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/oshmem/shmem/c/shmem_put_nb.c b/oshmem/shmem/c/shmem_put_nb.c index 89e4bf18240..cef6abcc40b 100644 --- a/oshmem/shmem/c/shmem_put_nb.c +++ b/oshmem/shmem/c/shmem_put_nb.c @@ -11,6 +11,7 @@ #include "oshmem/constants.h" #include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" #include "oshmem/runtime/runtime.h" From c3377ebff35f4706c07794c003affa938d1b78d7 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Tue, 4 Nov 2025 04:42:00 +0000 Subject: [PATCH 13/53] osc/sm: Add notification support for put/get operations This commit adds notification support to the OSC SM component by implementing the put_with_notify, get_with_notify, rput_with_notify, and rget_with_notify functions. These functions perform the same operations as their non-notify counterparts but also increment notification counters after the data transfer completes. 
The changes include: - Added function pointer types for notify variants in osc.h - Added function prototypes in osc_sm.h - Implemented the notify functions in osc_sm_comm.c - Updated the module template to register the new functions - Removed TODO comments that have been addressed Signed-off-by: Joseph Antony --- ompi/mca/osc/osc.h | 44 +++++++++- ompi/mca/osc/sm/osc_sm.h | 42 ++++++++++ ompi/mca/osc/sm/osc_sm_comm.c | 130 ++++++++++++++++++++++++++++- ompi/mca/osc/sm/osc_sm_component.c | 6 +- 4 files changed, 217 insertions(+), 5 deletions(-) diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index c8f77404c1c..bd05a6f11b7 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -216,6 +216,15 @@ typedef int (*ompi_osc_base_module_put_fn_t)(const void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); +typedef int (*ompi_osc_base_module_put_with_notify_fn_t)(const void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win); typedef int (*ompi_osc_base_module_get_fn_t)(void *origin_addr, size_t origin_count, @@ -226,6 +235,15 @@ typedef int (*ompi_osc_base_module_get_fn_t)(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); +typedef int (*ompi_osc_base_module_get_with_notify_fn_t)(void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win); typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, size_t origin_count, @@ -276,6 +294,17 @@ typedef int (*ompi_osc_base_module_rput_fn_t)(const void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); +typedef int (*ompi_osc_base_module_rput_with_notify_fn_t)(const void *origin_addr, + size_t origin_count, + struct 
ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win, + struct ompi_request_t **request); + typedef int (*ompi_osc_base_module_rget_fn_t)(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, @@ -286,6 +315,16 @@ typedef int (*ompi_osc_base_module_rget_fn_t)(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); +typedef int (*ompi_osc_base_module_rget_with_notify_fn_t)(void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win, + struct ompi_request_t **request); typedef int (*ompi_osc_base_module_raccumulate_fn_t)(const void *origin_addr, size_t origin_count, @@ -371,7 +410,6 @@ typedef int (*ompi_osc_base_module_flush_local_all_fn_t)(struct ompi_win_t *win) * module structure. 
*/ - // TODO: extend the struct and add pointers to put/get_with_notify functions struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_win_shared_query_fn_t osc_win_shared_query; @@ -380,14 +418,18 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_free_fn_t osc_free; ompi_osc_base_module_put_fn_t osc_put; + ompi_osc_base_module_put_with_notify_fn_t osc_put_with_notify; ompi_osc_base_module_get_fn_t osc_get; + ompi_osc_base_module_get_with_notify_fn_t osc_get_with_notify; ompi_osc_base_module_accumulate_fn_t osc_accumulate; ompi_osc_base_module_compare_and_swap_fn_t osc_compare_and_swap; ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op; ompi_osc_base_module_get_accumulate_fn_t osc_get_accumulate; ompi_osc_base_module_rput_fn_t osc_rput; + ompi_osc_base_module_rput_with_notify_fn_t osc_rput_with_notify; ompi_osc_base_module_rget_fn_t osc_rget; + ompi_osc_base_module_rget_with_notify_fn_t osc_rget_with_notify; ompi_osc_base_module_raccumulate_fn_t osc_raccumulate; ompi_osc_base_module_rget_accumulate_fn_t osc_rget_accumulate; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 23afacd7d49..b7d6dadfd49 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -118,6 +118,16 @@ int ompi_osc_sm_put(const void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); + int ompi_osc_sm_put_with_notify(const void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win); + int ompi_osc_sm_get(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, @@ -127,6 +137,16 @@ int ompi_osc_sm_get(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); +int ompi_osc_sm_get_with_notify(void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t 
target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win); + int ompi_osc_sm_accumulate(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, @@ -176,6 +196,17 @@ int ompi_osc_sm_rput(const void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); +int ompi_osc_sm_rput_with_notify(const void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win, + struct ompi_request_t **request); + int ompi_osc_sm_rget(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, @@ -186,6 +217,17 @@ int ompi_osc_sm_rget(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); +int ompi_osc_sm_rget_with_notify(void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win, + struct ompi_request_t **request); + int ompi_osc_sm_raccumulate(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index f9bae370870..a2e3a5cce1f 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -59,6 +59,49 @@ ompi_osc_sm_rput(const void *origin_addr, return OMPI_SUCCESS; } +int +ompi_osc_sm_rput_with_notify(const void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win, + struct ompi_request_t **ompi_req) +{ + int ret; + ompi_osc_sm_module_t *module = + (ompi_osc_sm_module_t*) win->w_osc_module; + void *remote_address; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + 
"rput_notify: 0x%lx, %zu, %s, %d, %d, %zu, %s, %d, 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (int) target_disp, + target_count, target_dt->name, + notify, + (unsigned long) win)); + + remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; + + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, + remote_address, target_count, target_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } + + /* the only valid field of RMA request status is the MPI_ERROR field. + * ompi_request_empty has status MPI_SUCCESS and indicates the request is + * complete. */ + *ompi_req = &ompi_request_empty; + + opal_atomic_wmb(); + opal_atomic_add(&module->notify_counters[target][notify], 1); + + return OMPI_SUCCESS; +} int ompi_osc_sm_rget(void *origin_addr, @@ -99,6 +142,49 @@ ompi_osc_sm_rget(void *origin_addr, return OMPI_SUCCESS; } +int +ompi_osc_sm_rget_with_notify(void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win, + struct ompi_request_t **ompi_req) +{ + int ret; + ompi_osc_sm_module_t *module = + (ompi_osc_sm_module_t*) win->w_osc_module; + void *remote_address; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_notify: 0x%lx, %zu, %s, %d, %d, %zu, %s, %d, 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (int) target_disp, + target_count, target_dt->name, + notify, + (unsigned long) win)); + + remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; + + ret = ompi_datatype_sndrcv(remote_address, target_count, target_dt, + origin_addr, origin_count, origin_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } + + /* the only valid field of RMA request status is the MPI_ERROR field. 
+ * ompi_request_empty has status MPI_SUCCESS and indicates the request is + * complete. */ + *ompi_req = &ompi_request_empty; + + opal_atomic_rmb(); + opal_atomic_add(&module->notify_counters[target][notify], 1); + + return OMPI_SUCCESS; +} int ompi_osc_sm_raccumulate(const void *origin_addr, @@ -236,6 +322,44 @@ ompi_osc_sm_put(const void *origin_addr, } +int +ompi_osc_sm_put_with_notify(const void *origin_addr, + size_t origin_count, + struct ompi_datatype_t *origin_dt, + int target, + ptrdiff_t target_disp, + size_t target_count, + struct ompi_datatype_t *target_dt, + int notify, + struct ompi_win_t *win) +{ +int ret; +ompi_osc_sm_module_t *module = + (ompi_osc_sm_module_t*) win->w_osc_module; +void *remote_address; + +OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "put_notify: 0x%lx, %zu, %s, %d, %d, %zu, %s, %d, 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (int) target_disp, + target_count, target_dt->name, + notify, + (unsigned long) win)); + +remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; + +ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, + remote_address, target_count, target_dt); +if (OMPI_SUCCESS != ret) { + return ret; +} + +opal_atomic_wmb(); +opal_atomic_add(&module->notify_counters[target][notify], 1); + +return ret; +} + int ompi_osc_sm_get(void *origin_addr, size_t origin_count, @@ -294,7 +418,9 @@ ompi_osc_sm_get_with_notify(void *origin_addr, ret = ompi_datatype_sndrcv(remote_address, target_count, target_dt, origin_addr, origin_count, origin_dt); - // TODO: do the same for put_with_notify + if (OMPI_SUCCESS != ret) { + return ret; + } opal_atomic_rmb(); opal_atomic_add(&module->notify_counters[target][notify], 1); @@ -473,4 +599,4 @@ ompi_osc_sm_fetch_and_op(const void *origin_addr, opal_atomic_unlock(&module->node_states[target].accumulate_lock); return OMPI_SUCCESS;; -} +} \ No newline at end of file diff --git 
a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 1ad9a48cfd2..11f0ccc2e47 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -70,8 +70,6 @@ ompi_osc_sm_component_t mca_osc_sm_component = { MCA_BASE_COMPONENT_INIT(ompi, osc, sm) -// TODO: extend the struct and add pointers to put/get_with_notify functions -// TODO: extend it to rput/rget_with_notify as well ompi_osc_sm_module_t ompi_osc_sm_module_template = { { .osc_win_shared_query = ompi_osc_sm_shared_query, @@ -81,14 +79,18 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_free = ompi_osc_sm_free, .osc_put = ompi_osc_sm_put, + .osc_put_with_notify = ompi_osc_sm_put_with_notify, .osc_get = ompi_osc_sm_get, + .osc_get_with_notify = ompi_osc_sm_get_with_notify, .osc_accumulate = ompi_osc_sm_accumulate, .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, .osc_fetch_and_op = ompi_osc_sm_fetch_and_op, .osc_get_accumulate = ompi_osc_sm_get_accumulate, .osc_rput = ompi_osc_sm_rput, + .osc_rput_with_notify = ompi_osc_sm_rput_with_notify, .osc_rget = ompi_osc_sm_rget, + .osc_rget_with_notify = ompi_osc_sm_rget_with_notify, .osc_raccumulate = ompi_osc_sm_raccumulate, .osc_rget_accumulate = ompi_osc_sm_rget_accumulate, From 3bbbb4819a06a1a3046dcbb7aba91cc8fca47e4b Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Tue, 4 Nov 2025 17:03:59 +0000 Subject: [PATCH 14/53] osc/sm: Nit picking edits Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm_comm.c | 52 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index a2e3a5cce1f..ba19d8c08cf 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -333,31 +333,31 @@ ompi_osc_sm_put_with_notify(const void *origin_addr, int notify, struct ompi_win_t *win) { -int ret; -ompi_osc_sm_module_t *module = - (ompi_osc_sm_module_t*) win->w_osc_module; -void 
*remote_address; - -OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "put_notify: 0x%lx, %zu, %s, %d, %d, %zu, %s, %d, 0x%lx", - (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, - target_count, target_dt->name, - notify, - (unsigned long) win)); - -remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; - -ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, - remote_address, target_count, target_dt); -if (OMPI_SUCCESS != ret) { - return ret; -} + int ret; + ompi_osc_sm_module_t *module = + (ompi_osc_sm_module_t*) win->w_osc_module; + void *remote_address; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "put_notify: 0x%lx, %zu, %s, %d, %d, %zu, %s, %d, 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (int) target_disp, + target_count, target_dt->name, + notify, + (unsigned long) win)); + + remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; + + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, + remote_address, target_count, target_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } -opal_atomic_wmb(); -opal_atomic_add(&module->notify_counters[target][notify], 1); + opal_atomic_wmb(); + opal_atomic_add(&module->notify_counters[target][notify], 1); -return ret; + return ret; } int @@ -419,7 +419,7 @@ ompi_osc_sm_get_with_notify(void *origin_addr, ret = ompi_datatype_sndrcv(remote_address, target_count, target_dt, origin_addr, origin_count, origin_dt); if (OMPI_SUCCESS != ret) { - return ret; + return ret; } opal_atomic_rmb(); opal_atomic_add(&module->notify_counters[target][notify], 1); @@ -598,5 +598,5 @@ ompi_osc_sm_fetch_and_op(const void *origin_addr, done: opal_atomic_unlock(&module->node_states[target].accumulate_lock); - return OMPI_SUCCESS;; -} \ No newline at end of file + return OMPI_SUCCESS; +} From 
5e895cf31af78219cbebdf24d9b8c04466f66761 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 19 Nov 2025 12:29:14 -0500 Subject: [PATCH 15/53] Public APIs for: put_with_notify get_with_notify Signed-off-by: Joseph Antony --- ompi/include/mpi.h.in | 29 ++++++++ ompi/include/mpif-values.py | 1 + ompi/mca/osc/osc.h | 16 ++--- ompi/mca/osc/sm/osc_sm.h | 10 +-- ompi/mca/osc/sm/osc_sm_comm.c | 8 +-- ompi/mca/osc/sm/osc_sm_component.c | 8 +-- ompi/mpi/bindings/ompi_bindings/consts.py | 1 + ompi/mpi/c/Makefile.am | 2 + ompi/mpi/c/get_notify.c.in | 77 ++++++++++++++++++++++ ompi/mpi/c/put_notify.c.in | 80 +++++++++++++++++++++++ ompi/runtime/ompi_spc.c | 2 + ompi/runtime/ompi_spc.h | 2 + 12 files changed, 215 insertions(+), 21 deletions(-) create mode 100644 ompi/mpi/c/get_notify.c.in create mode 100644 ompi/mpi/c/put_notify.c.in diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index e06865b182f..d34624cd1d2 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -764,6 +764,7 @@ enum { #define MPI_ERR_SESSION 78 #define MPI_ERR_VALUE_TOO_LARGE 79 #define MPI_ERR_ERRHANDLER 80 +#define MPI_ERR_NOTIFY_IDX 81 /* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined MPI_ERR_ code. 
Set the last code to allow some room for adding @@ -1917,6 +1918,14 @@ OMPI_DECLSPEC int MPI_Get_c(void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Win win); +OMPI_DECLSPEC int MPI_Get_notify(void *origin_addr, int origin_count, + MPI_Datatype origin_datatype, int target_rank, + MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); +OMPI_DECLSPEC int MPI_Get_notify_c(void *origin_addr, MPI_Count origin_count, + MPI_Datatype origin_datatype, int target_rank, + MPI_Aint target_disp, MPI_Count target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); OMPI_DECLSPEC int MPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, @@ -2180,6 +2189,12 @@ OMPI_DECLSPEC int MPI_Put(const void *origin_addr, int origin_count, MPI_Dataty OMPI_DECLSPEC int MPI_Put_c(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Win win); +OMPI_DECLSPEC int MPI_Put_notify(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); +OMPI_DECLSPEC int MPI_Put_notify_c(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, MPI_Count target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); OMPI_DECLSPEC int MPI_Query_thread(int *provided); OMPI_DECLSPEC int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int 
target_count, @@ -3091,6 +3106,14 @@ OMPI_DECLSPEC int PMPI_Get_c(void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Win win); +OMPI_DECLSPEC int PMPI_Get_notify(void *origin_addr, int origin_count, + MPI_Datatype origin_datatype, int target_rank, + MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); +OMPI_DECLSPEC int PMPI_Get_notify_c(void *origin_addr, MPI_Count origin_count, + MPI_Datatype origin_datatype, int target_rank, + MPI_Aint target_disp, MPI_Count target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); OMPI_DECLSPEC int PMPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, @@ -3354,6 +3377,12 @@ OMPI_DECLSPEC int PMPI_Put(const void *origin_addr, int origin_count, MPI_Datat OMPI_DECLSPEC int PMPI_Put_c(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Win win); +OMPI_DECLSPEC int PMPI_Put_notify(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); +OMPI_DECLSPEC int PMPI_Put_notify_c(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, MPI_Count target_count, + MPI_Datatype target_datatype, int notification_idx, MPI_Win win); OMPI_DECLSPEC int PMPI_Query_thread(int *provided); OMPI_DECLSPEC int PMPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, diff --git 
a/ompi/include/mpif-values.py b/ompi/include/mpif-values.py index 53159d5d8dd..b74fbcbaf1f 100755 --- a/ompi/include/mpif-values.py +++ b/ompi/include/mpif-values.py @@ -301,6 +301,7 @@ 'MPI_ERR_SESSION': 78, 'MPI_ERR_VALUE_TOO_LARGE': 79, 'MPI_ERR_ERRHANDLER': 80, + 'MPI_ERR_NOTIFY_IDX': 81, 'MPI_ERR_LASTCODE': 92, 'MPI_IDENT': 0, 'MPI_CONGRUENT': 1, diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index bd05a6f11b7..83c7af9305e 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -216,7 +216,7 @@ typedef int (*ompi_osc_base_module_put_fn_t)(const void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_put_with_notify_fn_t)(const void *origin_addr, +typedef int (*ompi_osc_base_module_put_notify_fn_t)(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -235,7 +235,7 @@ typedef int (*ompi_osc_base_module_get_fn_t)(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_get_with_notify_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_get_notify_fn_t)(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -294,7 +294,7 @@ typedef int (*ompi_osc_base_module_rput_fn_t)(const void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -typedef int (*ompi_osc_base_module_rput_with_notify_fn_t)(const void *origin_addr, +typedef int (*ompi_osc_base_module_rput_notify_fn_t)(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -315,7 +315,7 @@ typedef int (*ompi_osc_base_module_rget_fn_t)(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -typedef int (*ompi_osc_base_module_rget_with_notify_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_rget_notify_fn_t)(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -418,18 +418,18 @@ 
struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_free_fn_t osc_free; ompi_osc_base_module_put_fn_t osc_put; - ompi_osc_base_module_put_with_notify_fn_t osc_put_with_notify; + ompi_osc_base_module_put_notify_fn_t osc_put_notify; ompi_osc_base_module_get_fn_t osc_get; - ompi_osc_base_module_get_with_notify_fn_t osc_get_with_notify; + ompi_osc_base_module_get_notify_fn_t osc_get_notify; ompi_osc_base_module_accumulate_fn_t osc_accumulate; ompi_osc_base_module_compare_and_swap_fn_t osc_compare_and_swap; ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op; ompi_osc_base_module_get_accumulate_fn_t osc_get_accumulate; ompi_osc_base_module_rput_fn_t osc_rput; - ompi_osc_base_module_rput_with_notify_fn_t osc_rput_with_notify; + ompi_osc_base_module_rput_notify_fn_t osc_rput_notify; ompi_osc_base_module_rget_fn_t osc_rget; - ompi_osc_base_module_rget_with_notify_fn_t osc_rget_with_notify; + ompi_osc_base_module_rget_notify_fn_t osc_rget_notify; ompi_osc_base_module_raccumulate_fn_t osc_raccumulate; ompi_osc_base_module_rget_accumulate_fn_t osc_rget_accumulate; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index b7d6dadfd49..200ec8b3de8 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -107,7 +107,7 @@ int ompi_osc_sm_detach(struct ompi_win_t *win, const void *base); int ompi_osc_sm_free(struct ompi_win_t *win); -// TODO: add put/get_with_notify prototypes +// TODO: add put/get_notify prototypes int ompi_osc_sm_put(const void *origin_addr, size_t origin_count, @@ -118,7 +118,7 @@ int ompi_osc_sm_put(const void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); - int ompi_osc_sm_put_with_notify(const void *origin_addr, + int ompi_osc_sm_put_notify(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -137,7 +137,7 @@ int ompi_osc_sm_get(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); -int ompi_osc_sm_get_with_notify(void 
*origin_addr, +int ompi_osc_sm_get_notify(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -196,7 +196,7 @@ int ompi_osc_sm_rput(const void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_sm_rput_with_notify(const void *origin_addr, +int ompi_osc_sm_rput_notify(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -217,7 +217,7 @@ int ompi_osc_sm_rget(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_sm_rget_with_notify(void *origin_addr, +int ompi_osc_sm_rget_notify(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index ba19d8c08cf..4391a375ebc 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -60,7 +60,7 @@ ompi_osc_sm_rput(const void *origin_addr, } int -ompi_osc_sm_rput_with_notify(const void *origin_addr, +ompi_osc_sm_rput_notify(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -143,7 +143,7 @@ ompi_osc_sm_rget(void *origin_addr, } int -ompi_osc_sm_rget_with_notify(void *origin_addr, +ompi_osc_sm_rget_notify(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -323,7 +323,7 @@ ompi_osc_sm_put(const void *origin_addr, int -ompi_osc_sm_put_with_notify(const void *origin_addr, +ompi_osc_sm_put_notify(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -392,7 +392,7 @@ ompi_osc_sm_get(void *origin_addr, int -ompi_osc_sm_get_with_notify(void *origin_addr, +ompi_osc_sm_get_notify(void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, int target, diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 11f0ccc2e47..e7613c86f6e 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ 
b/ompi/mca/osc/sm/osc_sm_component.c @@ -79,18 +79,18 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_free = ompi_osc_sm_free, .osc_put = ompi_osc_sm_put, - .osc_put_with_notify = ompi_osc_sm_put_with_notify, + .osc_put_notify = ompi_osc_sm_put_notify, .osc_get = ompi_osc_sm_get, - .osc_get_with_notify = ompi_osc_sm_get_with_notify, + .osc_get_notify = ompi_osc_sm_get_notify, .osc_accumulate = ompi_osc_sm_accumulate, .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, .osc_fetch_and_op = ompi_osc_sm_fetch_and_op, .osc_get_accumulate = ompi_osc_sm_get_accumulate, .osc_rput = ompi_osc_sm_rput, - .osc_rput_with_notify = ompi_osc_sm_rput_with_notify, + .osc_rput_notify = ompi_osc_sm_rput_notify, .osc_rget = ompi_osc_sm_rget, - .osc_rget_with_notify = ompi_osc_sm_rget_with_notify, + .osc_rget_notify = ompi_osc_sm_rget_notify, .osc_raccumulate = ompi_osc_sm_raccumulate, .osc_rget_accumulate = ompi_osc_sm_rget_accumulate, diff --git a/ompi/mpi/bindings/ompi_bindings/consts.py b/ompi/mpi/bindings/ompi_bindings/consts.py index 43bca486b57..759b342f64a 100644 --- a/ompi/mpi/bindings/ompi_bindings/consts.py +++ b/ompi/mpi/bindings/ompi_bindings/consts.py @@ -23,6 +23,7 @@ 'MPI_SUCCESS', 'MPI_ERR_BUFFER', 'MPI_ERR_COUNT', + 'MPI_ERR_NOTIFY_IDX' 'MPI_ERR_TYPE', 'MPI_ERR_TAG', 'MPI_ERR_COMM', diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index 25b871fa7d4..f6757d669dd 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -223,6 +223,7 @@ prototype_sources = \ get_accumulate.c.in \ get_address.c.in \ get.c.in \ + get_notify.c.in \ get_count.c.in \ get_elements.c.in \ get_elements_x.c.in \ @@ -341,6 +342,7 @@ prototype_sources = \ psend_init.c.in \ publish_name.c.in \ put.c.in \ + put_notify.c.in \ query_thread.c.in \ raccumulate.c.in \ recv.c.in \ diff --git a/ompi/mpi/c/get_notify.c.in b/ompi/mpi/c/get_notify.c.in new file mode 100644 index 00000000000..1bad16944ab --- /dev/null +++ b/ompi/mpi/c/get_notify.c.in @@ -0,0 +1,77 @@ +/* -*- 
Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/win/win.h" +#include "ompi/mca/osc/osc.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/runtime/ompi_spc.h" + +PROTOTYPE ERROR_CLASS get_notify(BUFFER_OUT origin_addr, COUNT origin_count, + DATATYPE origin_datatype, INT target_rank, + AINT target_disp, COUNT target_count, + DATATYPE target_datatype, INT notification_idx, WIN win) +{ + int rc; + + SPC_RECORD(OMPI_SPC_GET_NOTIFY, 1); + + if (MPI_PARAM_CHECK) { + rc = OMPI_SUCCESS; + + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (ompi_win_invalid(win)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_WIN, FUNC_NAME); + } else if (origin_count < 0 || target_count < 0) { + rc = MPI_ERR_COUNT; + } else if (ompi_win_peer_invalid(win, target_rank) && + (MPI_PROC_NULL != target_rank)) { + rc = MPI_ERR_RANK; + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { + rc = 
MPI_ERR_DISP; + } else if (notification_idx < 0) { + rc = MPI_ERR_NOTIFY_IDX; + } else { + OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); + if (OMPI_SUCCESS == rc) { + OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, target_datatype, target_count); + } + } + OMPI_ERRHANDLER_CHECK(rc, win, rc, FUNC_NAME); + } + + if (MPI_PROC_NULL == target_rank) return MPI_SUCCESS; + + rc = win->w_osc_module->osc_get_notify(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, + target_datatype, notification_idx, win); + OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/put_notify.c.in b/ompi/mpi/c/put_notify.c.in new file mode 100644 index 00000000000..14ee5c7e365 --- /dev/null +++ b/ompi/mpi/c/put_notify.c.in @@ -0,0 +1,80 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2024 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/win/win.h" +#include "ompi/mca/osc/osc.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/runtime/ompi_spc.h" + +PROTOTYPE ERROR_CLASS put_notify(BUFFER origin_addr, COUNT origin_count, DATATYPE origin_datatype, + INT target_rank, AINT target_disp, COUNT target_count, + DATATYPE target_datatype, INT notification_idx, WIN win) +{ + int rc; + + SPC_RECORD(OMPI_SPC_PUT_NOTIFY, 1); + + if (MPI_PARAM_CHECK) { + rc = OMPI_SUCCESS; + + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (ompi_win_invalid(win)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_WIN, FUNC_NAME); + } else if (origin_count < 0 || target_count < 0) { + rc = MPI_ERR_COUNT; + } else if (ompi_win_peer_invalid(win, target_rank) && + (MPI_PROC_NULL != target_rank)) { + rc = MPI_ERR_RANK; + } else if (NULL == target_datatype || + MPI_DATATYPE_NULL == target_datatype) { + rc = MPI_ERR_TYPE; + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { + rc = MPI_ERR_DISP; + } else if (notification_idx < 0) { + rc = MPI_ERR_NOTIFY_IDX; + } else { + OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); + if (OMPI_SUCCESS == rc) { + OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, target_datatype, target_count); + } + } + OMPI_ERRHANDLER_CHECK(rc, win, rc, FUNC_NAME); + } + + if (MPI_PROC_NULL == target_rank) return MPI_SUCCESS; + + rc = win->w_osc_module->osc_put_notify(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, + target_datatype, notification_idx, win); + OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); +} diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index 6f1d8aa7d6a..1d25545c80b 100644 --- a/ompi/runtime/ompi_spc.c +++ 
b/ompi/runtime/ompi_spc.c @@ -71,8 +71,10 @@ static const ompi_spc_event_t ompi_spc_events_desc[OMPI_SPC_NUM_COUNTERS] = { SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV, "The number of times MPI_Sendrecv was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_SENDRECV_REPLACE, "The number of times MPI_Sendrecv_replace was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_PUT, "The number of times MPI_Put was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_PUT_NOTIFY, "The number of times MPI_Put_notify was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_RPUT, "The number of times MPI_Rput was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_GET, "The number of times MPI_Get was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GET_NOTIFY, "The number of times MPI_Get was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_RGET, "The number of times MPI_Rget was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_PROBE, "The number of times MPI_Probe was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_IPROBE, "The number of times MPI_Iprobe was called.", false, false), diff --git a/ompi/runtime/ompi_spc.h b/ompi/runtime/ompi_spc.h index 76ec7f25f16..3d0efd257b3 100644 --- a/ompi/runtime/ompi_spc.h +++ b/ompi/runtime/ompi_spc.h @@ -58,8 +58,10 @@ typedef enum ompi_spc_counters { OMPI_SPC_SENDRECV, OMPI_SPC_SENDRECV_REPLACE, OMPI_SPC_PUT, + OMPI_SPC_PUT_NOTIFY, OMPI_SPC_RPUT, OMPI_SPC_GET, + OMPI_SPC_GET_NOTIFY, OMPI_SPC_RGET, OMPI_SPC_PROBE, OMPI_SPC_IPROBE, From 088364cb39970c5e650494cf3e23235424e12ce8 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Mon, 22 Dec 2025 09:46:28 -0500 Subject: [PATCH 16/53] Edits for Public APIs: put_with_notify get_with_notify Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm.h | 1 - ompi/runtime/ompi_spc.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 200ec8b3de8..0aca3b50892 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ 
b/ompi/mca/osc/sm/osc_sm.h @@ -107,7 +107,6 @@ int ompi_osc_sm_detach(struct ompi_win_t *win, const void *base); int ompi_osc_sm_free(struct ompi_win_t *win); -// TODO: add put/get_notify prototypes int ompi_osc_sm_put(const void *origin_addr, size_t origin_count, diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index 1d25545c80b..dcbbe04b256 100644 --- a/ompi/runtime/ompi_spc.c +++ b/ompi/runtime/ompi_spc.c @@ -74,7 +74,7 @@ static const ompi_spc_event_t ompi_spc_events_desc[OMPI_SPC_NUM_COUNTERS] = { SET_COUNTER_ARRAY(OMPI_SPC_PUT_NOTIFY, "The number of times MPI_Put_notify was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_RPUT, "The number of times MPI_Rput was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_GET, "The number of times MPI_Get was called.", false, false), - SET_COUNTER_ARRAY(OMPI_SPC_GET_NOTIFY, "The number of times MPI_Get was called.", false, false), + SET_COUNTER_ARRAY(OMPI_SPC_GET_NOTIFY, "The number of times MPI_Get_notify was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_RGET, "The number of times MPI_Rget was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_PROBE, "The number of times MPI_Probe was called.", false, false), SET_COUNTER_ARRAY(OMPI_SPC_IPROBE, "The number of times MPI_Iprobe was called.", false, false), From f0ee69f578e1b6e9d14ba98540ac96e75f2d94c0 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Tue, 3 Feb 2026 08:29:27 -0500 Subject: [PATCH 17/53] Notified RMA counters memory allocation in the shared memory segment for a single and multi rank window. 
Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm.h | 3 ++- ompi/mca/osc/sm/osc_sm_component.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 0aca3b50892..bf80c082ac8 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -22,6 +22,7 @@ typedef uint64_t osc_sm_post_type_t; typedef opal_atomic_uint64_t osc_sm_post_atomic_type_t; #define OSC_SM_POST_BITS 6 #define OSC_SM_POST_MASK 0x3f +#define OSC_SM_MAX_NOTIFY_COUNTERS 16 /* data shared across all peers */ struct ompi_osc_sm_global_state_t { @@ -79,7 +80,7 @@ struct ompi_osc_sm_module_t { size_t *sizes; void **bases; ptrdiff_t *disp_units; - uint64_t **notify_counters; + uint64_t *notify_counters; ompi_group_t *start_group; diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index e7613c86f6e..5500a2bb412 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -255,12 +255,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis module->posts = calloc (1, sizeof(module->posts[0]) + sizeof (module->posts[0][0])); if (NULL == module->posts) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->posts[0] = (osc_sm_post_atomic_type_t *) (module->posts + 1); + + /* allocate notify counters for single process case */ + module->notify_counters = calloc(OSC_SM_MAX_NOTIFY_COUNTERS, sizeof(uint64_t)); + if (NULL == module->notify_counters) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; } else { unsigned long total, *rbuf; int i, flag; size_t pagesize; size_t state_size; size_t posts_size, post_size = (comm_size + OSC_SM_POST_MASK) / (OSC_SM_POST_MASK + 1); + size_t notify_counters_size; size_t data_base_size; opal_output_verbose(MCA_BASE_VERBOSE_DEBUG, ompi_osc_base_framework.framework_output, @@ -316,7 +321,9 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis state_size += 
OPAL_ALIGN_PAD_AMOUNT(state_size, 64); posts_size = comm_size * post_size * sizeof (module->posts[0][0]); posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64); - data_base_size = state_size + posts_size; + notify_counters_size = OSC_SM_MAX_NOTIFY_COUNTERS * sizeof(uint64_t); + notify_counters_size += OPAL_ALIGN_PAD_AMOUNT(notify_counters_size, 64); + data_base_size = state_size + posts_size + notify_counters_size; data_base_size += OPAL_ALIGN_PAD_AMOUNT(data_base_size, pagesize); if (0 == ompi_comm_rank (module->comm)) { char *data_file; @@ -377,6 +384,12 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis module->global_state = (ompi_osc_sm_global_state_t *) (module->posts[0] + comm_size * post_size); module->node_states = (ompi_osc_sm_node_state_t *) (module->global_state + 1); + /* set up notify counters in shared memory after node_states */ + module->notify_counters = (uint64_t *) ((char *)(module->node_states + comm_size) + + OPAL_ALIGN_PAD_AMOUNT((uintptr_t)(module->node_states + comm_size), 64)); + /* zero out notify counters */ + memset(module->notify_counters, 0, OSC_SM_MAX_NOTIFY_COUNTERS * sizeof(uint64_t)); + for (i = 0, total = data_base_size ; i < comm_size ; ++i) { if (i > 0) { module->posts[i] = module->posts[i - 1] + post_size; @@ -555,6 +568,7 @@ ompi_osc_sm_free(struct ompi_win_t *win) module->comm->c_coll->coll_barrier_module); opal_shmem_segment_detach (&module->seg_ds); + /* notify_counters points into shared memory segment, no separate free needed */ } else { free(module->node_states); free(module->global_state); @@ -562,6 +576,8 @@ ompi_osc_sm_free(struct ompi_win_t *win) mca_mpool_base_default_module->mpool_free(mca_mpool_base_default_module, module->bases[0]); } + /* free notify_counters for single process case */ + free(module->notify_counters); } free(module->disp_units); free(module->outstanding_locks); From 645739a2a839c4b801ba6b54a1622897a4856163 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 4 
Feb 2026 07:47:01 -0500 Subject: [PATCH 18/53] Editing Notified RMA implementation Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm_comm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 4391a375ebc..6cc5384d750 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -98,7 +98,7 @@ ompi_osc_sm_rput_notify(const void *origin_addr, *ompi_req = &ompi_request_empty; opal_atomic_wmb(); - opal_atomic_add(&module->notify_counters[target][notify], 1); + opal_atomic_add(&module->notify_counters[notify], 1); return OMPI_SUCCESS; } @@ -181,7 +181,7 @@ ompi_osc_sm_rget_notify(void *origin_addr, *ompi_req = &ompi_request_empty; opal_atomic_rmb(); - opal_atomic_add(&module->notify_counters[target][notify], 1); + opal_atomic_add(&module->notify_counters[notify], 1); return OMPI_SUCCESS; } @@ -355,7 +355,7 @@ ompi_osc_sm_put_notify(const void *origin_addr, } opal_atomic_wmb(); - opal_atomic_add(&module->notify_counters[target][notify], 1); + opal_atomic_add(&module->notify_counters[notify], 1); return ret; } @@ -422,7 +422,7 @@ ompi_osc_sm_get_notify(void *origin_addr, return ret; } opal_atomic_rmb(); - opal_atomic_add(&module->notify_counters[target][notify], 1); + opal_atomic_add(&module->notify_counters[notify], 1); return ret; } From 8e48b71dd3b5749d068b0ce6c55f82c6d94832fb Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 16 Jan 2026 11:04:32 -0800 Subject: [PATCH 19/53] ci: Remove outdated comment in community-jenkins Signed-off-by: Brian Barrett --- .ci/community-jenkins/Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/.ci/community-jenkins/Jenkinsfile b/.ci/community-jenkins/Jenkinsfile index f4b305f1d66..79ee83c85f5 100644 --- a/.ci/community-jenkins/Jenkinsfile +++ b/.ci/community-jenkins/Jenkinsfile @@ -14,7 +14,6 @@ // // // WORKSPACE Layout: -// autotools-install/ Autotools install for the builder // ompi/ Open MPI source 
tree // We if we push changes to a PR, we don't need to keep old jobs running, so From 362dfc100f5efaf43aa6cd8f0694988b4ca6bfb5 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 21 Jan 2026 19:46:01 -0800 Subject: [PATCH 20/53] ci: Find compiler translation from AMI There's no reason to have the list of compiler names (since every distro names their versioned compilers differently) in both the AMI build scripts and in the CI scripts. New AMIs will all have a translator script in the default user home directory, so use that instead. Signed-off-by: Brian Barrett --- .ci/community-jenkins/pr-builder.sh | 100 ++++++---------------------- 1 file changed, 20 insertions(+), 80 deletions(-) diff --git a/.ci/community-jenkins/pr-builder.sh b/.ci/community-jenkins/pr-builder.sh index eb88b4c1538..dc72a5b7c51 100755 --- a/.ci/community-jenkins/pr-builder.sh +++ b/.ci/community-jenkins/pr-builder.sh @@ -109,89 +109,29 @@ echo "--> version: $VERSION_ID" # See if builder provided a compiler we should use, and translate it to # CONFIGURE_ARGS. # -case ${PLATFORM_ID} in - rhel) - case "$COMPILER" in - gcc48|"") - echo "--> Using default compilers" - ;; - *) - echo "Unsupported compiler ${COMPILER}. Aborting" - exit 1 - ;; - esac - ;; - amzn) - case "$COMPILER" in - "") - echo "--> Using default compilers" - ;; - gcc44) - CONFIGURE_ARGS="$CONFIGURE_ARGS CC=gcc44 CXX=g++44 FC=gfortran44" - ;; - gcc48) - CONFIGURE_ARGS="$CONFIGURE_ARGS CC=gcc48 CXX=g++48 FC=gfortran48" - ;; - clang36) - CONFIGURE_ARGS="$CONFIGURE_ARGS CC=clang CXX=clang++ --disable-mpi-fortran" - ;; - *) - echo "Unsupported compiler ${COMPILER}. 
Aborting" - exit 1 - ;; - esac - ;; - ubuntu) - case "$COMPILER" in - "") - echo "--> Using default compilers" - ;; - gcc4*) - version=`echo "$COMPILER" | sed -e 's/gcc4\([0-9]*\)/4.\1/'` - CONFIGURE_ARGS="CC=gcc-${version} CXX=g++-${version} FC=gfortran-${version}" - ;; - gcc*) - version=`echo "$COMPILER" | sed -e 's/gcc\([0-9]*\)/\1/'` - CONFIGURE_ARGS="CC=gcc-${version} CXX=g++-${version} FC=gfortran-${version}" - ;; - clang3*|clang4*|clang5*|clang6*) - version=`echo "$COMPILER" | sed -e 's/clang\([0-9]\)\([0-9]*\)/\1.\2/'` - CONFIGURE_ARGS="CC=clang-${version} CXX=clang++-${version} --disable-mpi-fortran" - ;; +if test "${COMPILER}" != "" ; then + if test ! -r ${HOME}/ompi-compiler-setup.sh ; then + echo "Could not find compiler setup script ompi-compiler-setup.sh. Aborting." + exit 1 + fi + + . ${HOME}/ompi-compiler-setup.sh + activate_compiler ${COMPILER} + + CONFIGURE_ARGS="${CONFIGURE_ARGS} CC=${CC} CPP=${CPP} CXX=${CXX} FC=${FC}" + if test "$FC" = "" ; then + CONFIGURE_ARGS="${CONFIGURE_ARGS} --disable-mpi-fortran" + else + # Flang doesn't seem good enough (yet) to compile our Fortran bindings, + # so skip for now. + case "${COMPILER}" in clang*) - version=`echo "$COMPILER" | sed -e 's/clang\([0-9]*\)/\1/'` - CONFIGURE_ARGS="CC=clang-${version} CXX=clang++-${version} --disable-mpi-fortran" - ;; - *) - echo "Unsupported compiler ${COMPILER}. Aborting" - exit 1 + CONFIGURE_ARGS="${CONFIGURE_ARGS} --disable-mpi-fortran" ;; esac - ;; - sles) - case "$COMPILER" in - "") - echo "--> Using default compilers" - ;; - gcc48) - CONFIGURE_ARGS="$CONFIGURE_ARGS CC=gcc-48 CXX=g++-48 FC=gfortran-48" - ;; - gcc5) - CONFIGURE_ARGS="$CONFIGURE_ARGS CC=gcc-5 CXX=g++-5 FC=gfortran-5" - ;; - gcc6) - CONFIGURE_ARGS="$CONFIGURE_ARGS CC=gcc-6 CXX=g++-6 FC=gfortran-6" - ;; - *) - echo "Unsupported compiler ${COMPILER}. 
Aborting" - exit 1 - ;; - esac - ;; - FreeBSD) - CONFIGURE_ARGS="$CONFIGURE_ARGS LDFLAGS=-Wl,-rpath,/usr/local/lib/gcc5 --with-wrapper-ldflags=-Wl,-rpath,/usr/local/lib/gcc5" - ;; -esac + fi +fi + CONFIGURE_ARGS="$CONFIGURE_ARGS --disable-silent-rules" echo "--> Compiler setup: $CONFIGURE_ARGS" From 4cebd308a4d937f1e913a98c9c80dcf3e79e1284 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 16 Jan 2026 11:14:50 -0800 Subject: [PATCH 21/53] ci: Update build config list Update build configuration map to include the latest GCC / Clang compilers, Amazon Linux 2023, and Ubuntu 24.04. Reformat the list of configurations so that future updates are a bit easier to diff. Signed-off-by: Brian Barrett --- .ci/community-jenkins/Jenkinsfile | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/.ci/community-jenkins/Jenkinsfile b/.ci/community-jenkins/Jenkinsfile index 79ee83c85f5..a6fb4cb781c 100644 --- a/.ci/community-jenkins/Jenkinsfile +++ b/.ci/community-jenkins/Jenkinsfile @@ -55,9 +55,26 @@ println('Tests Completed') // although currently we only support the one stage of "everything", where each // build stage is a map of different configurations to test. 
def prepare_check_stages() { - def configure_options = ["--disable-dlopen", "--disable-oshmem", "--enable-builtin-atomic", "--enable-ipv6"] - def compilers = ["clang10", "gcc7", "gcc8", "gcc9", "gcc10"] - def platforms = ["amazon_linux_2", "amazon_linux_2-arm64", "rhel8"] + def configure_options = [ + "--disable-dlopen", + "--disable-oshmem", + "--enable-builtin-atomic", + "--enable-ipv6" + ] + def compilers = [ + "gcc14", + "clang18" + ] + def platforms = [ + "amazon_linux_2", + "amazon_linux_2-arm64", + "rhel8", + "amazon_linux_2023-arm64", + "amazon_linux_2023-x86_64", + "ubuntu_20.04", + "ubuntu_24.04-arm64", + "ubuntu_24.04-x86_64" + ] def check_stages_list = [] // Build everything stage From 3b3cc5cf02882231248421acff2b4ae208f01111 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 16 Jan 2026 11:26:18 -0800 Subject: [PATCH 22/53] ci: Use virtual env for Python modules With recent updates, the Jenkins builders include a pre-configured Python virtual environment for the required set of packages. Activate that environment and re-run the requirements check (this should be a no-op unless there are additional packages in the current PR). Signed-off-by: Brian Barrett --- .ci/community-jenkins/pr-builder.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.ci/community-jenkins/pr-builder.sh b/.ci/community-jenkins/pr-builder.sh index dc72a5b7c51..980b7190c7d 100755 --- a/.ci/community-jenkins/pr-builder.sh +++ b/.ci/community-jenkins/pr-builder.sh @@ -154,6 +154,14 @@ echo "--> Configure arguments: $CONFIGURE_ARGS" sha1=`git rev-parse HEAD` echo "--> Building commit ${sha1}" +if test -r "${HOME}/ompi-setup-python.sh" ; then + echo "--> Initializing Python environment" + . ${HOME}/ompi-setup-python.sh + find . -name "requirements.txt" -exec ${PIP_CMD} install -r {} \; +else + echo "--> No Python environment found, hoping for the best." 
+fi + if test -f autogen.pl; then echo "--> running ./autogen.pl ${AUTOGEN_ARGS}" ./autogen.pl ${AUTOGEN_ARGS} From 151d1b6283900045e3a9d3854b3ed32dcf36b520 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 4 Feb 2026 08:11:01 -0800 Subject: [PATCH 23/53] ci: Add a vpath build test We recently had an issue where we missed a vpath-breaking change, so add a vpath test to CI. Signed-off-by: Brian Barrett --- .ci/community-jenkins/Jenkinsfile | 11 +++++-- .ci/community-jenkins/pr-builder.sh | 51 ++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/.ci/community-jenkins/Jenkinsfile b/.ci/community-jenkins/Jenkinsfile index a6fb4cb781c..2c20d630ac1 100644 --- a/.ci/community-jenkins/Jenkinsfile +++ b/.ci/community-jenkins/Jenkinsfile @@ -95,6 +95,7 @@ def prepare_check_stages() { } build_parallel_map.put("distcheck", prepare_build("distcheck", "tarball_build", "--distcheck")) + build_parallel_map.put("vpath", prepare_build("vpath", "", "--build-dir ompi-build")) check_stages_list.add(build_parallel_map) @@ -105,14 +106,20 @@ def prepare_build(build_name, label, build_arg) { return { stage("${build_name}") { node(label) { - checkout(changelog: false, poll: false, scm: scm) + // Checkout into ompi-source instead of the top of the + // workspace, so that we have room in the workspace to setup a + // vpath build. + dir ('ompi-source') { + checkout(changelog: false, poll: false, scm: scm) + } + // If pr-builder.sh fails, the sh step will throw an exception, // which we catch so that the job doesn't abort and continues on // to other steps - such as cleanup. Because we catch the // exception, we need to tell Jenkins the overall job has // failed. 
try { - sh "/bin/bash -x .ci/community-jenkins/pr-builder.sh ${build_arg} ompi" + sh "/bin/bash -x ompi-source/.ci/community-jenkins/pr-builder.sh ${build_arg} --source-dir ompi-source" } catch (Exception e) { currentBuild.result = "FAILURE" } diff --git a/.ci/community-jenkins/pr-builder.sh b/.ci/community-jenkins/pr-builder.sh index 980b7190c7d..88426859bf0 100755 --- a/.ci/community-jenkins/pr-builder.sh +++ b/.ci/community-jenkins/pr-builder.sh @@ -21,6 +21,8 @@ MAKE_ARGS= MAKE_J="-j 8" PREFIX="${WORKSPACE}/install" MPIRUN_MODE=${MPIRUN_MODE:-runall} +SOURCE_DIR= +BUILD_DIR= # # Options Parsing @@ -77,6 +79,24 @@ while (( "$#" )); do exit 1 fi ;; + --source-dir) + if [ -n "$2" ] && [ ${2:0:1} != "-" ]; then + SOURCE_DIR=$2 + shift 2 + else + echo "Error: Argument for $1 is missing" >&2 + exit 1 + fi + ;; + --build-dir) + if [ -n "$2" ] && [ ${2:0:1} != "-" ]; then + BUILD_DIR=$2 + shift 2 + else + echo "Error: Argument for $1 is missing" >&2 + exit 1 + fi + ;; -*|--*=) # Unsupported flags echo "Error: Unsupported flag $1" >&2 exit 1 @@ -105,6 +125,16 @@ fi echo "--> platform: $PLATFORM_ID" echo "--> version: $VERSION_ID" +if test "${SOURCE_DIR}" = "" ; then + echo "SOURCE_DIR is unset. Cannot continue." + exit 1 +fi + +echo "--> Workspace: ${WORKSPACE}" +echo "--> Source Dir: ${SOURCE_DIR}" +echo "--> Build Dir: ${BUILD_DIR}" +echo "--> Install Dir: ${PREFIX}" + # # See if builder provided a compiler we should use, and translate it to # CONFIGURE_ARGS. @@ -150,6 +180,8 @@ fi echo "--> Autogen arguments: $AUTOGEN_ARGS" echo "--> Configure arguments: $CONFIGURE_ARGS" +cd "${WORKSPACE}/${SOURCE_DIR}" + # Build sha1=`git rev-parse HEAD` echo "--> Building commit ${sha1}" @@ -175,9 +207,20 @@ else fi fi -echo "--> running ./configure --prefix=\"${PREFIX}\" ${CONFIGURE_ARGS}" -if ! ./configure --prefix="${PREFIX}" ${CONFIGURE_ARGS}; then - echo "./configure --prefix=\"${PREFIX}\" ${CONFIGURE_ARGS} failed, ABORTING !" 
+if test "${BUILD_DIR}" != "" ; then + cd "${WORKSPACE}" + rm -rf "${BUILD_DIR}" + mkdir "${BUILD_DIR}" + cd "${WORKSPACE}/${BUILD_DIR}" + CONFIGURE=../${SOURCE_DIR}/configure +else + # already in ${WORKSPACE}/${SOURCE_DIR} + CONFIGURE=./configure +fi + +echo "--> running ${CONFIGURE} --prefix=\"${PREFIX}\" ${CONFIGURE_ARGS}" +if ! ${CONFIGURE} --prefix="${PREFIX}" ${CONFIGURE_ARGS}; then + echo "${CONFIGURE} --prefix=\"${PREFIX}\" ${CONFIGURE_ARGS} failed, ABORTING !" if test -f config.log; then echo "config.log content :" cat config.log @@ -216,7 +259,7 @@ echo "--> running ompi_info" ompi_info echo "--> running make all in examples" -cd "examples" +cd "${WORKSPACE}/${SOURCE_DIR}/examples" make ${MAKE_ARGS} all cd .. From ebc3e16cbfc77831aa0aaca913230e4d573e6129 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Thu, 5 Feb 2026 09:54:06 -0700 Subject: [PATCH 24/53] UCX: squash compiler warning my gcc 15.1.0 is chattier than older gcc's and reported this warning when compiling opal_ucx_common.c osc_ucx_comm.c: In function 'ompi_osc_ucx_check_ops_and_flush': osc_ucx_comm.c:947:45: warning: comparison of integer expressions of different signedness: 'opal_atomic_int64_t' {aka 'long int'} and 'size_t' {aka 'long unsigned int'} [-Wsign-compare] 947 | if (module->ctx->num_incomplete_req_ops > ompi_osc_ucx_outstanding_ops_flush_threshold) { | ^ this patch squashes that warning. 
Signed-off-by: Howard Pritchard --- ompi/mca/osc/ucx/osc_ucx_comm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c index ab122e67263..0354edb71c0 100644 --- a/ompi/mca/osc/ucx/osc_ucx_comm.c +++ b/ompi/mca/osc/ucx/osc_ucx_comm.c @@ -944,7 +944,7 @@ static inline int ompi_osc_ucx_check_ops_and_flush (ompi_osc_ucx_module_t *modul uint64_t base_tmp, tail_tmp; int ret = OMPI_SUCCESS; - if (module->ctx->num_incomplete_req_ops > ompi_osc_ucx_outstanding_ops_flush_threshold) { + if ((size_t)module->ctx->num_incomplete_req_ops > ompi_osc_ucx_outstanding_ops_flush_threshold) { ret = opal_common_ucx_ctx_flush(module->ctx, OPAL_COMMON_UCX_SCOPE_WORKER, 0); if (ret != OPAL_SUCCESS) { ret = OMPI_ERROR; From aac635c76f6d4963bfbd7d8002e01ad06709a864 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 31 Jan 2023 09:49:08 -0500 Subject: [PATCH 25/53] coll/han: increase segment sizes to 512k Increase segment sizes for bcast, reduce, and allreduce to 512k. On modern machines, higher segment sizes seem to be more efficient as they reduce the overhead of segmenting. 
Signed-off-by: Joseph Schuchart --- ompi/mca/coll/han/coll_han_component.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ompi/mca/coll/han/coll_han_component.c b/ompi/mca/coll/han/coll_han_component.c index 1d78bf87158..7ae17b9e4f8 100644 --- a/ompi/mca/coll/han/coll_han_component.c +++ b/ompi/mca/coll/han/coll_han_component.c @@ -301,7 +301,7 @@ static int han_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, &cs->han_output_verbose); - cs->han_bcast_segsize = 65536; + cs->han_bcast_segsize = 524288; (void) mca_base_component_var_register(c, "bcast_segsize", "segment size for bcast", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, @@ -321,7 +321,7 @@ static int han_register(void) &cs->han_bcast_low_module, &cs->han_op_module_name.bcast.han_op_low_module_name); - cs->han_reduce_segsize = 65536; + cs->han_reduce_segsize = 524288; (void) mca_base_component_var_register(c, "reduce_segsize", "segment size for reduce", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, @@ -340,7 +340,7 @@ static int han_register(void) OPAL_INFO_LVL_9, &cs->han_reduce_low_module, &cs->han_op_module_name.reduce.han_op_low_module_name); - cs->han_allreduce_segsize = 65536; + cs->han_allreduce_segsize = 524288; (void) mca_base_component_var_register(c, "allreduce_segsize", "segment size for allreduce", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, From 4a7296005023a8a7d187cefefa79e4f8f055d569 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 31 Jan 2023 09:51:14 -0500 Subject: [PATCH 26/53] coll/adapt: Increase ireduce segment size A larger segment size helps reduce the overhead of segmenting. The 512k size matches the size of coll/han. 
Signed-off-by: Joseph Schuchart --- ompi/mca/coll/adapt/coll_adapt_ireduce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/coll/adapt/coll_adapt_ireduce.c b/ompi/mca/coll/adapt/coll_adapt_ireduce.c index 15bd586901a..07616285616 100644 --- a/ompi/mca/coll/adapt/coll_adapt_ireduce.c +++ b/ompi/mca/coll/adapt/coll_adapt_ireduce.c @@ -48,7 +48,7 @@ int ompi_coll_adapt_ireduce_register(void) mca_coll_adapt_component.adapt_ireduce_algorithm = 1; } - mca_coll_adapt_component.adapt_ireduce_segment_size = 163740; + mca_coll_adapt_component.adapt_ireduce_segment_size = 524288; mca_base_component_var_register(c, "reduce_segment_size", "Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, From 6a7d562e43733557b176ccbdc443de9836f3248f Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Wed, 29 Mar 2023 13:01:59 -0400 Subject: [PATCH 27/53] Remove era_comm_free function and explain the reasoning. Signed-off-by: Aurelien Bouteiller --- .../ftagree/coll_ftagree_earlyreturning.c | 52 ++++--------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c index 9450c443349..f28c36a3d16 100644 --- a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c +++ b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c @@ -2956,6 +2956,15 @@ int mca_coll_ftagree_era_finalize(void) "%s ftagree:agreement (ERA) GC: %lu passed agreements remain in the passed agreements hash table\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), opal_hash_table_get_size(&era_passed_agreements))); + /* Some agreements can remain in the era_passed_agreements table until + * finalize; notably, the last agreement in a communicator that has been + * freed. 
+ * + * The commit that added this comment also removed the (unused) function + * mca_coll_ftagree_era_free_comm that could enforce purging that table + * during comm_free, at the cost of making comm_free hard synchronizing; + * this was deemed too disruptive for the small memory usage gain. + */ for( rc = opal_hash_table_get_first_key_uint64(&era_passed_agreements, &key64, &value, &node); OPAL_SUCCESS == rc; rc = opal_hash_table_get_next_key_uint64(&era_passed_agreements, &key64, &value, node, &node) ) { @@ -3368,46 +3377,3 @@ int mca_coll_ftagree_iera_intra(void *contrib, return OMPI_SUCCESS; } -#if 0 -// Per @bosilca and @jsquyres discussion 29 Apr 2021: there is -// probably a memory leak in MPI_FINALIZE right now, because this -// function does not appear to be being called from anywhere. -// @bosilca's team is looking into it. -int mca_coll_ftagree_era_free_comm(ompi_communicator_t* comm, - mca_coll_base_module_t *module) -{ - ompi_group_t* acked; - era_identifier_t aid; - int rc; - - OPAL_OUTPUT_VERBOSE((4, ompi_ftmpi_output_handle, - "%s ftagree:agreement (ERA) Freeing Communicator (%d.%d).\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - comm->c_contextid, - comm->c_epoch)); - - opal_mutex_lock(&ompi_group_afp_mutex); - ompi_group_intersection(comm->c_remote_group, ompi_group_all_failed_procs, &acked); - opal_mutex_unlock(&ompi_group_afp_mutex); - do { - rc = mca_coll_ftagree_era_intra(NULL, - 0, - &ompi_mpi_int.dt, - &ompi_mpi_op_band.op, - &acked, true, - comm, - comm->c_coll->coll_agree_module); - } while(rc != MPI_SUCCESS); - OBJ_RELEASE(acked); - - aid.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64; - aid.ERAID_FIELDS.epoch = comm->c_epoch; - - opal_mutex_lock(&era_mutex); - /** We don't need to set aid.ERAID_FIELDS.agreementid to collect all of them */ - era_collect_passed_agreements(aid, 0, (uint16_t)-1); - opal_mutex_unlock(&era_mutex); - - return OMPI_SUCCESS; -} -#endif From 6a157fa3eaab6d15c5b6bc42a57c2dbff4cad197 Mon Sep 17 00:00:00 2001 
From: George Katevenis Date: Thu, 5 Feb 2026 13:53:37 +0200 Subject: [PATCH 28/53] docs: Move xhc documentation from xhc's README to the sphinx docs Signed-off-by: George Katevenis --- docs/Makefile.am | 3 +- docs/tuning-apps/collectives/components.rst | 4 +- .../collectives/images}/xhc-hierarchy.svg | 240 ++++++----- docs/tuning-apps/collectives/index.rst | 1 + docs/tuning-apps/collectives/xhc.rst | 310 +++++++++++++++ ompi/mca/coll/xhc/README.md | 373 +----------------- 6 files changed, 432 insertions(+), 499 deletions(-) rename {ompi/mca/coll/xhc/resources => docs/tuning-apps/collectives/images}/xhc-hierarchy.svg (86%) create mode 100644 docs/tuning-apps/collectives/xhc.rst diff --git a/docs/Makefile.am b/docs/Makefile.am index 871184eb01d..a6edc6ae045 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -38,7 +38,8 @@ TEXT_SOURCE_FILES = \ $(srcdir)/license/*.txt IMAGE_SOURCE_FILES = \ $(srcdir)/openmpi_logo.png \ - $(srcdir)/installing-open-mpi/required-support-libraries-dependency-graph.png + $(srcdir)/installing-open-mpi/required-support-libraries-dependency-graph.png \ + $(srcdir)/tuning-apps/collectives/images/xhc-hierarchy.svg RST_SOURCE_FILES = \ $(srcdir)/*.rst \ $(srcdir)/release-notes/*.rst \ diff --git a/docs/tuning-apps/collectives/components.rst b/docs/tuning-apps/collectives/components.rst index f29c202e358..921f7e12036 100644 --- a/docs/tuning-apps/collectives/components.rst +++ b/docs/tuning-apps/collectives/components.rst @@ -28,7 +28,9 @@ The following provides a list of components and their primary target scenario: more details. - ``ucc``: component using the `UCC library `_ for collective operations. - - ``xhc``: shared memory collective component using XPMEM for data transfers. + - ``xhc``: shared memory collective component, employing hierarchical & + topology-aware algorithms, with XPMEM for data transfers. See :doc:`xhc` for + more details. - ``acoll``: collective component tuned for AMD Zen architectures. 
See :doc:`acoll` for more details. - ``accelerator``: component providing host-proxy algorithms for some diff --git a/ompi/mca/coll/xhc/resources/xhc-hierarchy.svg b/docs/tuning-apps/collectives/images/xhc-hierarchy.svg similarity index 86% rename from ompi/mca/coll/xhc/resources/xhc-hierarchy.svg rename to docs/tuning-apps/collectives/images/xhc-hierarchy.svg index c8f6d8a2da3..b4ae62a6c4f 100644 --- a/ompi/mca/coll/xhc/resources/xhc-hierarchy.svg +++ b/docs/tuning-apps/collectives/images/xhc-hierarchy.svg @@ -7,7 +7,7 @@ viewBox="0 0 169.571 119.89402" version="1.1" id="svg5" - inkscape:version="1.2.1 (9c6d41e410, 2022-07-14, custom)" + inkscape:version="1.4.3 (0d15f75042, 2025-12-25)" sodipodi:docname="xhc-hierarchy.svg" inkscape:export-filename="../xhc-hierarchy.png" inkscape:export-xdpi="300" @@ -26,11 +26,11 @@ inkscape:pagecheckerboard="0" inkscape:document-units="mm" showgrid="false" - inkscape:zoom="0.75290071" - inkscape:cx="286.22632" - inkscape:cy="274.93665" + inkscape:zoom="1.4452058" + inkscape:cx="278.16108" + inkscape:cy="266.39805" inkscape:window-width="1920" - inkscape:window-height="1018" + inkscape:window-height="1136" inkscape:window-x="1920" inkscape:window-y="0" inkscape:window-maximized="1" @@ -78,25 +78,6 @@ id="path-effect556" is_visible="true" lpeversion="1" /> - - - + transform="translate(-430.99854,-193.98109)"> + y="193.98109" /> NUMA Level + y="296.00598">NUMA Level Socket Level + y="259.80359">Socket Level + transform="translate(28.708569,27.920669)"> System Level + style="font-size:5.64444px;text-align:center;text-anchor:middle;stroke-width:0.264583" + x="524.14557" + y="204.60033">Node Level + inkscape:original-d="m 561.29231,236.42783 c -10.38789,-6.52565 -20.67275,-12.94489 -31.00982,-19.41762" + transform="translate(0,-1.0583333)" /> + y="241.07695" /> + y="241.07695" /> + transform="translate(76.684113,23.158255)"> + y="240.44742" /> + y="240.44742" /> + transform="translate(118.68254,23.158255)"> + 
transform="translate(150.43255,23.158255)"> + inkscape:original-d="m 487.56018,236.95873 c 10.17386,-6.63057 20.2468,-13.15301 30.3709,-19.72977" + transform="translate(0,-1.0583333)" /> Cores + style="font-size:4.93889px;stroke-width:0.264583" + x="-163.80605" + y="497.17615">Cores @@ -768,23 +751,23 @@ + transform="matrix(-1,0,0,1,924.11737,0.52916667)"> NUMA 0Leader + transform="translate(44.916471,23.158255)"> P0 + y="279.12918">P0 P1 + y="279.12741">P1 P2 + y="279.12741">P2 P3 + y="279.12741">P3 P4 + y="279.12918">P4 P5 + y="279.12741">P5 P6 + y="279.12741">P6 P7 + y="279.12741">P7 P8 + y="279.12921">P8 P9 + y="279.12744">P9 10 + y="279.12744">10 11 + y="279.12744">11 12 + y="279.12921">12 13 + y="279.12744">13 14 + y="279.12744">14 15 + y="279.12744">15 + transform="matrix(-1,0,0,1,944.89717,-1.0583333)"> + transform="translate(84.666671)"> + id="g8800"> + transform="translate(-83.60834,-0.52916667)"> + transform="rotate(180,501.4769,222.70799)"> + transform="matrix(1,0,0,-1,45.394312,445.41596)"> + transform="matrix(-1,0,0,1,955.86739,0.52916667)"> NUMA 1Leader + transform="matrix(-1,0,0,1,1029.9509,0.52916667)"> NUMA 3Leader [...] + +Main Features +------------- + +Hierarchy +~~~~~~~~~ + +XHC constructs an *n*-level hierarchy (i.e. no limitation on number of levels), +based on intra-node topological features. Rank/process locality information +originates from Hwloc, and is obtained through Open MPI's internal structures. + +The following topological features can currently be defined: + + * NUMA node + * CPU Socket + * L1/L2/L3 cache + * Hwthread/core + * Node (all ranks *are* in same node -> flat hierarchy) + +An example of a 3-level XHC hierarchy (``numa,socket`` configuration): + +.. image:: images/xhc-hierarchy.svg + :width: 450px + +Furthermore, support for virtual/user-defined hierarchies is available, to +allow for even finer control and custom experiments. 
+ +**Pipelining** is seamlessly applied across all levels of the hierarchy, to +minimize hierarchy-induced overheads, and to allow for interleaving of +operations in certain collectives (e.g. reduce+bcast in allreduce). + +Single-copy data transfers +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +XHC supports data transfers between MPI ranks using a single copy, through Open +MPI's ``opal/smsc`` (shared-memory-single-copy) framework. Despite the +component's name, XHC actually also supports additional single-copy mechanisms +in some collectives, though XPMEM is highly recommended. + + * Bcast: XPMEM, CMA, KNEM + * Allreduce/Reduce: XPMEM + * Barrier: *(irrelevant)* + +In XPMEM mode, application buffers are attached on the fly the first time they +appear, and are saved in ``smsc/xpmem``'s internal registration cache for +future uses. + +Shared-memory data transfers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +XHC also supports data transfers using copy-in-copy-out (CICO) over shared +memory. Copy-in-copy-out is always used for small messages, with automatic +switching to single-copy for large ones. All primitives support this mode, +regardless of XPMEM or SMSC presence, as long as the size of the message is +below the threshold. + +Inline data transfers +~~~~~~~~~~~~~~~~~~~~~ + +For especially small messages, the payload data is inlined in the same cache +line as the control data. This achieves exceptionally low latency in such +messages. Supported in all primitives, regardless of XPMEM or SMSC presence. + +Synchronization +~~~~~~~~~~~~~~~ + +XHC uses **lock-free** synchronization, using the single-writer paradigm and lightweight *read* or *write* memory barriers wherever appropriate. + +Multi-node with HAN +------------------- + +Even though ``xhc`` only works over shared memory, it may also be utilized in +multi-node environments, through ``coll/han``. 
HAN is already the default +component in multi-node runs, so all that's needed is to define ``xhc`` as the +component to be used for the intra-node phase: + +.. code-block:: sh + + $ mpirun --mca coll_han_bcast_low_module 2 --mca coll_han_reduce_low_module 2 \ + --mca coll_han_allreduce_low_module 2 + +.. _mca-params: + +MCA Parameters +-------------- + +Basic +~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 20 10 70 + + * - Parameter + - Default + - Description + + * - coll_xhc_priority + - 0 + - The priority of the component. Set it to a value higher than other + components to enable xhc. + +Main +~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 20 20 60 + + * - Parameter + - Default + - Description + + * - coll_xhc_hierarchy + - *unset* + - A comma separated list of topological features to which XHC's hierarchy + should be sensitive. This is a hint -- xhc will automatically: disregard + features that don't exist in the system, or that don't further segment + the ranks (e.g. ``numa`` was specified, but all ranks are in the same + NUMA node); re-order the list to match the system's hierarchy; add an + extra top level that's common to all ranks. This parameter applies to + all primitives, and is mutually exclusive with the primitive-specific + ones below. + + This parameter also supports the use of special modifiers for *virtual + hierarchies*. Check ``xhc_component_parse_hierarchy()`` for further + explanation and syntax. + + * - coll_xhc_chunk_size + - *unset* + - The chunk size for the pipelining. Data is processed in this-much sized + pieces at once. Applies to all primitives -- mutually exclusive with + primitive-specific parameters. + + * - coll_xhc_cico_max + - *unset* + - The max size up to which to use copy-in-copy-out. Single copy will be + used for messages above this size. Applies to all primitives -- mutually + exclusive with primitive-specific parameters. 
+ + * - coll_xhc__hierarchy + - bcast/barrier: ``numa,socket`` + (all)reduce: ``l3,numa,socket`` + - Topological features to consider for XHC's hierarchy, specifically for + this primitive. Mutually exclusive with the respective non-specific + parameter. + + * - coll_xhc__chunk_size + - 16K + - Pipeline chunk size, specifically for this primitive. Mutually exclusive + with the non-specific parameter. + + * - coll_xhc__cico_max + - bcast: ``256`` + (all)reduce: ``4K`` + - Max size for copy-in-copy-out transfers, specifically for this + primitive. Mutually exclusive with the non-specific parameter. + +Advanced +~~~~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 20 20 60 + + * - Parameter + - Default + - Description + + * - coll_xhc__root + - 0 + - Internal root rank, for either of these operations. + + * - coll_xhc_uniform_chunks + - true + - Whether to dynamically adjust (decrease) the chunk size in reduction + primitives, so that all ranks will perform equal work, depending on + the message size. + + * - coll_xhc_uniform_chunks_min + - 4K + - Minimum allowed value for the automatically decreased chunk size in + reduction primitives. + + * - coll_xhc_reduce_load_balance + - top,first + - Controls load balancing features in reduction primitives. With no such + features enabled, leader ranks don't perform any reduction work, on the + levels on which they are leaders. Add ``top`` to have the root perform + reductions on the top-most level of the hierarchy, as if a common rank. + Add ``first``, to have all leaders reduce a single chunk, at the + beginning of the operation as if they weren't leaders. Add ``all`` to + have leaders always perform reductions, even on the levels on which they + are leaders (not recommended). + + * - coll_xhc_dynamic_reduce + - non-float + - Controls support for out-of-order reduction (rank wise), which allows + temporarily skipping a peer that's not yet ready. 
The default value only + enables the feature for non-float types, to avoid reproducibility issues + with floats. Set to ``disabled`` or ``all`` to turn off or on, + respectively, for all types. + + * - coll_xhc_dynamic_leader + - false + - Dynamically elect the first rank from each hierarchy group to join the + collective as its leader, in broadcast. Introduces an atomic + compare-exchange per each call, when enabled. + +Other +~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 20 20 60 + + * - Parameter + - Default + - Description + + * - coll_xhc_shmem_backing + - /dev/shm + - Backing directory for shmem files. + + * - coll_xhc_memcpy_chunk_size + - 256K + - Break up large memcpy calls to smaller ones, using this chunk size. + Will actually attempt to mirror the value of ``smsc/xpmem``'s respective + parameter at run-time. + +Debug +~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 25 15 60 + + * - Parameter + - Default + - Description + + * - coll_xhc_print_info + - *none* + - Print information about the component's configuration, and its + constructed hierarchies. Takes a comma delimited list of: the name of + the collective primitive about which to print information; ``config`` + to print the configuration; ``all`` to print everything; ``dot`` along + with the name of a collective primitive to print its hierarchy in DOT + format. + +Limitations +----------- + +* **Heterogeneity**: XHC does not support nodes with non-uniform + datatype representations across ranks (Open MPI's ``proc_arch``). + +* **Non-commutative** operators are not currently supported in + reduction collectives. + +* **Derived datatypes** are not yet supported. + +* The Reduce implementation only supports rank 0 as the root, and will + automatically fall back to another component in other scenarios. Work in + progress. + +Other resources +--------------- + +All things XHC landing page: https://github.com/CARV-ICS-FORTH/XHC-OpenMPI + +Publications +~~~~~~~~~~~~ + +.. 
**Publications** + +| **A framework for hierarchical single-copy MPI collectives on multicore nodes** +| *George Katevenis, Manolis Ploumidis, and Manolis Marazakis* +| Cluster 2022, Heidelberg, Germany +| https://ieeexplore.ieee.org/document/9912729 + +| **Impact of Cache Coherence on the Performance of Shared-Memory based MPI Primitives: A Case Study for Broadcast on Intel Xeon Scalable Processors** +| *George Katevenis, Manolis Ploumidis, and Manolis Marazakis* +| ICPP 2023, Salt Lake City, Utah, USA +| https://dl.acm.org/doi/10.1145/3605573.3605616 diff --git a/ompi/mca/coll/xhc/README.md b/ompi/mca/coll/xhc/README.md index 213062a5edc..438fd712507 100644 --- a/ompi/mca/coll/xhc/README.md +++ b/ompi/mca/coll/xhc/README.md @@ -1,371 +1,8 @@ -# XHC: XPMEM-based Hierarchical Collectives +# XPMEM Hierarchical Collectives (XHC) XHC implements hierarchical & topology-aware intra-node MPI collectives, -utilizing XPMEM for efficient shared address space memory access between -processes. +(mainly) utilizing XPMEM for efficient shared address space data transfers +between MPI ranks. -## Main features - -* XHC constructs an **n-level hierarchy** (i.e. no algorithmic limitation on -level count), based on intra-node topological features. Rank/process locality -information is known thanks to Hwloc, and is obtained from Open MPI's -integrated book-keeping. - - Topological features that can currently be defined: - - - NUMA node - - CPU Socket - - L1/L2/L3 cache - - Hwthread/core - - Node (all ranks *are* in same node --> flat, no hierarchy at all) - - Example of a 3-level XHC hierarchy (numa+socket+node configuration): - - ![Example of 3-level XHC hierarchy](resources/xhc-hierarchy.svg) - - Furthermore, support for custom virtual user-defined hierarchies is - available, to allow fine-grained control over the communication pattern. - -* **Single-copy** transportation - - - Supported through integration with Open MPI's `opal/smsc` - (shared-memory-single-copy) framework. 
Selecting `smsc/xpmem` is highly - recommended. - - - Bcast support: XPMEM, CMA, KNEM - - Allreduce/Reduce support: XPMEM - - Barrier support: *(irrelevant)* - - - Application buffers are attached on the fly the first time they appear, - saved on and recovered from the registration cache in subsequent - appearances. (assuming smsc/xpmem) - -* **Copy-in-copy-out (CICO)** transportation - - - Through shared memory buffers that remain active throughout the - component's lifetime. - - - Switchover with single-copy at configurable message size. - - - Supported in all ops, regardless of smsc support or XPMEM presence (up to - maximum allowed message size). - -* **Inline** transportation - - - For especially small messages, payload data is inlined in the same cache - line as the control data. - - - Supported in all ops, regardless of smsc support or XPMEM presence (up to - maximum allowed message size). - -* Data-wise **pipelining** across all levels of the hierarchy. Allows for -lowering hierarchy-induced start-up overheads, and interleaving of operations -in applicable operations (e.g. reduce+bcast in allreduce). - -* **Lock-free** single-writer synchronization, with appropriate cache-line -separation where necessary. Consistency ensured via lightweight *read* or -*write* memory barriers. - -## Configuration options -- MCA params - -XHC can be customized via a number of standard Open MPI MCA parameters, though -defaults that should satisfy a wide number of systems are in place. - -The available parameters (also found in `coll_xhc_component.c`): - -#### *(prepend with "coll_xhc_")* - -* **priority** (default `0`): The priority of the coll/xhc component, used -during the component selection process. - -* **print_info** (default `false`): Print information about XHC's generated -hierarchy and its configuration. - -* **shmem_backing** (default `/dev/shm`): Backing directory for shmem files -used for XHC's synchronization fields and CICO buffers. 
- -* **dynamic_leader** (default `false`): Enables the feature that dynamically -elects an XHC-communicator leader at each collective (currently only applicable -for bcast). - -* **dynamic_reduce** (default `1`=`non-float`): Enables support for -out-of-order reduction. Ranks fetch data to reduce from multiple peers; -out-of-order reduction allows them to temporarily skip a peer when the expected -data is not yet prepared, instead of stalling. The default value auto-enables -it when the data is of non-float type; setting to `2`=`enabled for all types`, -might/will harm reproducibility of reductions with float types. - -* **reduce_load_balance** (default `0`=`non-leader`): Controls the -leader-to-member load balancing mode in reductions. Under `non-leader`, the -members, and not the leaders, perform reductions. With `top-level`, all members -as well as the leader of the top-most level perform reductions. With -`first-chunk`, leaders perform a single reduction on each level for a single -chunk at the beginning of the operation. `top+first` combines `top-level` and -`first-chunk`. Finally, with `all`, all ranks perform reductions equally. - -* **hierarchy** (default `"numa,socket"`): A comma separated list of -topological feature to which XHC's hierarchy-building algorithm should be -sensitive. `ompi_info` reports the possible values for the parameter. - - - In some ways, this is "just" a suggestion. The resulting hierarchy may - not exactly match the requested one. Reasons that this will occur: - - - A requested topological feature does not effectively segment the set - of ranks. (eg. `numa` was specified, but all ranks reside in the same - NUMA node) - - - No feature that all ranks have in common was provided. This a more - intrinsic detail, that you probably don't need to be aware of, but you - might come across if eg. you investigate the output of `print_info`. An - additional level will automatically be added in this case, no need to - worry about it. 
- - For all intents and purposes, a hierarchy of `numa,socket` is - interpreted as "segment the ranks according to NUMA node locality, - and then further segment them according to CPU socket locality". - - - The provided features will automatically be re-ordered when their - order does not match their order in the physical system. (unless a - virtual hierarchy is present in the list) - - - *Virtual Hierarchies*: The string may alternatively also contain "rank - lists" which specify exactly which ranks to group together, as well as some - other special modifiers. See - `coll_xhc_component.c:xhc_component_parse_hierarchy()` for further - explanation as well as syntax information. - -* **chunk_size** (default `16K`): The chunk size for the pipelining process. -Data is processed (eg broadcast, reduced) in this-much sized pieces at once. - - - It's possible to have a different chunk size for each level of the - hierarchy, achieved via providing a comma-separated list of sizes (eg. - `"16K,16K,128K"`) instead of single one. The sizes in this list's *DO NOT* - correspond to the items on hierarchy list; the hierarchy keys might be - re-ordered or reduced to match the system, but the chunk sizes will be - consumed in the order they are given, left-to-right -> bottom-to-top. - -* **uniform_chunks** (default `true`): Automatically optimize the chunk size -in reduction collectives, according to the message size, so that all members -will perform equal work. - -* **uniform_chunks_min** (default `1K`): The lowest allowed value for the chunk -size when uniform chunks are enabled. - -* **cico_max** (default `1K`): Copy-in-copy-out, instead of single-copy, will -be used for messages of *cico_max* or less bytes. - -*(Removed Parameters)* - -* **rcache_max**, **rcache_max_global** *(REMOVED with shift to opal/smsc)*: -Limit to number of attachments that the registration cache should hold. - - - A case can be made about their usefulness. 
If desired, shall be - re-implemented at smsc-level. - -## Limitations - -- *Intra-node support only* - - Define XHC as `coll/HAN`'s intra-node component to reap its benefits in - multi-node runs. - -- **Heterogeneity**: XHC does not support nodes with non-uniform (rank-wise) -datatype representations. (determined according to Open MPI's `proc_arch`) - -- **Non-commutative** operators are not supported by XHC's reduction -collectives. In past versions, they were, but only with a flat hierarchy; this -could make a return at some point. - -- **Derived Datatypes** are currently not supported. - -- XHC's Reduce currently only supports rank 0 as the root, and will -automatically fall back to another component for other cases. - -## Building - -This section describes how to compile the XHC component. - -XPMEM support in Open MPI is required to reap the full benefits of XHC. - -- The XHC component will build and work without XPMEM support, but for large -messages (i.e. ones above the CICO threshold) Allreduce/Reduce will be -disabled, and Broadcast will fall-back to less efficient mechanisms. - -- XPMEM can be obtained from , and then -compiled like a common kernel module. You might need to manually point Open -MPI's configure script to XPMEM's installation location, via the -`--with-xpmem=` parameter. - -- At run-time, you will need to insert the kernel module and obtain proper -access rights to `/dev/xpmem`. - -Apart from instructing Open MPI to include XPMEM support, the rest of the build -process is standard. General information on building Open MPI can be found in -its documentation. - - - - - -## Running - -General information on running Open MPI jobs can be found here: - - - -`mpirun`'s man page will also be useful: - - -In order for the XHC component to be chosen, its priority must be manually set -higher than other collectives components that implement the same primitives, -via the `coll_xhc_priority` MCA param. 
- - - Example: `--mca coll_xhc_priority 100` - -* Most likely, you will also want the `--bind-to core` param. Otherwise, the -reported process localities might be too general, preventing XHC from correctly -segmenting the system. (MCA `coll_xhc_print_info` will report the generated -hierarchy if you wish to experiment) - -### Tuning - -* Optional: You might wish to manually specify the topological features that -XHC's hierarchy should conform to. The default is `numa,socket`, which will -group the processes according to NUMA locality and then further group them -according to socket locality. See the `coll_xhc_hierarchy` param. - - - Example: `--mca coll_xhc_hierarchy numa,socket` - - Example: `--mca coll_xhc_hierarchy numa` - - Example: `--mca coll_xhc_hierarchy flat` - - In some systems, small-message Broadcast or the Barrier operation might - perform better with a flat tree instead of a hierarchical one. Currently, - manual benchmarking is required to accurately determine this. - -* Optional: You might wish to tune XHC's chunk size (default `16K`). Use the -`coll_xhc_chunk_size` param, and try values close to the default and see if -improvements are observed. - - - Example: `--mca coll_xhc_chunk_size 16K` - -* Optional: If you wish to focus on latencies of small/medium size messages, -you can try altering the cico-to-zcopy switchover point (MCA -`coll_xhc_cico_max`, default `1K`). - - - Example: `--mca coll_xhc_cico_max 1K` - -* Optional: If your application is heavy in Broadcast calls and you suspect -that specific ranks might be joining the collective with delay and causing -others to stall waiting for them, try enabling dynamic leadership (MCA -`coll_xhc_dynamic_leader`), and seeing if it makes an improvement. Please let -us know if it does :-). 
- - - Example: `--mca coll_xhc_dynamic_leader 1` - -### Example command lines - -*Assuming `PATH` and `LD_LIBRARY_PATH` have been set appropriately.* - -Default XHC configuration: -`$ mpirun --mca coll_xhc_priority 100 --bind-to core ` - -XHC w/ numa-sensitive hierarchy, chunk size @ 16K: -`$ mpirun --mca coll_xhc_priority 100 --mca coll_xhc_hierarchy numa --mca coll_xhc_chunk_size 16K --bind-to core ` - -XHC with flat hierarchy (ie. none at all): -`$ mpirun --mca coll_xhc_priority 100 --mca coll_xhc_hierarchy node [--bind-to core] ` - -## Benchmarking - -This section outlines some tips for benchmarking XHC and intra-node MPI -collectives in general. - -### Micro-Benchmarks - -For our micro-benchmarking purposes, we have been using [OSU's microbenchmark -suite](https://mvapich.cse.ohio-state.edu/benchmarks/). However, when -micro-benchmarking intra-node collectives, there are some important details -that one needs to look out for. - -**CPU Cache** An issue with the OSU micro-benchmarks is that they use the same -buffer for each iteration without altering it. Since modern processors -implicitly cache data, this can lead to false/unrealistic/unrepresentative -results, given that actual real-world applications do not (usually/optimally!) -perform duplicate operations. - -Availability of collective operation source data on a processor's local cache -hierarchy will cause certain phenomenons (e.g. slow path memory transactions) -and their effects to remain hidden and undetected in the micro-benchmarking -process, even though they *will* negatively impact performance in actual -applications, - -We have created "data-varying" (`_dv` suffix) benchmarks to counter this -problem, which will alter the data before each iteration. - -**Microbenchmark's pre-op Barrier** One also needs to be aware how the barrier -that appears before each iteration in the OSU micro-benchmarks affects the -result, especially so when latencies of small messages are concerned. 
The -underlying implementation of this barrier and the speed/efficiency of its -"release stage" will affect how fast and how synchronized ranks will exit the -barrier, and therefore how fast/synchronized they will enter the benchmarked -collective operation. - -For as accurate/clean performance reporting as possible, use a barrier -implementation that has as low a latency as possible. Furthermore, ideally, -all ranks should exit the barrier at the exact same time -- this is more -complex to measure, but can make a difference. In order to have a common -baseline when benchmarking and comparing multiple collectives implementation, -use this same barrier implementation for all benchmark scenarios. - -In the environments we tested, XHC's barrier was the best performing one. To -make using this barrier easier, we have put together a small new collective -component, `XB` (= xhc barrier). - -XB creates a new nested (duplicate) communicator with a hint to prioritize XHC, -and delegates barrier operations to it. A slightly inconvenient side-effect is -that XHC needs to be on the coll list (MCA `--mca coll`); it doesn't need to -have a high priority, though it can't be less than 0. - -* To benchmark Open MPI's `coll/tuned` with XB: `--mca coll basic,libnbc,tuned,xb,xhc --mca coll_xhc_priority 0 --mca coll_xb_priority 95 --mca coll_tuned_priority 90` - -* Or XHC itself, with XB: `--mca coll basic,libnbc,xb,xhc --mca coll_xhc_priority 90 --mca coll_xb_priority 95` - -It is also possible to specify the hierarchy to be used for XB's barrier (the -request will be passed in string form to XHC, only for the nested communicator) -via the `coll_xb_hierarchy` MCA parameter. - -In our fork of the OSU micro-benchmarks, you will also find -"integrity-checking" variants (`_integrity` suffix). These can help verify that -collective operations complete successfully without data corruption. 
- -Our OSU micro-benchmarks fork: - - -The XB component: - - -### Applications - -We expect to see any meaningful performance improvement with XHC in actual -applications, only if they spend a non-insignificant percentage of their -runtime in the collective operations that XHC implements: Broadcast, Barrier, -Allreduce, Reduce. - -One known such application is [miniAMR](https://github.com/Mantevo/miniAMR). -The application parameters (e.g. the refine count and frequency) will affect -the amount of time spent in the Allreduce primitive. - -Another one is Microsoft's [CNTK](https://github.com/microsoft/CNTK), also -heavy in Allreduce, though it actually makes use of the non-blocking -`Iallreduce` variant. However, it can easily be converted to use the blocking -variant instead (contact for patch). Comparing the performance of the -unmodified CNTK with OpenMPI's `coll/libnbc`, versus that of the patched CNTK -with XHC reveals that this modification is sensible and beneficial. - -Finally, while we have not yet rigorously evaluated it, -[PiSvM](http://pisvm.sourceforge.net/) is another candidate, with intense use -of MPI Broadcast. - ---- - -Contact: George Katevenis (gkatev@ics.forth.gr), Manolis Ploumidis (ploumid@ics.forth.gr) -Computer Architecture and VLSI Systems (CARV) Laboratory, ICS Forth +For additional info and resources about XHC, check the Open MPI docs: +https://docs.open-mpi.org/ From fb7766299dc3bb9a0b09d2ff512cf0d2a529ab31 Mon Sep 17 00:00:00 2001 From: Tomislav Janjusic Date: Fri, 13 Feb 2026 15:28:49 -0600 Subject: [PATCH 29/53] mpif-h: fix request_get_status when MPI_STATUS_IGNORE is passed The Fortran mpif-h binding for MPI_Request_get_status incorrectly returned flag=false when MPI_STATUS_IGNORE was passed, without ever checking request completion. Always call PMPI_Request_get_status to get the correct flag value, and only conditionally copy the status back. 
This is the mpif-h counterpart to 95e4599ed2 which fixed the same bug in the use-mpi-f08 .c.in template. Related to issue #13671 Signed-off-by: Tomislav Janjusic Co-authored-by: Howard Pritchard --- ompi/mpi/fortran/mpif-h/request_get_status_f.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/ompi/mpi/fortran/mpif-h/request_get_status_f.c b/ompi/mpi/fortran/mpif-h/request_get_status_f.c index 7a5c9d57716..7fac2b2e051 100644 --- a/ompi/mpi/fortran/mpif-h/request_get_status_f.c +++ b/ompi/mpi/fortran/mpif-h/request_get_status_f.c @@ -12,6 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -75,16 +76,11 @@ void ompi_request_get_status_f(MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Request c_req = PMPI_Request_f2c( *request ); OMPI_LOGICAL_NAME_DECL(flag); - /* This seems silly, but someone will do it */ - - if (OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - *flag = OMPI_INT_2_LOGICAL(0); - c_ierr = MPI_SUCCESS; - } else { - c_ierr = PMPI_Request_get_status(c_req, - OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), - &c_status); - OMPI_SINGLE_INT_2_LOGICAL(flag); + c_ierr = PMPI_Request_get_status(c_req, + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), + &c_status); + OMPI_SINGLE_INT_2_LOGICAL(flag); + if (!OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { PMPI_Status_c2f( &c_status, status ); } if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); From 7441b3ef8cf17bc5c31b623373de7336ab76e7ff Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 18 Feb 2026 22:17:07 -0500 Subject: [PATCH 30/53] Editing Notified RMA implementation with new design Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm.h | 3 +++ ompi/mca/osc/sm/osc_sm_comm.c | 35 +++++++++++++++++++++++++---- 
ompi/mca/osc/sm/osc_sm_component.c | 36 ++++++++++++++++++++---------- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index bf80c082ac8..cb104e5df15 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -48,6 +48,9 @@ struct ompi_osc_sm_node_state_t { opal_atomic_int32_t complete_count; ompi_osc_sm_lock_t lock; opal_atomic_lock_t accumulate_lock; + uint32_t notify_counter_count; + uint64_t notify_counter_offset; /* offset from segment_base, not raw pointer */ + }; typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t; diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 6cc5384d750..359cdf7147f 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -20,6 +20,17 @@ #include "osc_sm.h" +static inline uint64_t *osc_sm_target_notify_base(ompi_osc_sm_module_t *module, int target) +{ + if (NULL == module->segment_base) { + /* single-rank path: notify_counters is a regular local allocation */ + return module->notify_counters; + } + + return (uint64_t *) ((char *) module->segment_base + + module->node_states[target].notify_counter_offset); +} + int ompi_osc_sm_rput(const void *origin_addr, size_t origin_count, @@ -97,8 +108,12 @@ ompi_osc_sm_rput_notify(const void *origin_addr, * complete. */ *ompi_req = &ompi_request_empty; + if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + opal_atomic_wmb(); - opal_atomic_add(&module->notify_counters[notify], 1); + opal_atomic_add(&osc_sm_target_notify_base(module, target)[notify], 1); return OMPI_SUCCESS; } @@ -180,8 +195,12 @@ ompi_osc_sm_rget_notify(void *origin_addr, * complete. 
*/ *ompi_req = &ompi_request_empty; + if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + opal_atomic_rmb(); - opal_atomic_add(&module->notify_counters[notify], 1); + opal_atomic_add(&osc_sm_target_notify_base(module, target)[notify], 1); return OMPI_SUCCESS; } @@ -354,8 +373,12 @@ ompi_osc_sm_put_notify(const void *origin_addr, return ret; } + if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + opal_atomic_wmb(); - opal_atomic_add(&module->notify_counters[notify], 1); + opal_atomic_add(&osc_sm_target_notify_base(module, target)[notify], 1); return ret; } @@ -421,8 +444,12 @@ ompi_osc_sm_get_notify(void *origin_addr, if (OMPI_SUCCESS != ret) { return ret; } + if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + opal_atomic_rmb(); - opal_atomic_add(&module->notify_counters[notify], 1); + opal_atomic_add(&osc_sm_target_notify_base(module, target)[notify], 1); return ret; } diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 5500a2bb412..7954ef6963e 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -259,8 +259,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis /* allocate notify counters for single process case */ module->notify_counters = calloc(OSC_SM_MAX_NOTIFY_COUNTERS, sizeof(uint64_t)); if (NULL == module->notify_counters) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + module->node_states[0].notify_counter_count = OSC_SM_MAX_NOTIFY_COUNTERS; + module->node_states[0].notify_counter_offset = 0; } else { - unsigned long total, *rbuf; + unsigned long total, total_counters, gather_values[2], *rbuf; int i, flag; size_t pagesize; size_t state_size; @@ -274,7 +276,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t 
dis /* get the pagesize */ pagesize = opal_getpagesize(); - rbuf = malloc(sizeof(unsigned long) * comm_size); + rbuf = malloc(sizeof(unsigned long) * comm_size * 2 ); if (NULL == rbuf) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; /* Note that the alloc_shared_noncontig info key only has @@ -298,9 +300,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis "allocating window using contiguous strategy"); } - total = size; - ret = module->comm->c_coll->coll_allgather(&total, 1, MPI_UNSIGNED_LONG, - rbuf, 1, MPI_UNSIGNED_LONG, + gather_values[0] = size; + gather_values[1] = OSC_SM_MAX_NOTIFY_COUNTERS; + ret = module->comm->c_coll->coll_allgather(gather_values, 2, MPI_UNSIGNED_LONG, + rbuf, 2, MPI_UNSIGNED_LONG, module->comm, module->comm->c_coll->coll_allgather_module); if (OMPI_SUCCESS != ret) { @@ -309,8 +312,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis } total = 0; + total_counters = 0; for (i = 0 ; i < comm_size ; ++i) { - total += rbuf[i]; + total += rbuf[2 * i]; + total_counters += rbuf[2 * i + 1]; if (module->noncontig) { total += OPAL_ALIGN_PAD_AMOUNT(total, pagesize); } @@ -321,7 +326,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis state_size += OPAL_ALIGN_PAD_AMOUNT(state_size, 64); posts_size = comm_size * post_size * sizeof (module->posts[0][0]); posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64); - notify_counters_size = OSC_SM_MAX_NOTIFY_COUNTERS * sizeof(uint64_t); + notify_counters_size = total_counters * sizeof(uint64_t); notify_counters_size += OPAL_ALIGN_PAD_AMOUNT(notify_counters_size, 64); data_base_size = state_size + posts_size + notify_counters_size; data_base_size += OPAL_ALIGN_PAD_AMOUNT(data_base_size, pagesize); @@ -388,17 +393,23 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis module->notify_counters = (uint64_t *) ((char *)(module->node_states + comm_size) + 
OPAL_ALIGN_PAD_AMOUNT((uintptr_t)(module->node_states + comm_size), 64)); /* zero out notify counters */ - memset(module->notify_counters, 0, OSC_SM_MAX_NOTIFY_COUNTERS * sizeof(uint64_t)); + memset(module->notify_counters, 0, total_counters * sizeof(uint64_t)); - for (i = 0, total = data_base_size ; i < comm_size ; ++i) { + for (i = 0, total = data_base_size, total_counters = 0 ; i < comm_size ; ++i) { if (i > 0) { module->posts[i] = module->posts[i - 1] + post_size; } - module->sizes[i] = rbuf[i]; + module->node_states[i].notify_counter_count = (uint32_t) rbuf[2 * i + 1]; + module->node_states[i].notify_counter_offset = + (uint64_t) ((char *) (module->notify_counters + total_counters) - + (char *) module->segment_base); + total_counters += rbuf[2 * i + 1]; + + module->sizes[i] = rbuf[2 * i]; if (module->sizes[i] || !module->noncontig) { module->bases[i] = ((char *) module->segment_base) + total; - total += rbuf[i]; + total += rbuf[2 * i]; if (module->noncontig) { total += OPAL_ALIGN_PAD_AMOUNT(total, pagesize); } @@ -412,7 +423,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, ptrdiff_t dis /* initialize my state shared */ module->my_node_state = &module->node_states[ompi_comm_rank(module->comm)]; - memset (module->my_node_state, 0, sizeof(*module->my_node_state)); + module->my_node_state->complete_count = 0; + memset (&module->my_node_state->lock, 0, sizeof(module->my_node_state->lock)); *base = module->bases[ompi_comm_rank(module->comm)]; From a79248730dc6097dd845cb99d5afa4dc1bb80d52 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 23 Feb 2026 22:11:34 +0200 Subject: [PATCH 31/53] No need to link against common_sm. 
Signed-off-by: George Bosilca --- opal/mca/btl/smcuda/Makefile.am | 4 ---- 1 file changed, 4 deletions(-) diff --git a/opal/mca/btl/smcuda/Makefile.am b/opal/mca/btl/smcuda/Makefile.am index c0cdf788e8d..9aed69bfb7f 100644 --- a/opal/mca/btl/smcuda/Makefile.am +++ b/opal/mca/btl/smcuda/Makefile.am @@ -46,15 +46,11 @@ component_noinst = libmca_btl_smcuda.la component_install = endif -# See opal/mca/common/cuda/Makefile.am for an explanation of -# libmca_common_sm.la. - mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_btl_smcuda_la_SOURCES = $(libmca_btl_smcuda_la_sources) mca_btl_smcuda_la_LDFLAGS = -module -avoid-version $(btl_smcuda_LDFLAGS) mca_btl_smcuda_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/sm/lib@OPAL_LIB_NAME@mca_common_sm.la \ $(btl_smcuda_LIBS) mca_btl_smcuda_la_CPPFLAGS = $(btl_smcuda_CPPFLAGS) From da0a4c9841c4e70e139e32756bd59e1f5b162c2d Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 24 Feb 2026 13:11:31 +0200 Subject: [PATCH 32/53] Fix the _Quad discovery. 
Issue identified by copilot on PR #13713 Signed-off-by: George Bosilca --- config/ompi_fortran_check.m4 | 4 ++-- config/ompi_fortran_check_real16_c_equiv.m4 | 21 ++++++++++++++------- config/ompi_setup_mpi_fortran.m4 | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/config/ompi_fortran_check.m4 b/config/ompi_fortran_check.m4 index e479a87ac64..7fd2a790353 100644 --- a/config/ompi_fortran_check.m4 +++ b/config/ompi_fortran_check.m4 @@ -137,8 +137,8 @@ AC_DEFUN([OMPI_FORTRAN_CHECK], [ long*double*_Complex) ofc_type_kind=C_LONG_DOUBLE_COMPLEX ;; opal_short_float_t) ofc_type_kind=C_SHORT_FLOAT ;; opal_short_float_complex_t) ofc_type_kind=C_SHORT_FLOAT_COMPLEX ;; - _Float128) ofc_type_kind=C__FLOAT128 ;; - __float128) ofc_type_kind=C___FLOAT128 ;; + _Float128) ofc_type_kind=C_FLOAT128 ;; + __float128) ofc_type_kind=C_FLOAT128 ;; *) # Skip types like "DOUBLE PRECISION" ;; diff --git a/config/ompi_fortran_check_real16_c_equiv.m4 b/config/ompi_fortran_check_real16_c_equiv.m4 index 85141c798b6..b9e67d9606e 100644 --- a/config/ompi_fortran_check_real16_c_equiv.m4 +++ b/config/ompi_fortran_check_real16_c_equiv.m4 @@ -61,19 +61,26 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_REAL16_C_EQUIV],[ AC_MSG_RESULT([works!])], [AC_MSG_RESULT([does not work])]) ]) - # As recent Intel compilers identify as GNU we will always test for Quad support if no other tests were succesfull + # As recent Intel compilers identify as GNU we will always test for Quad + # support if no other tests were successful AS_IF([test "$fortran_real16_happy" = "no"], - [AC_CHECK_TYPES(_Quad) - AS_IF([test "$ac_cv_type__Quad" = "yes"], - [AC_MSG_CHECKING([if the compiler _Quad == REAL*16]) - CFLAGS_save="$CFLAGS" + [AC_CHECK_TYPES([_Quad]) + AS_IF([test "$ac_cv_type__Quad" != "yes"], + [CFLAGS_save="$CFLAGS" OPAL_FLAGS_APPEND_UNIQ([CFLAGS], ["-Qoption,cpp,--extended_float_types"]) + # force the check as we have updated CFLAGS + unset ac_cv_type__Quad + AC_CHECK_TYPES([_Quad]) + AS_IF([test 
"$ac_cv_type__Quad" != "yes"], + [CFLAGS="$CFLAGS_save"]) + ]) + AS_IF([test "$ac_cv_type__Quad" != "yes"], + [AC_MSG_CHECKING([if the compiler _Quad == REAL*16]) OMPI_FORTRAN_CHECK_REAL16_EQUIV_TYPE([_Quad], [q]) AS_IF([test "$fortran_real16_happy" = "yes"], [OMPI_FORTRAN_REAL16_C_TYPE="_Quad" AC_MSG_RESULT([works!])], - [CFLAGS="$CFLAGS_save" - AC_MSG_RESULT([does not work])]) + [AC_MSG_RESULT([does not work])]) ]) ]) # We have to [re-]print a new message here, because diff --git a/config/ompi_setup_mpi_fortran.m4 b/config/ompi_setup_mpi_fortran.m4 index 3474276e661..c396a2efab6 100644 --- a/config/ompi_setup_mpi_fortran.m4 +++ b/config/ompi_setup_mpi_fortran.m4 @@ -226,7 +226,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [long double _Complex, double _Complex, float _Complex, short float _Complex, opal_short_float_complex_t], [16], [no]) OMPI_FORTRAN_CHECK([COMPLEX*32], [no], - [_Float128 _Complex, long double _Complex, double _Complex, float _Complex, short float _Complex, opal_short_float_complex_t], + [_Float128 _Complex, __float128 _Complex, long double _Complex, double _Complex, float _Complex, short float _Complex, opal_short_float_complex_t], [32], [no]) # Double precision complex types are not standard, but many # compilers support it. 
Code should be wrapped with #ifdef From fb9562a4a9ca329f065e3d8f8018ed602af681a4 Mon Sep 17 00:00:00 2001 From: Tomislav Janjusic Date: Tue, 24 Feb 2026 21:52:52 -0600 Subject: [PATCH 33/53] fix deprecated variable Signed-off-by: Tomislav Janjusic --- contrib/platform/mellanox/optimized.conf | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/platform/mellanox/optimized.conf b/contrib/platform/mellanox/optimized.conf index 6a7be025a66..b1316c4b67d 100644 --- a/contrib/platform/mellanox/optimized.conf +++ b/contrib/platform/mellanox/optimized.conf @@ -85,8 +85,6 @@ opal_warn_on_missing_libcuda = 0 bml_r2_show_unreach_errors = 0 # alltoall algorithm selection settings for tuned coll mca -coll_tuned_alltoall_large_msg = 250000 -coll_tuned_alltoall_min_procs = 2048 coll_tuned_alltoall_algorithm_max_requests = 8 coll_tuned_scatter_intermediate_msg = 8192 coll_tuned_scatter_large_msg = 250000 From 5a15b42bd57aeb045abaf31afc2b395929c76e58 Mon Sep 17 00:00:00 2001 From: Nithya V S Date: Thu, 26 Feb 2026 14:40:26 +0530 Subject: [PATCH 34/53] coll/acoll: Fix sbuf handling in reduce_topo Fixes the way MPI_IN_PLACE is handled in coll_acoll_reduce_topo(). This resolves https://github.com/open-mpi/ompi/issues/13736. Signed-off-by: Nithya V S --- ompi/mca/coll/acoll/coll_acoll_reduce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/coll/acoll/coll_acoll_reduce.c b/ompi/mca/coll/acoll/coll_acoll_reduce.c index 69da3cb49cf..28fc3c62c6a 100644 --- a/ompi/mca/coll/acoll/coll_acoll_reduce.c +++ b/ompi/mca/coll/acoll/coll_acoll_reduce.c @@ -66,7 +66,7 @@ static inline int coll_acoll_reduce_topo(const void *sbuf, void *rbuf, size_t co int use_socket = (0 == acoll_module->use_socket) ? 
1 : acoll_module->use_socket; tmp_sbuf = (char *) sbuf; - if ((MPI_IN_PLACE == sbuf) && (rank == root)) { + if (MPI_IN_PLACE == sbuf) { tmp_sbuf = (char *) rbuf; } From 5a92c69ac95e016b7382ccc72798e7426769e008 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 24 Feb 2026 15:26:45 -0700 Subject: [PATCH 35/53] pmix/prrte:advance shas advance openpmix to 53fce423d5d6b25798ed1f32837671dc55d0230d advance prrte to 2d9b0aaaeea49a0e7850aed95e5ace9340c7d847 Signed-off-by: Howard Pritchard --- 3rd-party/openpmix | 2 +- 3rd-party/prrte | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/3rd-party/openpmix b/3rd-party/openpmix index 7704efaf865..53fce423d5d 160000 --- a/3rd-party/openpmix +++ b/3rd-party/openpmix @@ -1 +1 @@ -Subproject commit 7704efaf865328234e3cb1f77ff393adc971c9fe +Subproject commit 53fce423d5d6b25798ed1f32837671dc55d0230d diff --git a/3rd-party/prrte b/3rd-party/prrte index 91544b8d2c5..2d9b0aaaeea 160000 --- a/3rd-party/prrte +++ b/3rd-party/prrte @@ -1 +1 @@ -Subproject commit 91544b8d2c5ac84585022d0edad68e38f375a917 +Subproject commit 2d9b0aaaeea49a0e7850aed95e5ace9340c7d847 From 629d6eff45825c61ed47e71f5c50cef6ba4ab494 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Thu, 26 Feb 2026 12:22:17 -0700 Subject: [PATCH 36/53] Comm create from group: improve debug statements when fetching PMIX_GROUP_LOCAL_CID values from pmix server. Signed-off-by: Howard Pritchard --- ompi/communicator/comm_cid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index be99de913ab..ddf1657b9ab 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -24,7 +24,7 @@ * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * Copyright (c) 2020-2025 Triad National Security, LLC. 
All rights + * Copyright (c) 2020-2026 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -1094,7 +1094,7 @@ int ompi_comm_get_remote_cid_from_pmix (ompi_communicator_t *comm, int dest, uin } if (val->type != PMIX_SIZE) { - OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID type mismatch")); + OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID type mismatch - %s", PMIx_Value_string(val))); rc = OMPI_ERR_TYPE_MISMATCH; goto done; } From 032335b6439124412f92faf75fdb6cc0362cb21e Mon Sep 17 00:00:00 2001 From: Shachar Hasson Date: Sun, 15 Feb 2026 16:24:47 +0200 Subject: [PATCH 37/53] OMPI/RTE: Modify job name printing to use thread local storage Signed-off-by: Shachar Hasson Signed-off-by: Thomas Vegas --- ompi/runtime/ompi_rte.c | 50 +++++------------------------------------ 1 file changed, 5 insertions(+), 45 deletions(-) diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index 651cf9d0b5a..f94df4fbd5d 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -85,56 +85,20 @@ static int _setup_proc_session_dir(char **sdir); #define OPAL_PRINT_NAME_ARGS_MAX_SIZE 50 #define OPAL_PRINT_NAME_ARG_NUM_BUFS 16 -static bool fns_init=false; -static opal_tsd_tracked_key_t print_args_tsd_key; static char* opal_print_args_null = "NULL"; typedef struct { - char *buffers[OPAL_PRINT_NAME_ARG_NUM_BUFS]; + char buffers[OPAL_PRINT_NAME_ARG_NUM_BUFS][OPAL_PRINT_NAME_ARGS_MAX_SIZE + 1]; int cntr; } opal_print_args_buffers_t; -static void -buffer_cleanup(void *value) -{ - int i; - opal_print_args_buffers_t *ptr; - - if (NULL != value) { - ptr = (opal_print_args_buffers_t*)value; - for (i=0; i < OPAL_PRINT_NAME_ARG_NUM_BUFS; i++) { - free(ptr->buffers[i]); - } - free (ptr); - } - fns_init = false; -} - static opal_print_args_buffers_t* get_print_name_buffer(void) { - opal_print_args_buffers_t *ptr; - int ret, i; - - if (!fns_init) { - /* setup the print_args function */ - 
OBJ_CONSTRUCT(&print_args_tsd_key, opal_tsd_tracked_key_t); - opal_tsd_tracked_key_set_destructor(&print_args_tsd_key, buffer_cleanup); - fns_init = true; - } - - ret = opal_tsd_tracked_key_get(&print_args_tsd_key, (void**)&ptr); - if (OPAL_SUCCESS != ret) return NULL; + static opal_thread_local opal_print_args_buffers_t name_buffer = { + .cntr = 0 + }; - if (NULL == ptr) { - ptr = (opal_print_args_buffers_t*)malloc(sizeof(opal_print_args_buffers_t)); - for (i=0; i < OPAL_PRINT_NAME_ARG_NUM_BUFS; i++) { - ptr->buffers[i] = (char *) malloc((OPAL_PRINT_NAME_ARGS_MAX_SIZE+1) * sizeof(char)); - } - ptr->cntr = 0; - ret = opal_tsd_tracked_key_set(&print_args_tsd_key, (void*)ptr); - } - - return (opal_print_args_buffers_t*) ptr; + return &name_buffer; } static char* ompi_pmix_print_jobids(const opal_jobid_t job) @@ -1043,10 +1007,6 @@ int ompi_rte_finalize(void) opal_process_info.initial_errhandler = NULL; } - if (fns_init) { - OBJ_DESTRUCT(&print_args_tsd_key); - } - /* cleanup our internal nspace hack */ opal_pmix_finalize_nspace_tracker(); From 6ee9b494a0a674584302ee9cf280669e0114242a Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 4 Mar 2026 13:19:28 -0500 Subject: [PATCH 38/53] Implementing Notify Query Signed-off-by: Joseph Antony --- ompi/include/mpi.h.in | 2 ++ ompi/mca/osc/osc.h | 5 ++++ ompi/mca/osc/sm/osc_sm.h | 4 +++ ompi/mca/osc/sm/osc_sm_comm.c | 19 +++++++++++++ ompi/mca/osc/sm/osc_sm_component.c | 1 + ompi/mca/osc/ubcl/osc_ubcl.c | 5 ++++ ompi/mpi/c/Makefile.am | 2 ++ ompi/mpi/c/win_get_notify_value.c.in | 41 ++++++++++++++++++++++++++++ 8 files changed, 79 insertions(+) create mode 100644 ompi/mpi/c/win_get_notify_value.c.in diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index d34624cd1d2..ed6d69d6222 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -2619,6 +2619,7 @@ OMPI_DECLSPEC int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandle OMPI_DECLSPEC int MPI_Win_get_group(MPI_Win win, MPI_Group *group); 
OMPI_DECLSPEC int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); +OMPI_DECLSPEC int MPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int MPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_lock_all(int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); @@ -3807,6 +3808,7 @@ OMPI_DECLSPEC int PMPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandl OMPI_DECLSPEC int PMPI_Win_get_group(MPI_Win win, MPI_Group *group); OMPI_DECLSPEC int PMPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int PMPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); +OMPI_DECLSPEC int PMPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int PMPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_lock_all(int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index 83c7af9305e..b43757b9b5c 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -245,6 +245,10 @@ typedef int (*ompi_osc_base_module_get_notify_fn_t)(void *origin_addr, int notify, struct ompi_win_t *win); +typedef int (*ompi_osc_base_module_win_get_notify_value_fn_t)(struct ompi_win_t *win, + int notify, + MPI_Count *value); + typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, @@ -421,6 +425,7 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_put_notify_fn_t osc_put_notify; ompi_osc_base_module_get_fn_t osc_get; ompi_osc_base_module_get_notify_fn_t osc_get_notify; + ompi_osc_base_module_win_get_notify_value_fn_t osc_win_get_notify_value; ompi_osc_base_module_accumulate_fn_t osc_accumulate; 
ompi_osc_base_module_compare_and_swap_fn_t osc_compare_and_swap; ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index cb104e5df15..f80c0116d73 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -149,6 +149,10 @@ int ompi_osc_sm_get_notify(void *origin_addr, struct ompi_datatype_t *target_dt, int notify, struct ompi_win_t *win); + +int ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, + int notify, + MPI_Count *value); int ompi_osc_sm_accumulate(const void *origin_addr, size_t origin_count, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 359cdf7147f..0597ecf3095 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -17,6 +17,7 @@ #include "ompi/mca/osc/osc.h" #include "ompi/mca/osc/base/base.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" +#include "ompi/communicator/communicator.h" #include "osc_sm.h" @@ -31,6 +32,24 @@ static inline uint64_t *osc_sm_target_notify_base(ompi_osc_sm_module_t *module, module->node_states[target].notify_counter_offset); } +int +ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, + int notify, + MPI_Count *value) +{ + ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t *) win->w_osc_module; + int rank = ompi_comm_rank(module->comm); + + if (notify < 0 || (uint32_t) notify >= module->node_states[rank].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + + opal_atomic_rmb(); + *value = (MPI_Count) osc_sm_target_notify_base(module, rank)[notify]; + + return OMPI_SUCCESS; +} + int ompi_osc_sm_rput(const void *origin_addr, size_t origin_count, diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 7954ef6963e..0a3f7002337 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -82,6 +82,7 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_put_notify = ompi_osc_sm_put_notify, 
.osc_get = ompi_osc_sm_get, .osc_get_notify = ompi_osc_sm_get_notify, + .osc_win_get_notify_value = ompi_osc_sm_win_get_notify_value, .osc_accumulate = ompi_osc_sm_accumulate, .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, .osc_fetch_and_op = ompi_osc_sm_fetch_and_op, diff --git a/ompi/mca/osc/ubcl/osc_ubcl.c b/ompi/mca/osc/ubcl/osc_ubcl.c index 5e81ed1add3..d358605244b 100644 --- a/ompi/mca/osc/ubcl/osc_ubcl.c +++ b/ompi/mca/osc/ubcl/osc_ubcl.c @@ -80,14 +80,19 @@ mca_osc_ubcl_module_t mca_osc_ubcl_module_template = { win_free, ompi_osc_ubcl_put, + NULL, ompi_osc_ubcl_get, + NULL, + NULL, ompi_osc_ubcl_accumulate, ompi_osc_ubcl_compare_and_swap, ompi_osc_ubcl_fetch_and_op, ompi_osc_ubcl_get_accumulate, ompi_osc_ubcl_rput, + NULL, ompi_osc_ubcl_rget, + NULL, ompi_osc_ubcl_raccumulate, ompi_osc_ubcl_rget_accumulate, diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index f6757d669dd..12c8752869c 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -486,6 +486,7 @@ prototype_sources = \ win_get_group.c.in \ win_get_info.c.in \ win_get_name.c.in \ + win_get_notify_value.c.in \ win_lock_all.c.in \ win_lock.c.in \ win_post.c.in \ @@ -956,6 +957,7 @@ interface_profile_sources = \ win_get_group_generated.c \ win_get_info_generated.c \ win_get_name_generated.c \ + win_get_notify_value_generated.c \ win_lock_all_generated.c \ win_lock_generated.c \ win_post_generated.c \ diff --git a/ompi/mpi/c/win_get_notify_value.c.in b/ompi/mpi/c/win_get_notify_value.c.in new file mode 100644 index 00000000000..228999c13ea --- /dev/null +++ b/ompi/mpi/c/win_get_notify_value.c.in @@ -0,0 +1,41 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2026 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/win/win.h" +#include "ompi/mca/osc/osc.h" + +PROTOTYPE ERROR_CLASS win_get_notify_value(WIN win, INT notification_idx, ELEMENT_COUNT value) +{ + int rc; + + if (MPI_PARAM_CHECK) { + rc = OMPI_SUCCESS; + + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (ompi_win_invalid(win)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_WIN, FUNC_NAME); + } else if (notification_idx < 0) { + rc = MPI_ERR_NOTIFY_IDX; + } else if (NULL == value) { + rc = MPI_ERR_ARG; + } + + OMPI_ERRHANDLER_CHECK(rc, win, rc, FUNC_NAME); + } + + rc = win->w_osc_module->osc_win_get_notify_value(win, notification_idx, value); + OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); +} From d2d71f8a7019296cbd418f0c358cf0e8a1f3cb35 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 4 Mar 2026 13:39:55 -0500 Subject: [PATCH 39/53] Changes to Notify Query Signed-off-by: Joseph Antony --- ompi/mca/osc/osc.h | 2 +- ompi/mca/osc/sm/osc_sm.h | 2 +- ompi/mca/osc/sm/osc_sm_comm.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index b43757b9b5c..8aebf7446b1 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -247,7 +247,7 @@ typedef int (*ompi_osc_base_module_get_notify_fn_t)(void *origin_addr, typedef int (*ompi_osc_base_module_win_get_notify_value_fn_t)(struct ompi_win_t *win, int notify, - MPI_Count *value); + OMPI_MPI_COUNT_TYPE *value); typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, size_t origin_count, diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index f80c0116d73..c294cc7d1f6 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -152,7 +152,7 @@ int ompi_osc_sm_get_notify(void *origin_addr, int ompi_osc_sm_win_get_notify_value(struct 
ompi_win_t *win, int notify, - MPI_Count *value); + OMPI_MPI_COUNT_TYPE *value); int ompi_osc_sm_accumulate(const void *origin_addr, size_t origin_count, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 0597ecf3095..b0b3bde37f8 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -35,7 +35,7 @@ static inline uint64_t *osc_sm_target_notify_base(ompi_osc_sm_module_t *module, int ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, int notify, - MPI_Count *value) + OMPI_MPI_COUNT_TYPE *value) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t *) win->w_osc_module; int rank = ompi_comm_rank(module->comm); @@ -45,7 +45,7 @@ ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, } opal_atomic_rmb(); - *value = (MPI_Count) osc_sm_target_notify_base(module, rank)[notify]; + *value = (OMPI_MPI_COUNT_TYPE) osc_sm_target_notify_base(module, rank)[notify]; return OMPI_SUCCESS; } From bfe1797c5735521be9f97c30d09d475669e04760 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 4 Mar 2026 18:24:42 -0500 Subject: [PATCH 40/53] Implemented Set Notify Query Signed-off-by: Joseph Antony --- ompi/include/mpi.h.in | 2 ++ ompi/mca/osc/osc.h | 5 ++++ ompi/mca/osc/sm/osc_sm.h | 4 +++ ompi/mca/osc/sm/osc_sm_comm.c | 21 +++++++++++++- ompi/mca/osc/sm/osc_sm_component.c | 1 + ompi/mpi/c/Makefile.am | 2 ++ ompi/mpi/c/win_set_notify_value.c.in | 41 ++++++++++++++++++++++++++++ 7 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 ompi/mpi/c/win_set_notify_value.c.in diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index ed6d69d6222..eff39e9b1f0 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -2620,6 +2620,7 @@ OMPI_DECLSPEC int MPI_Win_get_group(MPI_Win win, MPI_Group *group); OMPI_DECLSPEC int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); OMPI_DECLSPEC int MPI_Win_get_notify_value(MPI_Win 
win, int notification_idx, MPI_Count *value); +OMPI_DECLSPEC int MPI_Win_set_notify_value(MPI_Win win, int notification_idx, MPI_Count value); OMPI_DECLSPEC int MPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_lock_all(int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); @@ -3809,6 +3810,7 @@ OMPI_DECLSPEC int PMPI_Win_get_group(MPI_Win win, MPI_Group *group); OMPI_DECLSPEC int PMPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int PMPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); OMPI_DECLSPEC int PMPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); +OMPI_DECLSPEC int PMPI_Win_set_notify_value(MPI_Win win, int notification_idx, MPI_Count value); OMPI_DECLSPEC int PMPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_lock_all(int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index 8aebf7446b1..ca3b9aac3ef 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -248,6 +248,10 @@ typedef int (*ompi_osc_base_module_get_notify_fn_t)(void *origin_addr, typedef int (*ompi_osc_base_module_win_get_notify_value_fn_t)(struct ompi_win_t *win, int notify, OMPI_MPI_COUNT_TYPE *value); + +typedef int (*ompi_osc_base_module_win_set_notify_value_fn_t)(struct ompi_win_t *win, + int notify, + OMPI_MPI_COUNT_TYPE value); typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, size_t origin_count, @@ -426,6 +430,7 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_get_fn_t osc_get; ompi_osc_base_module_get_notify_fn_t osc_get_notify; ompi_osc_base_module_win_get_notify_value_fn_t osc_win_get_notify_value; + ompi_osc_base_module_win_set_notify_value_fn_t osc_win_set_notify_value; ompi_osc_base_module_accumulate_fn_t osc_accumulate; 
ompi_osc_base_module_compare_and_swap_fn_t osc_compare_and_swap; ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index c294cc7d1f6..cec3fa44bb2 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -153,6 +153,10 @@ int ompi_osc_sm_get_notify(void *origin_addr, int ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, int notify, OMPI_MPI_COUNT_TYPE *value); + +int ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, + int notify, + OMPI_MPI_COUNT_TYPE value); int ompi_osc_sm_accumulate(const void *origin_addr, size_t origin_count, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index b0b3bde37f8..b3879c11949 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -44,8 +44,27 @@ ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, return OMPI_ERR_BAD_PARAM; } - opal_atomic_rmb(); + *value = (OMPI_MPI_COUNT_TYPE) osc_sm_target_notify_base(module, rank)[notify]; + opal_atomic_rmb(); + + return OMPI_SUCCESS; +} + +int +ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, + int notify, + OMPI_MPI_COUNT_TYPE value) +{ + ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t *) win->w_osc_module; + int rank = ompi_comm_rank(module->comm); + + if (notify < 0 || (uint32_t) notify >= module->node_states[rank].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + + opal_atomic_wmb(); + osc_sm_target_notify_base(module, rank)[notify] = (uint64_t) value; return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 0a3f7002337..e64d04d6130 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -83,6 +83,7 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_get = ompi_osc_sm_get, .osc_get_notify = ompi_osc_sm_get_notify, .osc_win_get_notify_value = ompi_osc_sm_win_get_notify_value, + .osc_win_set_notify_value = 
ompi_osc_sm_win_set_notify_value, .osc_accumulate = ompi_osc_sm_accumulate, .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, .osc_fetch_and_op = ompi_osc_sm_fetch_and_op, diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index 12c8752869c..fef08119635 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -487,6 +487,7 @@ prototype_sources = \ win_get_info.c.in \ win_get_name.c.in \ win_get_notify_value.c.in \ + win_set_notify_value.c.in \ win_lock_all.c.in \ win_lock.c.in \ win_post.c.in \ @@ -958,6 +959,7 @@ interface_profile_sources = \ win_get_info_generated.c \ win_get_name_generated.c \ win_get_notify_value_generated.c \ + win_set_notify_value_generated.c \ win_lock_all_generated.c \ win_lock_generated.c \ win_post_generated.c \ diff --git a/ompi/mpi/c/win_set_notify_value.c.in b/ompi/mpi/c/win_set_notify_value.c.in new file mode 100644 index 00000000000..8a7d97567db --- /dev/null +++ b/ompi/mpi/c/win_set_notify_value.c.in @@ -0,0 +1,41 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2026 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/win/win.h" +#include "ompi/mca/osc/osc.h" + +PROTOTYPE ERROR_CLASS win_set_notify_value(WIN win, INT notification_idx, PARTITIONED_COUNT value) +{ + int rc; + + if (MPI_PARAM_CHECK) { + rc = OMPI_SUCCESS; + + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (ompi_win_invalid(win)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_WIN, FUNC_NAME); + } else if (notification_idx < 0) { + rc = MPI_ERR_NOTIFY_IDX; + } else if (value < 0) { + rc = MPI_ERR_ARG; + } + + OMPI_ERRHANDLER_CHECK(rc, win, rc, FUNC_NAME); + } + + rc = win->w_osc_module->osc_win_set_notify_value(win, notification_idx, value); + OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); +} From 8ae3d484194dffe254d6522791d4d1b668778100 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 4 Mar 2026 18:25:43 -0500 Subject: [PATCH 41/53] Changes to Notify Query Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm_comm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index b3879c11949..5d9032e19ea 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -44,10 +44,9 @@ ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, return OMPI_ERR_BAD_PARAM; } - - *value = (OMPI_MPI_COUNT_TYPE) osc_sm_target_notify_base(module, rank)[notify]; opal_atomic_rmb(); - + *value = (OMPI_MPI_COUNT_TYPE) osc_sm_target_notify_base(module, rank)[notify]; + return OMPI_SUCCESS; } From b878c7d974dae767246ad20ef9124a331d0f59a4 Mon Sep 17 00:00:00 2001 From: Orion Poplawski Date: Thu, 5 Mar 2026 08:48:12 -0700 Subject: [PATCH 42/53] Fix brace initialization (fixes #13757) Signed-off-by: Orion Poplawski --- oshmem/mca/memheap/base/memheap_base_frame.c | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/oshmem/mca/memheap/base/memheap_base_frame.c b/oshmem/mca/memheap/base/memheap_base_frame.c index 53a71b27a9e..82658e09791 100644 --- a/oshmem/mca/memheap/base/memheap_base_frame.c +++ b/oshmem/mca/memheap/base/memheap_base_frame.c @@ -33,9 +33,9 @@ int mca_memheap_base_output = -1; int mca_memheap_base_key_exchange = 1; -opal_list_t mca_memheap_base_components_opened = {{0}}; +opal_list_t mca_memheap_base_components_opened = {}; int mca_memheap_base_already_opened = 0; -mca_memheap_map_t mca_memheap_base_map = {{{{0}}}}; +mca_memheap_map_t mca_memheap_base_map = {}; int mca_memheap_num_segments_warn = 32; static int mca_memheap_base_register(mca_base_register_flag_t flags) From aa024ac73d624611cfe3af6f541b5d28dedf07bb Mon Sep 17 00:00:00 2001 From: Orion Poplawski Date: Thu, 5 Mar 2026 08:32:13 -0700 Subject: [PATCH 43/53] Drop __opal_attribute_always_inline__ for mca_part_persist_start (fixes #13721) Signed-off-by: Orion Poplawski --- ompi/mca/part/persist/part_persist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/part/persist/part_persist.h b/ompi/mca/part/persist/part_persist.h index ccc8f8f1971..86fb9bac42d 100644 --- a/ompi/mca/part/persist/part_persist.h +++ b/ompi/mca/part/persist/part_persist.h @@ -490,7 +490,7 @@ mca_part_persist_psend_init(const void* buf, return err; } -__opal_attribute_always_inline__ static inline int +static inline int mca_part_persist_start(size_t count, ompi_request_t** requests) { int err = OMPI_SUCCESS; From 223351cd24a28ad99d76fcea2ff7e52d3f6c788a Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 22 Aug 2024 10:38:42 -0400 Subject: [PATCH 44/53] Add a global OMPI_HAVE_ACCELERATOR_SUPPORT variable It indicates if any accelerator support has been built in, and if we need all the frameworks dealing with such devices. Don't build the smcuda BTL, coll accelerator, SMSC accelerator, rcache gpusm and rgpusm without accelerator support. 
Signed-off-by: George Bosilca --- config/opal_check_cuda.m4 | 1 + config/opal_check_rocm.m4 | 3 ++- config/opal_check_ze.m4 | 3 ++- configure.ac | 1 + ompi/mca/coll/accelerator/configure.m4 | 27 ++++++++++++++++++++++++ opal/mca/btl/smcuda/configure.m4 | 29 ++++++++++++++++++++++++++ opal/mca/rcache/gpusm/configure.m4 | 27 ++++++++++++++++++++++++ opal/mca/rcache/rgpusm/configure.m4 | 27 ++++++++++++++++++++++++ opal/mca/smsc/accelerator/configure.m4 | 27 ++++++++++++++++++++++++ 9 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 ompi/mca/coll/accelerator/configure.m4 create mode 100644 opal/mca/btl/smcuda/configure.m4 create mode 100644 opal/mca/rcache/gpusm/configure.m4 create mode 100644 opal/mca/rcache/rgpusm/configure.m4 create mode 100644 opal/mca/smsc/accelerator/configure.m4 diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4 index a6bf80a1b2a..ed3a51a26e8 100644 --- a/config/opal_check_cuda.m4 +++ b/config/opal_check_cuda.m4 @@ -154,6 +154,7 @@ AC_MSG_CHECKING([if have cuda support]) if test "$opal_check_cuda_happy" = "yes"; then AC_MSG_RESULT([yes (-I$opal_cuda_incdir)]) CUDA_SUPPORT=1 + OMPI_HAVE_ACCELERATOR_SUPPORT=1 common_cuda_CPPFLAGS="-I$opal_cuda_incdir" AC_SUBST([common_cuda_CPPFLAGS]) else diff --git a/config/opal_check_rocm.m4 b/config/opal_check_rocm.m4 index 25ac54e438e..0d1e6053469 100644 --- a/config/opal_check_rocm.m4 +++ b/config/opal_check_rocm.m4 @@ -57,7 +57,8 @@ AC_DEFUN([OPAL_CHECK_ROCM],[ AS_IF([ test "$opal_check_rocm_happy" = "yes" ], [ OPAL_APPEND([$1_CPPFLAGS], [$rocm_CPPFLAGS]) AC_DEFINE_UNQUOTED([OPAL_ROCM_SUPPORT], [1], [Enable ROCm support]) - ROCM_SUPPORT=1 ], + ROCM_SUPPORT=1 + OMPI_HAVE_ACCELERATOR_SUPPORT=1 ], [ AC_DEFINE_UNQUOTED([OPAL_ROCM_SUPPORT], [0], [Disable ROCm support]) ROCM_SUPPORT=0 ]) diff --git a/config/opal_check_ze.m4 b/config/opal_check_ze.m4 index d1d47bb67c1..84c8dacd2df 100644 --- a/config/opal_check_ze.m4 +++ b/config/opal_check_ze.m4 @@ -56,7 +56,8 @@ 
AC_DEFUN([OPAL_CHECK_ZE],[ AS_IF([ test "$opal_check_ze_happy" = "yes" ], [ AC_DEFINE_UNQUOTED([OPAL_ZE_SUPPORT], [1], [Enable Intel ZE support]) - ZE_SUPPORT=1 ], + ZE_SUPPORT=1 + OMPI_HAVE_ACCELERATOR_SUPPORT=1 ], [ AC_DEFINE_UNQUOTED([OPAL_ZE_SUPPORT], [0], [Disable Intel ZE support]) ZE_SUPPORT=0 ]) diff --git a/configure.ac b/configure.ac index 928f41b0415..d4276b23284 100644 --- a/configure.ac +++ b/configure.ac @@ -276,6 +276,7 @@ m4_ifdef([project_oshmem], ############################################################################ # Configuration options ############################################################################ +OMPI_HAVE_ACCELERATOR_SUPPORT=0 OPAL_CONFIGURE_OPTIONS diff --git a/ompi/mca/coll/accelerator/configure.m4 b/ompi/mca/coll/accelerator/configure.m4 new file mode 100644 index 00000000000..057db874435 --- /dev/null +++ b/ompi/mca/coll/accelerator/configure.m4 @@ -0,0 +1,27 @@ +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# If any accelerators have been discovered, then build support for the +# accelerator collective component. +# +AC_DEFUN([MCA_ompi_coll_accelerator_CONFIG],[ + + AC_CONFIG_FILES([ompi/mca/coll/accelerator/Makefile]) + + # This component shall be configured only after the accelerator discovery + # has been completed. This discovery is part of the OPAL accelerator framework. + AC_MSG_CHECKING([if any accelerator components were found (cuda, rocm, ze)]) + AS_IF([test "x$OMPI_HAVE_ACCELERATOR_SUPPORT" = "x1"], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + +])dnl diff --git a/opal/mca/btl/smcuda/configure.m4 b/opal/mca/btl/smcuda/configure.m4 new file mode 100644 index 00000000000..e9cb2df2996 --- /dev/null +++ b/opal/mca/btl/smcuda/configure.m4 @@ -0,0 +1,29 @@ +# Copyright (c) 2024 NVIDIA Corporation. All rights reserved. 
+# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# If any accelerators have been discovered, then build support for the +# accelerator BTL. This assumes the discovery has already been done. +# +# Beware: Unlike what the name suggests, this BTL is generic and used by +# all accelerators. + +AC_DEFUN([MCA_opal_btl_smcuda_CONFIG],[ + AC_CONFIG_FILES([opal/mca/btl/smcuda/Makefile]) + + # This component shall be configured only after the accelerator discovery + # has been completed. This discovery is part of the OPAL accelerator framework. + AC_MSG_CHECKING([if any accelerator components were found (cuda, rocm, ze)]) + AS_IF([test "x$OMPI_HAVE_ACCELERATOR_SUPPORT" = "x1"], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + +])dnl diff --git a/opal/mca/rcache/gpusm/configure.m4 b/opal/mca/rcache/gpusm/configure.m4 new file mode 100644 index 00000000000..d721910500e --- /dev/null +++ b/opal/mca/rcache/gpusm/configure.m4 @@ -0,0 +1,27 @@ +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# If any accelerators have been discovered, then build support for the +# accelerator rcache component. +# +AC_DEFUN([MCA_opal_rcache_gpusm_CONFIG],[ + + AC_CONFIG_FILES([opal/mca/rcache/gpusm/Makefile]) + + # This component shall be configured only after the accelerator discovery + # has been completed. This discovery is part of the OPAL accelerator framework. + AC_MSG_CHECKING([if any accelerator components were found (cuda, rocm, ze)]) + AS_IF([test "x$OMPI_HAVE_ACCELERATOR_SUPPORT" = "x1"], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + +])dnl diff --git a/opal/mca/rcache/rgpusm/configure.m4 b/opal/mca/rcache/rgpusm/configure.m4 new file mode 100644 index 00000000000..f5e3eda0154 --- /dev/null +++ b/opal/mca/rcache/rgpusm/configure.m4 @@ -0,0 +1,27 @@ +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# If any accelerators have been discovered, then build support for the +# accelerator rcache component. +# +AC_DEFUN([MCA_opal_rcache_rgpusm_CONFIG],[ + + AC_CONFIG_FILES([opal/mca/rcache/rgpusm/Makefile]) + + # This component shall be configured only after the accelerator discovery + # has been completed. This discovery is part of the OPAL accelerator framework. + AC_MSG_CHECKING([if any accelerator components were found (cuda, rocm, ze)]) + AS_IF([test "x$OMPI_HAVE_ACCELERATOR_SUPPORT" = "x1"], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + +])dnl diff --git a/opal/mca/smsc/accelerator/configure.m4 b/opal/mca/smsc/accelerator/configure.m4 new file mode 100644 index 00000000000..9fa993e9cf5 --- /dev/null +++ b/opal/mca/smsc/accelerator/configure.m4 @@ -0,0 +1,27 @@ +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# If any accelerators have been discovered, then build support for the +# accelerator SMSC component. +# +AC_DEFUN([MCA_opal_smsc_accelerator_CONFIG],[ + + AC_CONFIG_FILES([opal/mca/smsc/accelerator/Makefile]) + + # This component shall be configured only after the accelerator discovery + # has been completed. This discovery is part of the OPAL accelerator framework. + AC_MSG_CHECKING([if any accelerator components were found (cuda, rocm, ze)]) + AS_IF([test "x$OMPI_HAVE_ACCELERATOR_SUPPORT" = "x1"], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + +])dnl From f83cc8baae68c38a879f71962c76df09e19f035b Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 22 Aug 2024 10:39:41 -0400 Subject: [PATCH 45/53] Deal with PMIX optional keys PMIX returns success and does not set the output to NULL while looking for an optional key. Thus, to prevent segfaults we need to set the output value to a known value before. 
Signed-off-by: George Bosilca --- opal/mca/btl/smcuda/btl_smcuda.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index 1ce2b966ece..e832c8ed81e 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -235,7 +235,6 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s free(loc); } else { /* If we have hwloc support, then get accurate information */ - loc = NULL; if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) { rc = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); @@ -249,6 +248,7 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s mca_btl_smcuda_component.num_mem_nodes = rc; } } + loc = NULL; /* see if we were given our location */ OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING, &OPAL_PROC_MY_NAME, &loc, PMIX_STRING); if (OPAL_SUCCESS == rc) { @@ -267,6 +267,7 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s free(mynuma); } free(loc); + loc = NULL; } } else { /* If we have hwloc support, then get accurate information */ From 5376e64fa6eea9a56db929766798ae29211ac4de Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 23 Feb 2026 21:32:44 +0200 Subject: [PATCH 46/53] Add vscode to gitignore Signed-off-by: George Bosilca --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 7ab0b99af7d..b30321da7ca 100644 --- a/.gitignore +++ b/.gitignore @@ -542,3 +542,5 @@ ompi/mpi/fortran/use-mpi-f08/base/*_generated.c ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces-generated.h ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces-generated.h ompi/mpi/fortran/use-mpi-ignore-tkr/*_generated.F90 + +.vscode/ From 409afbfe19c4f00a365df761d51ade1184fe6a4a Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 11 Mar 2026 18:20:13 -0400 Subject: [PATCH 
47/53] Configuring memory barriers to propagate the update Signed-off-by: Joseph Antony --- ompi/mca/osc/sm/osc_sm_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 5d9032e19ea..6d17d914283 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -44,8 +44,8 @@ ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, return OMPI_ERR_BAD_PARAM; } - opal_atomic_rmb(); *value = (OMPI_MPI_COUNT_TYPE) osc_sm_target_notify_base(module, rank)[notify]; + opal_atomic_rmb(); return OMPI_SUCCESS; } @@ -62,8 +62,8 @@ ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, return OMPI_ERR_BAD_PARAM; } - opal_atomic_wmb(); osc_sm_target_notify_base(module, rank)[notify] = (uint64_t) value; + opal_atomic_wmb(); return OMPI_SUCCESS; } From f81e53f95afeefd1237eaf6a29861eb5bc42ab05 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Wed, 11 Mar 2026 19:26:11 -0400 Subject: [PATCH 48/53] Reset notify value Implementation Signed-off-by: Joseph Antony --- ompi/include/mpi.h.in | 2 ++ ompi/mca/osc/osc.h | 5 ++++ ompi/mca/osc/sm/osc_sm.h | 6 +++- ompi/mca/osc/sm/osc_sm_comm.c | 19 ++++++++++++ ompi/mca/osc/sm/osc_sm_component.c | 1 + ompi/mpi/c/Makefile.am | 2 ++ ompi/mpi/c/win_reset_notify_value.c.in | 41 ++++++++++++++++++++++++++ 7 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 ompi/mpi/c/win_reset_notify_value.c.in diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index eff39e9b1f0..3a147a7a136 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -2621,6 +2621,7 @@ OMPI_DECLSPEC int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); OMPI_DECLSPEC int MPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int MPI_Win_set_notify_value(MPI_Win win, int notification_idx, MPI_Count value); +OMPI_DECLSPEC int 
MPI_Win_reset_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int MPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_lock_all(int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); @@ -3811,6 +3812,7 @@ OMPI_DECLSPEC int PMPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int PMPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); OMPI_DECLSPEC int PMPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int PMPI_Win_set_notify_value(MPI_Win win, int notification_idx, MPI_Count value); +OMPI_DECLSPEC int PMPI_Win_reset_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int PMPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_lock_all(int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_post(MPI_Group group, int mpi_assert, MPI_Win win); diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index ca3b9aac3ef..6af72390f7f 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -253,6 +253,10 @@ typedef int (*ompi_osc_base_module_win_set_notify_value_fn_t)(struct ompi_win_t int notify, OMPI_MPI_COUNT_TYPE value); +typedef int (*ompi_osc_base_module_win_reset_notify_value_fn_t)(struct ompi_win_t *win, + int notify, + OMPI_MPI_COUNT_TYPE *value); + typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, @@ -431,6 +435,7 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_get_notify_fn_t osc_get_notify; ompi_osc_base_module_win_get_notify_value_fn_t osc_win_get_notify_value; ompi_osc_base_module_win_set_notify_value_fn_t osc_win_set_notify_value; + ompi_osc_base_module_win_reset_notify_value_fn_t osc_win_reset_notify_value; ompi_osc_base_module_accumulate_fn_t osc_accumulate; ompi_osc_base_module_compare_and_swap_fn_t 
osc_compare_and_swap; ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index cec3fa44bb2..ca776bd59a0 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -157,7 +157,11 @@ int ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, int ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, int notify, OMPI_MPI_COUNT_TYPE value); - + +int ompi_osc_sm_win_reset_notify_value(struct ompi_win_t *win, + int notify, + OMPI_MPI_COUNT_TYPE *value); + int ompi_osc_sm_accumulate(const void *origin_addr, size_t origin_count, struct ompi_datatype_t *origin_dt, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 6d17d914283..40cbc9d2813 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -68,6 +68,25 @@ ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, return OMPI_SUCCESS; } +int +ompi_osc_sm_win_reset_notify_value(struct ompi_win_t *win, + int notify, + OMPI_MPI_COUNT_TYPE *value) +{ + ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t *) win->w_osc_module; + int rank = ompi_comm_rank(module->comm); + + if (notify < 0 || (uint32_t) notify >= module->node_states[rank].notify_counter_count) { + return OMPI_ERR_BAD_PARAM; + } + + /* Atomically swap the counter to 0 and return the previous value */ + *value = (OMPI_MPI_COUNT_TYPE) opal_atomic_swap_64( + &osc_sm_target_notify_base(module, rank)[notify], 0); + + return OMPI_SUCCESS; +} + int ompi_osc_sm_rput(const void *origin_addr, size_t origin_count, diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index e64d04d6130..3ba892e0885 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -84,6 +84,7 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_get_notify = ompi_osc_sm_get_notify, .osc_win_get_notify_value = ompi_osc_sm_win_get_notify_value, .osc_win_set_notify_value = 
ompi_osc_sm_win_set_notify_value, + .osc_win_reset_notify_value = ompi_osc_sm_win_reset_notify_value, .osc_accumulate = ompi_osc_sm_accumulate, .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, .osc_fetch_and_op = ompi_osc_sm_fetch_and_op, diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index fef08119635..096da3ab171 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -488,6 +488,7 @@ prototype_sources = \ win_get_name.c.in \ win_get_notify_value.c.in \ win_set_notify_value.c.in \ + win_reset_notify_value.c.in \ win_lock_all.c.in \ win_lock.c.in \ win_post.c.in \ @@ -960,6 +961,7 @@ interface_profile_sources = \ win_get_name_generated.c \ win_get_notify_value_generated.c \ win_set_notify_value_generated.c \ + win_reset_notify_value_generated.c \ win_lock_all_generated.c \ win_lock_generated.c \ win_post_generated.c \ diff --git a/ompi/mpi/c/win_reset_notify_value.c.in b/ompi/mpi/c/win_reset_notify_value.c.in new file mode 100644 index 00000000000..99aa1755a76 --- /dev/null +++ b/ompi/mpi/c/win_reset_notify_value.c.in @@ -0,0 +1,41 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2026 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/win/win.h" +#include "ompi/mca/osc/osc.h" + +PROTOTYPE ERROR_CLASS win_reset_notify_value(WIN win, INT notification_idx, ELEMENT_COUNT value) +{ + int rc; + + if (MPI_PARAM_CHECK) { + rc = OMPI_SUCCESS; + + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (ompi_win_invalid(win)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_WIN, FUNC_NAME); + } else if (notification_idx < 0) { + rc = MPI_ERR_NOTIFY_IDX; + } else if (NULL == value) { + rc = MPI_ERR_ARG; + } + + OMPI_ERRHANDLER_CHECK(rc, win, rc, FUNC_NAME); + } + + rc = win->w_osc_module->osc_win_reset_notify_value(win, notification_idx, value); + OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); +} From 80d8d03f22fd8053edcf4d502d624c0f37ebd4ad Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Fri, 13 Mar 2026 11:43:20 -0400 Subject: [PATCH 49/53] Addressing review comments and bug fixes Signed-off-by: Joseph Antony --- ompi/mca/osc/osc.h | 14 +++++++------- ompi/mca/osc/sm/osc_sm_comm.c | 12 ++++++------ ompi/mca/osc/ubcl/osc_ubcl.c | 5 ----- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index 6af72390f7f..002866a69da 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -430,21 +430,14 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_free_fn_t osc_free; ompi_osc_base_module_put_fn_t osc_put; - ompi_osc_base_module_put_notify_fn_t osc_put_notify; ompi_osc_base_module_get_fn_t osc_get; - ompi_osc_base_module_get_notify_fn_t osc_get_notify; - ompi_osc_base_module_win_get_notify_value_fn_t osc_win_get_notify_value; - ompi_osc_base_module_win_set_notify_value_fn_t osc_win_set_notify_value; - ompi_osc_base_module_win_reset_notify_value_fn_t osc_win_reset_notify_value; ompi_osc_base_module_accumulate_fn_t osc_accumulate; 
ompi_osc_base_module_compare_and_swap_fn_t osc_compare_and_swap; ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op; ompi_osc_base_module_get_accumulate_fn_t osc_get_accumulate; ompi_osc_base_module_rput_fn_t osc_rput; - ompi_osc_base_module_rput_notify_fn_t osc_rput_notify; ompi_osc_base_module_rget_fn_t osc_rget; - ompi_osc_base_module_rget_notify_fn_t osc_rget_notify; ompi_osc_base_module_raccumulate_fn_t osc_raccumulate; ompi_osc_base_module_rget_accumulate_fn_t osc_rget_accumulate; @@ -466,6 +459,13 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_flush_all_fn_t osc_flush_all; ompi_osc_base_module_flush_local_fn_t osc_flush_local; ompi_osc_base_module_flush_local_all_fn_t osc_flush_local_all; + ompi_osc_base_module_put_notify_fn_t osc_put_notify; + ompi_osc_base_module_get_notify_fn_t osc_get_notify; + ompi_osc_base_module_win_get_notify_value_fn_t osc_win_get_notify_value; + ompi_osc_base_module_win_set_notify_value_fn_t osc_win_set_notify_value; + ompi_osc_base_module_win_reset_notify_value_fn_t osc_win_reset_notify_value; + ompi_osc_base_module_rput_notify_fn_t osc_rput_notify; + ompi_osc_base_module_rget_notify_fn_t osc_rget_notify; }; typedef struct ompi_osc_base_module_4_0_0_t ompi_osc_base_module_4_0_0_t; typedef ompi_osc_base_module_4_0_0_t ompi_osc_base_module_t; diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 40cbc9d2813..7d7501dcabf 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -41,7 +41,7 @@ ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, int rank = ompi_comm_rank(module->comm); if (notify < 0 || (uint32_t) notify >= module->node_states[rank].notify_counter_count) { - return OMPI_ERR_BAD_PARAM; + return MPI_ERR_NOTIFY_IDX; } *value = (OMPI_MPI_COUNT_TYPE) osc_sm_target_notify_base(module, rank)[notify]; @@ -59,7 +59,7 @@ ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, int rank = ompi_comm_rank(module->comm); if (notify < 0 || (uint32_t) notify 
>= module->node_states[rank].notify_counter_count) { - return OMPI_ERR_BAD_PARAM; + return MPI_ERR_NOTIFY_IDX; } osc_sm_target_notify_base(module, rank)[notify] = (uint64_t) value; @@ -77,7 +77,7 @@ ompi_osc_sm_win_reset_notify_value(struct ompi_win_t *win, int rank = ompi_comm_rank(module->comm); if (notify < 0 || (uint32_t) notify >= module->node_states[rank].notify_counter_count) { - return OMPI_ERR_BAD_PARAM; + return MPI_ERR_NOTIFY_IDX; } /* Atomically swap the counter to 0 and return the previous value */ @@ -165,7 +165,7 @@ ompi_osc_sm_rput_notify(const void *origin_addr, *ompi_req = &ompi_request_empty; if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { - return OMPI_ERR_BAD_PARAM; + return MPI_ERR_NOTIFY_IDX; } opal_atomic_wmb(); @@ -252,7 +252,7 @@ ompi_osc_sm_rget_notify(void *origin_addr, *ompi_req = &ompi_request_empty; if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { - return OMPI_ERR_BAD_PARAM; + return MPI_ERR_NOTIFY_IDX; } opal_atomic_rmb(); @@ -430,7 +430,7 @@ ompi_osc_sm_put_notify(const void *origin_addr, } if (notify < 0 || (uint32_t) notify >= module->node_states[target].notify_counter_count) { - return OMPI_ERR_BAD_PARAM; + return MPI_ERR_NOTIFY_IDX; } opal_atomic_wmb(); diff --git a/ompi/mca/osc/ubcl/osc_ubcl.c b/ompi/mca/osc/ubcl/osc_ubcl.c index d358605244b..5e81ed1add3 100644 --- a/ompi/mca/osc/ubcl/osc_ubcl.c +++ b/ompi/mca/osc/ubcl/osc_ubcl.c @@ -80,19 +80,14 @@ mca_osc_ubcl_module_t mca_osc_ubcl_module_template = { win_free, ompi_osc_ubcl_put, - NULL, ompi_osc_ubcl_get, - NULL, - NULL, ompi_osc_ubcl_accumulate, ompi_osc_ubcl_compare_and_swap, ompi_osc_ubcl_fetch_and_op, ompi_osc_ubcl_get_accumulate, ompi_osc_ubcl_rput, - NULL, ompi_osc_ubcl_rget, - NULL, ompi_osc_ubcl_raccumulate, ompi_osc_ubcl_rget_accumulate, From 93e9e82bf823a4bfef779e807b4bc4f03bba7524 Mon Sep 17 00:00:00 2001 From: Joseph Antony Date: Fri, 13 Mar 2026 17:23:08 -0400 Subject: [PATCH 
50/53] Removing Duplicate changes Signed-off-by: Joseph Antony --- ompi/include/mpi.h.in | 2 -- ompi/mca/osc/osc.h | 5 ---- ompi/mca/osc/sm/osc_sm.h | 4 --- ompi/mca/osc/sm/osc_sm_comm.c | 18 ------------ ompi/mca/osc/sm/osc_sm_component.c | 1 - ompi/mpi/c/Makefile.am | 2 -- ompi/mpi/c/win_set_notify_value.c.in | 41 ---------------------------- 7 files changed, 73 deletions(-) delete mode 100644 ompi/mpi/c/win_set_notify_value.c.in diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index 3a147a7a136..1422695ea37 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -2620,7 +2620,6 @@ OMPI_DECLSPEC int MPI_Win_get_group(MPI_Win win, MPI_Group *group); OMPI_DECLSPEC int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); OMPI_DECLSPEC int MPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); -OMPI_DECLSPEC int MPI_Win_set_notify_value(MPI_Win win, int notification_idx, MPI_Count value); OMPI_DECLSPEC int MPI_Win_reset_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int MPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_lock_all(int mpi_assert, MPI_Win win); @@ -3811,7 +3810,6 @@ OMPI_DECLSPEC int PMPI_Win_get_group(MPI_Win win, MPI_Group *group); OMPI_DECLSPEC int PMPI_Win_get_info(MPI_Win win, MPI_Info *info_used); OMPI_DECLSPEC int PMPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen); OMPI_DECLSPEC int PMPI_Win_get_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); -OMPI_DECLSPEC int PMPI_Win_set_notify_value(MPI_Win win, int notification_idx, MPI_Count value); OMPI_DECLSPEC int PMPI_Win_reset_notify_value(MPI_Win win, int notification_idx, MPI_Count *value); OMPI_DECLSPEC int PMPI_Win_lock(int lock_type, int rank, int mpi_assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_lock_all(int mpi_assert, MPI_Win win); diff --git a/ompi/mca/osc/osc.h 
b/ompi/mca/osc/osc.h index 002866a69da..b43f34ac3c5 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -249,10 +249,6 @@ typedef int (*ompi_osc_base_module_win_get_notify_value_fn_t)(struct ompi_win_t int notify, OMPI_MPI_COUNT_TYPE *value); -typedef int (*ompi_osc_base_module_win_set_notify_value_fn_t)(struct ompi_win_t *win, - int notify, - OMPI_MPI_COUNT_TYPE value); - typedef int (*ompi_osc_base_module_win_reset_notify_value_fn_t)(struct ompi_win_t *win, int notify, OMPI_MPI_COUNT_TYPE *value); @@ -462,7 +458,6 @@ struct ompi_osc_base_module_4_0_0_t { ompi_osc_base_module_put_notify_fn_t osc_put_notify; ompi_osc_base_module_get_notify_fn_t osc_get_notify; ompi_osc_base_module_win_get_notify_value_fn_t osc_win_get_notify_value; - ompi_osc_base_module_win_set_notify_value_fn_t osc_win_set_notify_value; ompi_osc_base_module_win_reset_notify_value_fn_t osc_win_reset_notify_value; ompi_osc_base_module_rput_notify_fn_t osc_rput_notify; ompi_osc_base_module_rget_notify_fn_t osc_rget_notify; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index ca776bd59a0..85d250bfa18 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -154,10 +154,6 @@ int ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, int notify, OMPI_MPI_COUNT_TYPE *value); -int ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, - int notify, - OMPI_MPI_COUNT_TYPE value); - int ompi_osc_sm_win_reset_notify_value(struct ompi_win_t *win, int notify, OMPI_MPI_COUNT_TYPE *value); diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index 7d7501dcabf..fbd4f17856c 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -50,24 +50,6 @@ ompi_osc_sm_win_get_notify_value(struct ompi_win_t *win, return OMPI_SUCCESS; } -int -ompi_osc_sm_win_set_notify_value(struct ompi_win_t *win, - int notify, - OMPI_MPI_COUNT_TYPE value) -{ - ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t *) win->w_osc_module; - int rank = 
ompi_comm_rank(module->comm); - - if (notify < 0 || (uint32_t) notify >= module->node_states[rank].notify_counter_count) { - return MPI_ERR_NOTIFY_IDX; - } - - osc_sm_target_notify_base(module, rank)[notify] = (uint64_t) value; - opal_atomic_wmb(); - - return OMPI_SUCCESS; -} - int ompi_osc_sm_win_reset_notify_value(struct ompi_win_t *win, int notify, diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 3ba892e0885..259c0826017 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -83,7 +83,6 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_get = ompi_osc_sm_get, .osc_get_notify = ompi_osc_sm_get_notify, .osc_win_get_notify_value = ompi_osc_sm_win_get_notify_value, - .osc_win_set_notify_value = ompi_osc_sm_win_set_notify_value, .osc_win_reset_notify_value = ompi_osc_sm_win_reset_notify_value, .osc_accumulate = ompi_osc_sm_accumulate, .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index 096da3ab171..49619694d0b 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -487,7 +487,6 @@ prototype_sources = \ win_get_info.c.in \ win_get_name.c.in \ win_get_notify_value.c.in \ - win_set_notify_value.c.in \ win_reset_notify_value.c.in \ win_lock_all.c.in \ win_lock.c.in \ @@ -960,7 +959,6 @@ interface_profile_sources = \ win_get_info_generated.c \ win_get_name_generated.c \ win_get_notify_value_generated.c \ - win_set_notify_value_generated.c \ win_reset_notify_value_generated.c \ win_lock_all_generated.c \ win_lock_generated.c \ diff --git a/ompi/mpi/c/win_set_notify_value.c.in b/ompi/mpi/c/win_set_notify_value.c.in deleted file mode 100644 index 8a7d97567db..00000000000 --- a/ompi/mpi/c/win_set_notify_value.c.in +++ /dev/null @@ -1,41 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2026 Triad National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" - -#include "ompi/mpi/c/bindings.h" -#include "ompi/runtime/params.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/win/win.h" -#include "ompi/mca/osc/osc.h" - -PROTOTYPE ERROR_CLASS win_set_notify_value(WIN win, INT notification_idx, PARTITIONED_COUNT value) -{ - int rc; - - if (MPI_PARAM_CHECK) { - rc = OMPI_SUCCESS; - - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - - if (ompi_win_invalid(win)) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_WIN, FUNC_NAME); - } else if (notification_idx < 0) { - rc = MPI_ERR_NOTIFY_IDX; - } else if (value < 0) { - rc = MPI_ERR_ARG; - } - - OMPI_ERRHANDLER_CHECK(rc, win, rc, FUNC_NAME); - } - - rc = win->w_osc_module->osc_win_set_notify_value(win, notification_idx, value); - OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); -} From a1ab16bc437ca28f25b71ddfc2b6b9ac4d393b13 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 11 Feb 2026 15:40:43 -0500 Subject: [PATCH 51/53] osc/rdma: Fix handling of MPI_PROC_NULL in shared_query The newly added code to support shared memory queries in osc/rdma had the check for MPI_PROC_NULL too late, which caused an out-of-bounds access into the peer array. 
Signed-off-by: Joseph Schuchart --- ompi/mca/osc/rdma/osc_rdma_component.c | 36 ++++++++++++++------------ 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index cc34c109683..14eeb928e40 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -1649,37 +1649,39 @@ int ompi_osc_rdma_shared_query( ptrdiff_t *disp_unit, void *baseptr) { int rc = OMPI_ERR_NOT_SUPPORTED; - ompi_osc_rdma_peer_t *peer; - int actual_rank = rank; + ompi_osc_rdma_peer_t *peer = NULL; ompi_osc_rdma_module_t *module = GET_MODULE(win); - peer = ompi_osc_module_get_peer (module, actual_rank); - if (NULL == peer) { - return OMPI_ERR_NOT_SUPPORTED; - } - /* currently only supported for allocated windows */ if (MPI_WIN_FLAVOR_ALLOCATE != module->flavor) { return OMPI_ERR_NOT_SUPPORTED; } - if (!ompi_osc_rdma_peer_local_base(peer)) { - return OMPI_ERR_NOT_SUPPORTED; - } - if (MPI_PROC_NULL == rank) { /* iterate until we find a rank that has a non-zero size */ for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) { peer = ompi_osc_module_get_peer (module, i); - ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer; - if (!ompi_osc_rdma_peer_local_base(peer)) { + if (NULL == peer) { + /* peer object not cached yet (typically non-local here since local peers are added eagerly) */ continue; - } else if (module->same_size && ex_peer->super.base) { - break; - } else if (ex_peer->size > 0) { - break; } + ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer; + if (ompi_osc_rdma_peer_local_base(peer)) { + if (module->same_size && ex_peer->super.base) { + break; + } else if (ex_peer->size > 0) { + break; + } + } + // reset so we don't mistakenly use a peer without memory + peer = NULL; } + } else { + peer = ompi_osc_module_get_peer (module, rank); + } + + if (NULL == peer || !ompi_osc_rdma_peer_local_base(peer)) { + 
return OMPI_ERR_NOT_SUPPORTED; } if (module->same_size && module->same_disp_unit) { From 7d026989e4c440d32675fa2ddc891b73c16ea410 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Fri, 20 Mar 2026 15:14:43 -0400 Subject: [PATCH 52/53] Return MPI_ERR_PROC_FAILED on I(m)probe when appropriate Signed-off-by: Matthew Whitlock --- ompi/mca/pml/ob1/pml_ob1_iprobe.c | 10 ++++++++++ ompi/mca/pml/ob1/pml_ob1_recvreq.c | 17 ++++++++++------- ompi/request/req_ft.c | 4 +++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_iprobe.c b/ompi/mca/pml/ob1/pml_ob1_iprobe.c index 4d6a0eb8dfd..97744cce5dc 100644 --- a/ompi/mca/pml/ob1/pml_ob1_iprobe.c +++ b/ompi/mca/pml/ob1/pml_ob1_iprobe.c @@ -47,6 +47,11 @@ int mca_pml_ob1_iprobe(int src, *matched = 1; } else { *matched = 0; +#if OPAL_ENABLE_FT_MPI + if( ompi_request_is_failed((ompi_request_t*)&recvreq) ) { + rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; + } +#endif opal_progress(); } MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); @@ -119,6 +124,11 @@ mca_pml_ob1_improbe(int src, (*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount; } else { *matched = 0; +#if OPAL_ENABLE_FT_MPI + if( ompi_request_is_failed((ompi_request_t*)recvreq) ) { + rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; + } +#endif /* we only free if we didn't match, because we're going to translate the request into a receive request later on if it diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 57aba677a8a..a6a2866f2a2 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -108,16 +108,19 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, } if( !request->req_match_received ) { /* the match has not been already done */ assert( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ); /* not matched isn't it */ + if(OPAL_LIKELY(request->req_recv.req_base.req_type 
!= MCA_PML_REQUEST_IPROBE && + request->req_recv.req_base.req_type != MCA_PML_REQUEST_IMPROBE)) { #if MCA_PML_OB1_CUSTOM_MATCH - custom_match_prq_cancel(ob1_comm->prq, request); + custom_match_prq_cancel(ob1_comm->prq, request); #else - if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request ); - } else { - mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer); - opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); - } + if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { + opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request ); + } else { + mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer); + opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); + } #endif + } PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, &(request->req_recv.req_base), PERUSE_RECV ); OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); diff --git a/ompi/request/req_ft.c b/ompi/request/req_ft.c index 2c53ce076b0..e855afc59fd 100644 --- a/ompi/request/req_ft.c +++ b/ompi/request/req_ft.c @@ -128,7 +128,9 @@ bool ompi_request_is_failed_fn(ompi_request_t *req) req->req_status.MPI_ERROR = MPI_ERR_PROC_FAILED_PENDING; /* If it is a probe/mprobe, escalate the error */ if( (MCA_PML_REQUEST_MPROBE == pml_req->req_type) || - (MCA_PML_REQUEST_PROBE == pml_req->req_type) ) { + (MCA_PML_REQUEST_IMPROBE == pml_req->req_type) || + (MCA_PML_REQUEST_PROBE == pml_req->req_type) || + (MCA_PML_REQUEST_IPROBE == pml_req->req_type) ) { req->req_status.MPI_ERROR = MPI_ERR_PROC_FAILED; } opal_output_verbose(10, ompi_ftmpi_output_handle, From 66dfeec688ef873c7df2fb03766f251be260e2e8 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Tue, 24 Mar 2026 10:37:43 -0500 Subject: [PATCH 53/53] opal/mca: remove smcuda from dso list remove 
btl-smcuda, rcache-gpusm, and rcache-rgpusm from the list of components that have to be compiled as DSOs at all times. The components no longer contain any references/function calls to a GPU software stack; everything is based on the accelerator framework APIs. Signed-off-by: Edgar Gabriel --- config/opal_mca.m4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/opal_mca.m4 b/config/opal_mca.m4 index cdeb935a3a3..bb51d3bc5f1 100644 --- a/config/opal_mca.m4 +++ b/config/opal_mca.m4 @@ -186,7 +186,7 @@ of type-component pairs. For example, --enable-mca-no-build=pml-ob1]) else msg= if test -z "$enable_mca_dso"; then - enable_mca_dso="accelerator-cuda,accelerator-rocm,accelerator-ze,btl-smcuda,rcache-gpusm,rcache-rgpusm" + enable_mca_dso="accelerator-cuda,accelerator-rocm,accelerator-ze" msg="(default)" fi DSO_all=0