From 9ba3fa53c7963a6a7107a47cd7fadcfb9a2712a2 Mon Sep 17 00:00:00 2001 From: Kai Germaschewski Date: Sun, 8 Mar 2026 10:55:13 -0700 Subject: [PATCH 1/2] gpu: fix test cuda_mparticles_randomize_sort I don't think there was an actual issue, just the test itself making invalid assumptions. --- src/libpsc/tests/test_collision_cuda.cxx | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libpsc/tests/test_collision_cuda.cxx b/src/libpsc/tests/test_collision_cuda.cxx index b0e0f60640..620d219dcd 100644 --- a/src/libpsc/tests/test_collision_cuda.cxx +++ b/src/libpsc/tests/test_collision_cuda.cxx @@ -210,13 +210,14 @@ TEST(cuda_mparticles_randomize_sort, sort) d_id, (std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14})); sort.sort(d_random_idx, d_id); - EXPECT_EQ( - d_id, (std::vector{0, 1, 7, 5, 8, 6, 2, 4, 3, 9, 10, 14, 13, 11, 12})); + // The result here depends on the particular random numbers generated, which + // seem to have changed across CUDA versions, so we just check that the random + // indices end up sorted. Previously: EXPECT_EQ( + // d_id, (std::vector{0, 1, 7, 5, 8, 6, 2, 4, 3, 9, 10, 14, 13, 11, + // 12})); - auto last = d_random_idx[0]; for (int i = 1; i < cmprts.size(); i++) { - EXPECT_GE(d_random_idx[i], last); - last = d_random_idx[i]; + EXPECT_GE(d_random_idx[i], d_random_idx[i - 1]); } // std::cout << "rnd idx "; // std::copy(sort.d_random_idx.begin(), sort.d_random_idx.end(), @@ -247,7 +248,7 @@ TEST(cuda_mparticles_randomize_sort, sort) // std::cout << "\n"; EXPECT_EQ(d_off, off); -#if 1 +#if 0 // do over, get different permutation sort.find_indices_ids(cmprts, d_random_idx, d_id); sort.sort(d_random_idx, d_id); From 27825855245c5fdf4e4cc19635c8dd5e11a32d6c Mon Sep 17 00:00:00 2001 From: Kai Germaschewski Date: Mon, 9 Mar 2026 04:38:48 -0700 Subject: [PATCH 2/2] gpu: move particle injection back to before pushing particles. 
The GPU code is expecting this order, though it certainly is not desirable to have two different ways of doing it depending on whether GPU is used or not. --- src/include/psc.hxx | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/include/psc.hxx b/src/include/psc.hxx index 260c0cf87c..911e77974e 100644 --- a/src/include/psc.hxx +++ b/src/include/psc.hxx @@ -369,6 +369,15 @@ struct Psc prof_stop(pr_collision); } +#ifdef USE_CUDA + // === particle injection + prof_start(pr_inject_prts); + for (auto injector : injectors_) { + injector->inject(mprts_, mflds_); + } + prof_stop(pr_inject_prts); +#endif + if (checks_.continuity.should_do_check(timestep)) { mpi_printf(comm, "***** Checking continuity (1 of 2)...\n"); prof_start(pr_checks); @@ -382,6 +391,7 @@ struct Psc pushp_.push_mprts(mprts_, mflds_); prof_stop(pr_push_prts); +#ifndef USE_CUDA // === particle injection prof_start(pr_inject_prts); for (auto injector : injectors_) { @@ -389,6 +399,7 @@ struct Psc injector->inject(mprts_, mflds_); } prof_stop(pr_inject_prts); +#endif // === external current prof_start(pr_external_current);