wdmapp · bd4 · Mar 5, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
@@ -238,7 +238,54 @@ jobs:
       run: make GTENSOR_DEVICE=cuda
       working-directory: ${{ github.workspace }}/examples
 
+
+  build-cuda13:  # CUDA 13 toolchain: configure and build only, no test step
+    runs-on: ubuntu-latest
+    container: nvidia/cuda:13.1.1-devel-ubuntu24.04
+    env:
+      GTEST_VERSION: 1.15.2
+      GTEST_ROOT: ${{ github.workspace }}/googletest
+      CMAKE_BUILD_PARALLEL_LEVEL: 8
+      CMAKE_VERSION: 3.21.7  # fetched from Kitware's GitHub releases below
+
+    steps:
+    - uses: actions/checkout@v4  # v3 targets the retired Node 16 runtime
+    - name: install packages
+      run: apt-get update && apt-get install -y wget git
+    - name: install cmake
+      run: |
+        wget -O cmake.sh 'https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}-Linux-x86_64.sh'
+        bash cmake.sh --prefix=/usr --skip-license --exclude-subdir
+    - name: install googletest  # built from source, installed into GTEST_ROOT
+      run: |
+        mkdir -p ${{ env.GTEST_ROOT }}
+        cd ${{ env.GTEST_ROOT }}
+        wget https://github.com/google/googletest/releases/download/v${{ env.GTEST_VERSION }}/googletest-${{ env.GTEST_VERSION }}.tar.gz
+        tar xzf googletest-${{ env.GTEST_VERSION }}.tar.gz
+        cmake -S googletest-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }}
+        cmake --build build -t install
+      env:
+        CXX: g++
+    - name: cmake thrust  # RelWithDebInfo, CUDA warnings are errors
+      run: cmake -S . -B build-cuda -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON -DCMAKE_CUDA_FLAGS_RELWITHDEBINFO="-O2 -g -DNDEBUG -Werror=all-warnings" -DGTENSOR_GPU_ARCHITECTURES=80
+    - name: cmake thrust build
+      run: cmake --build build-cuda -v
+    - name: cmake thrust debug  # Debug, CUDA warnings are errors
+      run: cmake -S . -B build-cuda-debug -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=Debug -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON -DCMAKE_CUDA_FLAGS_DEBUG="-g -Werror=all-warnings" -DGTENSOR_GPU_ARCHITECTURES=80
+    - name: cmake thrust debug build
+      run: cmake --build build-cuda-debug -v
+    - name: cmake nothrust  # same options as "cmake thrust", GTENSOR_USE_THRUST=OFF
+      run: cmake -S . -B build-cuda-nothrust -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=OFF -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON -DCMAKE_CUDA_FLAGS_RELWITHDEBINFO="-O2 -g -DNDEBUG -Werror=all-warnings"  -DGTENSOR_GPU_ARCHITECTURES=80
+    - name: cmake nothrust build
+      run: cmake --build build-cuda-nothrust -v
+    - name: GNU make setup gtensor subdir  # stage ../include under examples/external/gtensor
+      run: mkdir -p external/gtensor &&  cp -R ../include external/gtensor/
+      working-directory: ${{ github.workspace }}/examples
+    - name: GNU make build  # plain-Makefile build of the examples with nvcc
+      run: make GTENSOR_DEVICE=cuda
+      working-directory: ${{ github.workspace }}/examples
 
+
   build-cuda-rmm-umpire:
     runs-on: ubuntu-latest
     container: nvidia/cuda:12.3.0-devel-ubuntu22.04

diff --git a/examples/Makefile b/examples/Makefile
@@ -14,7 +14,7 @@ GTENSOR_DEVICE_DEFINE := GTENSOR_DEVICE_$(shell echo $(GTENSOR_DEVICE) | tr a-z
 GTENSOR_DEFINES = -D$(GTENSOR_DEVICE_DEFINE)
 GTENSOR_INCLUDES = -I$(GTENSOR_DIR)/include
 GTENSOR_LIBS =
-GTENSOR_OPTIONS = -std=c++14 -O2
+GTENSOR_OPTIONS = -std=c++17 -O2
 ifeq ($(GTENSOR_DEVICE),cuda)
   GTENSOR_CXX ?= nvcc
   GTENSOR_OPTIONS += -x cu --expt-extended-lambda --expt-relaxed-constexpr

diff --git a/include/gtensor/backend_cuda.h b/include/gtensor/backend_cuda.h
@@ -4,8 +4,8 @@
 
 #include "backend_common.h"
 
+#include <cuda.h>
 #include <cuda_runtime_api.h>
-
 // #include "thrust/cuda/system/execution_policy.h"
 #include "thrust/execution_policy.h"
 
@@ -264,7 +264,13 @@ class backend_ops<gt::space::cuda>
     if (mtype != gt::backend::managed_memory_type::device) {
       int device_id;
       gtGpuCheck(cudaGetDevice(&device_id));
+#if (CUDA_VERSION >= 13000) // CUDA 13+: location struct plus flags argument
+      cudaMemLocation memLocation{cudaMemLocationTypeDevice, device_id};
+      gtGpuCheck(
+        cudaMemPrefetchAsync(p, n * sizeof(T), memLocation, 0, nullptr));
+#else // older toolkits: destination passed as a plain device id
       gtGpuCheck(cudaMemPrefetchAsync(p, n * sizeof(T), device_id, nullptr));
+#endif
     }
 #endif
   }
@@ -275,8 +281,14 @@ class backend_ops<gt::space::cuda>
 #ifndef GTENSOR_DISABLE_PREFETCH
     auto mtype = gt::backend::get_managed_memory_type();
     if (mtype != gt::backend::managed_memory_type::device) {
+#if (CUDA_VERSION >= 13000) // HostNuma would need a real NUMA node id
+      cudaMemLocation memLocation{cudaMemLocationTypeHost, 0}; // id ignored
+      gtGpuCheck(
+        cudaMemPrefetchAsync(p, n * sizeof(T), memLocation, 0, nullptr));
+#else // older toolkits: cudaCpuDeviceId selects host memory
       gtGpuCheck(
         cudaMemPrefetchAsync(p, n * sizeof(T), cudaCpuDeviceId, nullptr));
+#endif
     }
 #endif
   }