diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 7684dd69..6cf100f0 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -238,7 +238,54 @@ jobs:
       run: make GTENSOR_DEVICE=cuda
       working-directory: ${{ github.workspace }}/examples
 
+
+  build-cuda13:
+    runs-on: ubuntu-latest
+    container: nvidia/cuda:13.1.1-devel-ubuntu24.04
+    env:
+      GTEST_VERSION: 1.15.2
+      GTEST_ROOT: ${{ github.workspace }}/googletest
+      CMAKE_BUILD_PARALLEL_LEVEL: 8
+      CMAKE_VERSION: 3.21.7
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: install packages
+        run: apt-get update && apt-get install -y wget git
+      - name: install cmake
+        run: |
+          wget -O cmake.sh 'https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}-linux-x86_64.sh'
+          bash cmake.sh --prefix=/usr --skip-license --exclude-subdir
+      - name: install googletest
+        run: |
+          mkdir -p ${{ env.GTEST_ROOT }}
+          cd ${{ env.GTEST_ROOT }}
+          wget https://github.com/google/googletest/releases/download/v${{ env.GTEST_VERSION }}/googletest-${{ env.GTEST_VERSION }}.tar.gz
+          tar xzf googletest-${{ env.GTEST_VERSION }}.tar.gz
+          cmake -S googletest-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }}
+          cmake --build build -t install
+        env:
+          CXX: g++
+      - name: cmake thrust
+        run: cmake -S . -B build-cuda -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON -DCMAKE_CUDA_FLAGS_RELWITHDEBINFO="-O2 -g -DNDEBUG -Werror=all-warnings" -DGTENSOR_GPU_ARCHITECTURES=80
+      - name: cmake thrust build
+        run: cmake --build build-cuda -v
+      - name: cmake thrust debug
+        run: cmake -S . -B build-cuda-debug -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=Debug -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=ON -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON -DCMAKE_CUDA_FLAGS_DEBUG="-g -Werror=all-warnings" -DGTENSOR_GPU_ARCHITECTURES=80
+      - name: cmake thrust debug build
+        run: cmake --build build-cuda-debug -v
+      - name: cmake nothrust
+        run: cmake -S . -B build-cuda-nothrust -DGTENSOR_DEVICE=cuda -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGTENSOR_BUILD_EXAMPLES=ON -DGTENSOR_USE_THRUST=OFF -DGTEST_ROOT=${{ env.GTEST_ROOT }} -DGTENSOR_ENABLE_CLIB=ON -DGTENSOR_ENABLE_BLAS=ON -DGTENSOR_ENABLE_FFT=ON -DGTENSOR_ENABLE_SOLVER=ON -DCMAKE_CUDA_FLAGS_RELWITHDEBINFO="-O2 -g -DNDEBUG -Werror=all-warnings" -DGTENSOR_GPU_ARCHITECTURES=80
+      - name: cmake nothrust build
+        run: cmake --build build-cuda-nothrust -v
+      - name: GNU make setup gtensor subdir
+        run: mkdir -p external/gtensor && cp -R ../include external/gtensor/
+        working-directory: ${{ github.workspace }}/examples
+      - name: GNU make build
+        run: make GTENSOR_DEVICE=cuda
+        working-directory: ${{ github.workspace }}/examples
+
   build-cuda-rmm-umpire:
     runs-on: ubuntu-latest
     container: nvidia/cuda:12.3.0-devel-ubuntu22.04
diff --git a/examples/Makefile b/examples/Makefile
index 3040f6fc..3c25f16d 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -14,7 +14,7 @@ GTENSOR_DEVICE_DEFINE := GTENSOR_DEVICE_$(shell echo $(GTENSOR_DEVICE) | tr a-z
 GTENSOR_DEFINES = -D$(GTENSOR_DEVICE_DEFINE)
 GTENSOR_INCLUDES = -I$(GTENSOR_DIR)/include
 GTENSOR_LIBS =
-GTENSOR_OPTIONS = -std=c++14 -O2
+GTENSOR_OPTIONS = -std=c++17 -O2
 ifeq ($(GTENSOR_DEVICE),cuda)
 GTENSOR_CXX ?= nvcc
 GTENSOR_OPTIONS += -x cu --expt-extended-lambda --expt-relaxed-constexpr
diff --git a/include/gtensor/backend_cuda.h b/include/gtensor/backend_cuda.h
index 9e9221fa..bebaaf50 100644
--- a/include/gtensor/backend_cuda.h
+++ b/include/gtensor/backend_cuda.h
@@ -4,8 +4,8 @@
 #include "backend_common.h"
 
+#include <cuda.h>
 #include <cuda_runtime_api.h>
-
 // #include "thrust/cuda/system/execution_policy.h"
 #include "thrust/execution_policy.h"
@@ -264,7 +264,13 @@ class backend_ops
     if (mtype != gt::backend::managed_memory_type::device) {
       int device_id;
       gtGpuCheck(cudaGetDevice(&device_id));
+#if (CUDA_VERSION >= 13000)
+      cudaMemLocation memLocation{cudaMemLocationTypeDevice, device_id};
+      gtGpuCheck(
+        cudaMemPrefetchAsync(p, n * sizeof(T), memLocation, 0, nullptr));
+#else
       gtGpuCheck(cudaMemPrefetchAsync(p, n * sizeof(T), device_id, nullptr));
+#endif
     }
 #endif
   }
@@ -275,8 +281,14 @@ class backend_ops
 #ifndef GTENSOR_DISABLE_PREFETCH
     auto mtype = gt::backend::get_managed_memory_type();
     if (mtype != gt::backend::managed_memory_type::device) {
+#if (CUDA_VERSION >= 13000)
+      cudaMemLocation memLocation{cudaMemLocationTypeHost, 0};
+      gtGpuCheck(
+        cudaMemPrefetchAsync(p, n * sizeof(T), memLocation, 0, nullptr));
+#else
       gtGpuCheck(
         cudaMemPrefetchAsync(p, n * sizeof(T), cudaCpuDeviceId, nullptr));
+#endif
     }
 #endif
   }