Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/ccpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,54 @@ jobs:
run: make GTENSOR_DEVICE=cuda
working-directory: ${{ github.workspace }}/examples


# Compile-only job: builds gtensor inside the CUDA 13.1.1 devel container.
# It configures and builds three CMake variants (thrust RelWithDebInfo,
# thrust Debug, no-thrust RelWithDebInfo) and then the GNU make examples.
build-cuda13:
  runs-on: ubuntu-latest
  container: nvidia/cuda:13.1.1-devel-ubuntu24.04
  env:
    # Version-like values are quoted so they always stay strings.
    CMAKE_VERSION: '3.21.7'
    CMAKE_BUILD_PARALLEL_LEVEL: '8'
    GTEST_VERSION: '1.15.2'
    GTEST_ROOT: ${{ github.workspace }}/googletest

  steps:
    - uses: actions/checkout@v3

    - name: install packages
      run: apt-get update && apt-get install -y wget git

    # Install the pinned CMake release from the upstream self-extracting
    # installer, directly into /usr.
    - name: install cmake
      run: |
        wget -O cmake.sh 'https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}-Linux-x86_64.sh'
        bash cmake.sh --prefix=/usr --skip-license --exclude-subdir

    # Build googletest from the release tarball and install it into
    # GTEST_ROOT, which the configure steps below pass as -DGTEST_ROOT.
    - name: install googletest
      env:
        CXX: g++
      run: |
        mkdir -p ${{ env.GTEST_ROOT }}
        cd ${{ env.GTEST_ROOT }}
        wget https://github.com/google/googletest/releases/download/v${{ env.GTEST_VERSION }}/googletest-${{ env.GTEST_VERSION }}.tar.gz
        tar xzf googletest-${{ env.GTEST_VERSION }}.tar.gz
        cmake -S googletest-${{ env.GTEST_VERSION }} -B build -DCMAKE_INSTALL_PREFIX=${{ env.GTEST_ROOT }}
        cmake --build build -t install

    # The configure commands use folded scalars (>-): the lines below are
    # joined with single spaces into one command line. Keep every
    # continuation line at the same indentation so the folding applies.
    - name: cmake thrust
      run: >-
        cmake -S . -B build-cuda
        -DGTENSOR_DEVICE=cuda
        -DCMAKE_BUILD_TYPE=RelWithDebInfo
        -DGTENSOR_BUILD_EXAMPLES=ON
        -DGTENSOR_USE_THRUST=ON
        -DGTEST_ROOT=${{ env.GTEST_ROOT }}
        -DGTENSOR_ENABLE_CLIB=ON
        -DGTENSOR_ENABLE_BLAS=ON
        -DGTENSOR_ENABLE_FFT=ON
        -DGTENSOR_ENABLE_SOLVER=ON
        -DCMAKE_CUDA_FLAGS_RELWITHDEBINFO="-O2 -g -DNDEBUG -Werror=all-warnings"
        -DGTENSOR_GPU_ARCHITECTURES=80
    - name: cmake thrust build
      run: cmake --build build-cuda -v

    - name: cmake thrust debug
      run: >-
        cmake -S . -B build-cuda-debug
        -DGTENSOR_DEVICE=cuda
        -DCMAKE_BUILD_TYPE=Debug
        -DGTENSOR_BUILD_EXAMPLES=ON
        -DGTENSOR_USE_THRUST=ON
        -DGTEST_ROOT=${{ env.GTEST_ROOT }}
        -DGTENSOR_ENABLE_CLIB=ON
        -DGTENSOR_ENABLE_BLAS=ON
        -DGTENSOR_ENABLE_FFT=ON
        -DGTENSOR_ENABLE_SOLVER=ON
        -DCMAKE_CUDA_FLAGS_DEBUG="-g -Werror=all-warnings"
        -DGTENSOR_GPU_ARCHITECTURES=80
    - name: cmake thrust debug build
      run: cmake --build build-cuda-debug -v

    - name: cmake nothrust
      run: >-
        cmake -S . -B build-cuda-nothrust
        -DGTENSOR_DEVICE=cuda
        -DCMAKE_BUILD_TYPE=RelWithDebInfo
        -DGTENSOR_BUILD_EXAMPLES=ON
        -DGTENSOR_USE_THRUST=OFF
        -DGTEST_ROOT=${{ env.GTEST_ROOT }}
        -DGTENSOR_ENABLE_CLIB=ON
        -DGTENSOR_ENABLE_BLAS=ON
        -DGTENSOR_ENABLE_FFT=ON
        -DGTENSOR_ENABLE_SOLVER=ON
        -DCMAKE_CUDA_FLAGS_RELWITHDEBINFO="-O2 -g -DNDEBUG -Werror=all-warnings"
        -DGTENSOR_GPU_ARCHITECTURES=80
    - name: cmake nothrust build
      run: cmake --build build-cuda-nothrust -v

    # GNU make build of the examples: copy the repository's include/ tree
    # to examples/external/gtensor/ first, then run make there.
    - name: GNU make setup gtensor subdir
      working-directory: ${{ github.workspace }}/examples
      run: mkdir -p external/gtensor && cp -R ../include external/gtensor/
    - name: GNU make build
      working-directory: ${{ github.workspace }}/examples
      run: make GTENSOR_DEVICE=cuda


build-cuda-rmm-umpire:
runs-on: ubuntu-latest
container: nvidia/cuda:12.3.0-devel-ubuntu22.04
Expand Down
2 changes: 1 addition & 1 deletion examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ GTENSOR_DEVICE_DEFINE := GTENSOR_DEVICE_$(shell echo $(GTENSOR_DEVICE) | tr a-z
GTENSOR_DEFINES = -D$(GTENSOR_DEVICE_DEFINE)
GTENSOR_INCLUDES = -I$(GTENSOR_DIR)/include
GTENSOR_LIBS =
GTENSOR_OPTIONS = -std=c++14 -O2
GTENSOR_OPTIONS = -std=c++17 -O2
ifeq ($(GTENSOR_DEVICE),cuda)
GTENSOR_CXX ?= nvcc
GTENSOR_OPTIONS += -x cu --expt-extended-lambda --expt-relaxed-constexpr
Expand Down
14 changes: 13 additions & 1 deletion include/gtensor/backend_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

#include "backend_common.h"

#include <cuda.h>
#include <cuda_runtime_api.h>

// #include "thrust/cuda/system/execution_policy.h"
#include "thrust/execution_policy.h"

Expand Down Expand Up @@ -264,7 +264,13 @@ class backend_ops<gt::space::cuda>
if (mtype != gt::backend::managed_memory_type::device) {
int device_id;
gtGpuCheck(cudaGetDevice(&device_id));
// Two call shapes for cudaMemPrefetchAsync, selected at compile time:
// with CUDA_VERSION >= 13000 the destination is passed as a cudaMemLocation
// (device type + device_id) followed by a flags argument of 0; otherwise the
// device ordinal is passed directly. Both branches pass nullptr as the stream.
#if (CUDA_VERSION >= 13000)
cudaMemLocation memLocation{cudaMemLocationTypeDevice, device_id};
gtGpuCheck(
cudaMemPrefetchAsync(p, n * sizeof(T), memLocation, 0, nullptr));
#else
gtGpuCheck(cudaMemPrefetchAsync(p, n * sizeof(T), device_id, nullptr));
#endif
}
#endif
}
Expand All @@ -275,8 +281,14 @@ class backend_ops<gt::space::cuda>
#ifndef GTENSOR_DISABLE_PREFETCH
auto mtype = gt::backend::get_managed_memory_type();
if (mtype != gt::backend::managed_memory_type::device) {
#if (CUDA_VERSION >= 13000)
      // CUDA 13 signature: the destination is a cudaMemLocation plus a flags
      // argument. cudaMemLocationTypeHost is the generic host destination and
      // its id field is ignored. cudaMemLocationTypeHostNuma is documented to
      // require id to be a valid host NUMA node id, which cudaCpuDeviceId is
      // not, so it must not be paired with that type.
      cudaMemLocation memLocation{cudaMemLocationTypeHost, 0};
      gtGpuCheck(
        cudaMemPrefetchAsync(p, n * sizeof(T), memLocation, 0, nullptr));
#else
      // Pre-CUDA-13 signature: the host is addressed via cudaCpuDeviceId.
      gtGpuCheck(
        cudaMemPrefetchAsync(p, n * sizeof(T), cudaCpuDeviceId, nullptr));
#endif
}
#endif
}
Expand Down