-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
65 lines (46 loc) · 2.03 KB
/
Makefile
File metadata and controls
65 lines (46 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# ---- Toolchain -------------------------------------------------------------
# Host C compiler and CUDA compiler driver used by the rules below.
CC   := gcc
NVCC := nvcc

# CUDA install prefix; `?=` lets the environment or command line override it.
CUDA_HOME ?= /usr/local/cuda

# ---- GPU code generation ---------------------------------------------------
# Semicolon-separated SM versions to build *native* cubins for (adjust to
# your fleet). Quote the value when overriding from a shell, e.g.
#   make CUDA_ARCHS="80;86"
CUDA_ARCHS ?= 80;86;89;90

# Compute capability embedded as PTX so newer GPUs can JIT-compile the
# kernels. Pick the highest value your CUDA toolkit can compile. This used to
# be hard-coded to 90; the default is unchanged, but it can now be lowered
# (or raised) without editing the file.
CUDA_PTX_ARCH ?= 90

# One `-gencode arch=compute_XX,code=sm_XX` option per entry in CUDA_ARCHS.
# The semicolons are turned into spaces so $(foreach) can iterate the list.
GENCODE_NATIVE := $(foreach A,$(subst ;, ,$(CUDA_ARCHS)),-gencode arch=compute_$(A),code=sm_$(A))

# Single PTX target (code=compute_XX) kept for forward compatibility.
GENCODE_PTX := -gencode arch=compute_$(CUDA_PTX_ARCH),code=compute_$(CUDA_PTX_ARCH)
# ---- Compiler / linker flags -----------------------------------------------
# Header search path shared by the gcc and nvcc command lines.
CINCLUDE := -Iinclude

# Host C flags. -DTS and -DLOCAL_SIZE are compile-time constants consumed by
# the sources. -MMD -MP make the compiler write a .d dependency file next to
# each object (picked up by the `-include` of the .d files).
# NOTE(review): -march=native plus the explicit -mavx512* switches tie the
# binary to the build host / AVX-512 capable CPUs -- confirm this is intended.
CFLAGS := -O3 -DTS=512 -DLOCAL_SIZE=256 -march=native -mavx512f -mavx512bw -mavx512vl -Wall -Wextra -fopenmp -pthread $(CINCLUDE) -MMD -MP

# CUDA flags. -Xcompiler forwards the OpenMP/pthread options to the host
# compiler that nvcc invokes; the -gencode options come from the GENCODE_*
# variables defined earlier in this file.
NVFLAGS := -O3 -std=c++14 --expt-extended-lambda \
           -Xcompiler "-fopenmp -pthread" $(CINCLUDE) \
           $(GENCODE_NATIVE) $(GENCODE_PTX) -MMD -MP

# Library search path for the CUDA libraries. Derived from CUDA_HOME so that
# overriding the install prefix also relocates the link path (it used to be
# hard-coded to /usr/local/cuda, which is still the default value).
LDFLAGS := -L$(CUDA_HOME)/targets/x86_64-linux/lib

# Libraries passed to the link step: OpenMP runtime and static cuFFT with its
# culibos support library.
LDLIBS := -lgomp -lcufft_static -lculibos
# ---- Project layout --------------------------------------------------------
# Sources live under $(SRCDIR); objects mirror that tree under $(OBJDIR).
SRCDIR := src
OBJDIR := build

# Name of the final executable.
BIN_SORT := sort

# Each source group is globbed once (`:=`) and mapped to its object list by
# swapping the directory prefix and the file extension.
CPU_SRCS := $(wildcard $(SRCDIR)/cpu/*.c)
CPU_OBJS := $(CPU_SRCS:$(SRCDIR)/%.c=$(OBJDIR)/%.o)

CORE_SRCS := $(wildcard $(SRCDIR)/core/*.c)
CORE_OBJS := $(CORE_SRCS:$(SRCDIR)/%.c=$(OBJDIR)/%.o)

MAIN_SRCS := $(wildcard $(SRCDIR)/*.c)
MAIN_OBJS := $(MAIN_SRCS:$(SRCDIR)/%.c=$(OBJDIR)/%.o)

GPU_SRCS := $(wildcard $(SRCDIR)/gpu/*.cu)
GPU_OBJS := $(GPU_SRCS:$(SRCDIR)/%.cu=$(OBJDIR)/%.o)

# Every object that goes into the link, in link order.
OBJS := $(CPU_OBJS) $(CORE_OBJS) $(MAIN_OBJS) $(GPU_OBJS)
# ---- Top-level targets -----------------------------------------------------
# These names are commands, not files; declaring them phony keeps a stray
# file called `all`, `clean` or `debug` from masking the target.
.PHONY: all clean debug

# If a recipe fails, delete the target it was writing so a truncated object
# or executable is never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Default goal: build the executable.
all: $(BIN_SORT)

# Link every object with nvcc so the CUDA libraries are resolved.
# `-cudart static` links the CUDA runtime statically.
$(BIN_SORT): $(OBJS)
	$(NVCC) -o $@ $^ $(LDFLAGS) $(LDLIBS) -cudart static
# ---- Compilation rules -----------------------------------------------------
# Host C: $(SRCDIR)/<path>.c -> $(OBJDIR)/<path>.o
# The output directory is created on demand because objects mirror the
# source sub-directories.
$(OBJDIR)/%.o: $(SRCDIR)/%.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -c $< -o $@

# CUDA: $(SRCDIR)/<path>.cu -> $(OBJDIR)/<path>.o
$(OBJDIR)/%.o: $(SRCDIR)/%.cu
	@mkdir -p $(@D)
	$(NVCC) $(NVFLAGS) -c $< -o $@

# Pull in the compiler-generated dependency files (one .d per object).
# The leading `-` silences the include when they do not exist yet.
-include $(patsubst %.o,%.d,$(OBJS))
# ---- Debug build -----------------------------------------------------------
# `make debug` wipes the build tree and rebuilds everything unoptimised with
# debug info and -DDEBUG.
#
# The flag sets are target-specific variables, so they only apply while
# `debug` is being built.
# NOTE(review): unlike the default CFLAGS these omit -DTS=512 and
# -DLOCAL_SIZE=256 and use -mavx2 instead of the -mavx512* switches --
# confirm that difference is deliberate.
debug: CFLAGS := -g -O0 -DDEBUG -march=native -mavx2 -Wall -Wextra -fopenmp -pthread $(CINCLUDE) -MMD -MP
debug: NVFLAGS := -g -O0 -std=c++14 --expt-extended-lambda \
                  -Xcompiler "-g -O0 -fopenmp -pthread" $(CINCLUDE) \
                  $(GENCODE_NATIVE) $(GENCODE_PTX) -MMD -MP

# `clean` is the only prerequisite, so it has finished before this recipe
# starts. Listing `clean all` side by side let `make -j debug` delete the
# build tree while objects were being compiled into it. The rebuild runs in
# a sub-make; the debug flags are passed on its command line so they
# override the default assignments there. Single quotes keep the embedded
# double quotes of NVFLAGS intact.
debug: clean
	$(MAKE) all CFLAGS='$(CFLAGS)' NVFLAGS='$(NVFLAGS)'
# Remove everything this Makefile produces: the object/dependency tree and
# the linked executable. $(RM) is make's built-in `rm -f`.
clean:
	$(RM) -r $(OBJDIR) $(BIN_SORT)