-
Notifications
You must be signed in to change notification settings - Fork 38
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
326 lines (287 loc) · 12.4 KB
/
CMakeLists.txt
File metadata and controls
326 lines (287 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# Minimum CMake version; this also selects the policy defaults for the file.
cmake_minimum_required(VERSION 3.20)
project(turboquant VERSION 0.1.0 LANGUAGES C CXX)

# Language dialects. *_STANDARD_REQUIRED turns a missing C11/C++17 mode into a
# hard error instead of a silent fallback to an older dialect.
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Build switches. Everything is opt-in except the examples.
option(TQ_BUILD_TESTS "Build tests" OFF)
option(TQ_BUILD_BENCH "Build benchmarks" OFF)
option(TQ_BUILD_CUDA "Build CUDA backend" OFF)
option(TQ_BUILD_METAL "Build Metal backend" OFF)
option(TQ_BUILD_VULKAN "Build Vulkan backend" OFF)
option(TQ_BUILD_ROCM "Build ROCm/HIP backend" OFF)
option(TQ_BUILD_SERVER "Build OpenAI-compatible HTTP server" OFF)
option(TQ_BUILD_EXAMPLES "Build examples" ON)

# Threads (pthread); provides the Threads::Threads imported target.
find_package(Threads REQUIRED)
# Core library
#
# Sources are gathered per subsystem directory. CONFIGURE_DEPENDS re-checks
# the globs at build time, so adding or removing a .c file triggers a
# re-configure instead of being silently ignored.
file(GLOB TQ_CORE_SOURCES CONFIGURE_DEPENDS src/core/*.c)
file(GLOB TQ_CACHE_SOURCES CONFIGURE_DEPENDS src/cache/*.c)
file(GLOB TQ_CPU_SOURCES CONFIGURE_DEPENDS src/backend/cpu/*.c)
file(GLOB TQ_ENGINE_SOURCES CONFIGURE_DEPENDS src/engine/*.c)

# Static library target; the same source lists are reused by the shared build.
add_library(turboquant STATIC
    ${TQ_CORE_SOURCES}
    ${TQ_CACHE_SOURCES}
    ${TQ_CPU_SOURCES}
    ${TQ_ENGINE_SOURCES}
)

# PUBLIC: consumers that link turboquant inherit the include/ directory.
target_include_directories(turboquant PUBLIC include)
# Platform-specific link libraries and compiler settings for the static library
if(NOT MSVC)
    # Non-MSVC toolchains: link libm explicitly together with the thread library.
    target_link_libraries(turboquant PRIVATE m Threads::Threads)
else()
    # MSVC: math lives in the CRT, so only the thread library is linked.
    target_link_libraries(turboquant PRIVATE Threads::Threads)
    target_compile_definitions(turboquant PRIVATE _CRT_SECURE_NO_WARNINGS _USE_MATH_DEFINES)
    # Warning level 3, minus warnings that fire on valid C11.
    target_compile_options(turboquant PRIVATE
        /W3
        /wd4244
        /wd4267
        /wd4996
    )
endif()
# Apple Accelerate framework (cblas_sgemv via AMX coprocessor)
# The lookup only runs on Apple platforms; ACCELERATE_LIB stays unset elsewhere.
if(APPLE)
    find_library(ACCELERATE_LIB Accelerate)
endif()
if(APPLE AND ACCELERATE_LIB)
    target_link_libraries(turboquant PRIVATE ${ACCELERATE_LIB})
    target_compile_definitions(turboquant PRIVATE
        TQ_HAS_ACCELERATE=1
        ACCELERATE_NEW_LAPACK=1
    )
    message(STATUS "quant.cpp: Accelerate framework enabled (cblas/AMX)")
endif()
# Shared library for Python bindings
# Declared first, then populated with the same source lists as the static library.
add_library(turboquant_shared SHARED)
target_sources(turboquant_shared PRIVATE
    ${TQ_CORE_SOURCES}
    ${TQ_CACHE_SOURCES}
    ${TQ_CPU_SOURCES}
    ${TQ_ENGINE_SOURCES}
)
# Consumers of the shared library inherit the include/ directory as well.
target_include_directories(turboquant_shared PUBLIC include)
# Platform-specific linking for the shared library (mirrors the static target)
if(NOT MSVC)
    target_link_libraries(turboquant_shared PRIVATE m Threads::Threads)
else()
    # MSVC: only the thread library is linked; CRT/math-constant defines and
    # the same warning configuration as the static library are applied.
    target_link_libraries(turboquant_shared PRIVATE Threads::Threads)
    target_compile_definitions(turboquant_shared PRIVATE
        _CRT_SECURE_NO_WARNINGS
        _USE_MATH_DEFINES
    )
    target_compile_options(turboquant_shared PRIVATE /W3 /wd4244 /wd4267 /wd4996)
endif()
# Accelerate for shared library too
if(APPLE)
    if(ACCELERATE_LIB)
        target_link_libraries(turboquant_shared PRIVATE ${ACCELERATE_LIB})
        target_compile_definitions(turboquant_shared PRIVATE
            TQ_HAS_ACCELERATE=1
            ACCELERATE_NEW_LAPACK=1
        )
    endif()
endif()

# Emit the shared library under the same base name as the static one and
# build it as position-independent code.
# NOTE(review): static and shared artifacts share the base name "turboquant";
# confirm this cannot collide on platforms where both use the same extension.
set_property(TARGET turboquant_shared PROPERTY OUTPUT_NAME turboquant)
set_property(TARGET turboquant_shared PROPERTY POSITION_INDEPENDENT_CODE ON)
# Metal backend (Apple Silicon GPU)
#
# Enabled only when TQ_BUILD_METAL is ON and the host is an Apple platform.
# Shaders are compiled to .air files and linked into a single .metallib in the
# build tree; the Objective-C dispatch sources are added to both libraries.
if(TQ_BUILD_METAL AND APPLE)
    enable_language(OBJC)

    # Probe for the Metal shader compiler (requires Xcode). The exit status is
    # captured in TQ_METAL_COMPILER_FOUND and the tool path in TQ_METAL_COMPILER.
    execute_process(
        COMMAND xcrun --find metal
        OUTPUT_VARIABLE TQ_METAL_COMPILER
        ERROR_QUIET
        OUTPUT_STRIP_TRAILING_WHITESPACE
        RESULT_VARIABLE TQ_METAL_COMPILER_FOUND
    )

    # CONFIGURE_DEPENDS: pick up added/removed shaders without a manual re-configure.
    file(GLOB TQ_METAL_SHADERS CONFIGURE_DEPENDS src/backend/metal/*.metal)
    set(TQ_METALLIB "${CMAKE_BINARY_DIR}/turboquant.metallib")

    if(TQ_METAL_COMPILER_FOUND EQUAL 0 AND TQ_METAL_COMPILER)
        # Build individual .air files then link into .metallib
        set(TQ_METAL_AIR_FILES "")
        foreach(shader IN LISTS TQ_METAL_SHADERS)
            get_filename_component(shader_name "${shader}" NAME_WE)
            set(air_file "${CMAKE_BINARY_DIR}/${shader_name}.air")
            # VERBATIM keeps argument escaping platform-independent, so paths
            # containing spaces reach xcrun intact.
            add_custom_command(
                OUTPUT "${air_file}"
                COMMAND xcrun -sdk macosx metal -c "${shader}" -o "${air_file}"
                        -std=metal3.0 -O2
                DEPENDS "${shader}"
                COMMENT "Compiling Metal shader: ${shader_name}.metal"
                VERBATIM
            )
            list(APPEND TQ_METAL_AIR_FILES "${air_file}")
        endforeach()

        # TQ_METAL_AIR_FILES is expanded unquoted on purpose: each .air file
        # must be passed as its own argument.
        add_custom_command(
            OUTPUT "${TQ_METALLIB}"
            COMMAND xcrun -sdk macosx metallib ${TQ_METAL_AIR_FILES} -o "${TQ_METALLIB}"
            DEPENDS ${TQ_METAL_AIR_FILES}
            COMMENT "Linking Metal library: turboquant.metallib"
            VERBATIM
        )
        add_custom_target(turboquant_metallib ALL DEPENDS "${TQ_METALLIB}")
    else()
        message(WARNING "quant.cpp: Metal shader compiler not found (need Xcode). "
                        "Shaders will not be compiled — runtime .metallib loading will fail. "
                        "Obj-C dispatch code will still be built.")
        add_custom_target(turboquant_metallib)  # no-op target
    endif()

    # Compile Objective-C dispatch files
    file(GLOB TQ_METAL_OBJ_SOURCES CONFIGURE_DEPENDS src/backend/metal/*.m)
    target_sources(turboquant PRIVATE ${TQ_METAL_OBJ_SOURCES})
    target_sources(turboquant_shared PRIVATE ${TQ_METAL_OBJ_SOURCES})

    # Set Objective-C ARC for .m files
    set_source_files_properties(${TQ_METAL_OBJ_SOURCES} PROPERTIES
        COMPILE_FLAGS "-fobjc-arc"
    )

    # Link Metal and Foundation frameworks
    target_link_libraries(turboquant PRIVATE
        "-framework Metal" "-framework Foundation"
    )
    target_link_libraries(turboquant_shared PRIVATE
        "-framework Metal" "-framework Foundation"
    )

    # Define TQ_HAS_METAL for conditional compilation
    target_compile_definitions(turboquant PRIVATE TQ_HAS_METAL=1)
    target_compile_definitions(turboquant_shared PRIVATE TQ_HAS_METAL=1)

    # Ensure metallib is built before the library
    add_dependencies(turboquant turboquant_metallib)
    add_dependencies(turboquant_shared turboquant_metallib)

    message(STATUS "quant.cpp: Metal backend enabled — shaders: ${TQ_METALLIB}")
endif()
# CUDA backend (NVIDIA GPU)
# When enabled, every .cu file under src/backend/cuda is added to both library
# targets and TQ_BUILD_CUDA=1 is defined for their sources.
if(TQ_BUILD_CUDA)
    enable_language(CUDA)
    file(GLOB TQ_CUDA_SOURCES src/backend/cuda/*.cu)
    # Mark the globbed files as CUDA sources.
    set_source_files_properties(${TQ_CUDA_SOURCES} PROPERTIES LANGUAGE CUDA)
    foreach(tq_cuda_target IN ITEMS turboquant turboquant_shared)
        target_sources(${tq_cuda_target} PRIVATE ${TQ_CUDA_SOURCES})
        target_compile_definitions(${tq_cuda_target} PRIVATE TQ_BUILD_CUDA=1)
    endforeach()
    message(STATUS "quant.cpp: CUDA backend enabled")
endif()
# Vulkan backend (cross-platform GPU — AMD, NVIDIA, Intel)
# A missing SDK is not fatal: the build continues without the backend.
if(TQ_BUILD_VULKAN)
    find_package(Vulkan QUIET)
    if(NOT Vulkan_FOUND)
        message(WARNING "quant.cpp: TQ_BUILD_VULKAN=ON but Vulkan SDK not found. "
                        "Install Vulkan SDK or set VULKAN_SDK environment variable. "
                        "Building without Vulkan backend.")
    else()
        file(GLOB TQ_VULKAN_SOURCES src/backend/vulkan/*.c)
        # Apply sources, link dependency and feature define to both libraries.
        foreach(tq_vk_target IN ITEMS turboquant turboquant_shared)
            target_sources(${tq_vk_target} PRIVATE ${TQ_VULKAN_SOURCES})
            target_link_libraries(${tq_vk_target} PRIVATE Vulkan::Vulkan)
            target_compile_definitions(${tq_vk_target} PRIVATE TQ_BUILD_VULKAN=1)
        endforeach()
        message(STATUS "quant.cpp: Vulkan backend enabled (${Vulkan_LIBRARY})")
    endif()
endif()
# ROCm/HIP backend (AMD GPU)
# A missing HIP package only produces a warning; the backend is then left out.
if(TQ_BUILD_ROCM)
    find_package(hip QUIET)
    if(hip_FOUND)
        file(GLOB TQ_ROCM_SOURCES src/backend/rocm/*.cpp)

        # Static library
        target_sources(turboquant PRIVATE ${TQ_ROCM_SOURCES})
        target_link_libraries(turboquant PRIVATE hip::device)
        target_compile_definitions(turboquant PRIVATE TQ_BUILD_ROCM=1)

        # Shared library
        target_sources(turboquant_shared PRIVATE ${TQ_ROCM_SOURCES})
        target_link_libraries(turboquant_shared PRIVATE hip::device)
        target_compile_definitions(turboquant_shared PRIVATE TQ_BUILD_ROCM=1)

        message(STATUS "quant.cpp: ROCm/HIP backend enabled")
    else()
        message(WARNING
            "quant.cpp: TQ_BUILD_ROCM=ON but HIP SDK not found. "
            "Install ROCm or set HIP_PATH. "
            "Building without ROCm backend.")
    endif()
endif()
# Compiler warnings (skip on MSVC — handled separately above)
if(NOT MSVC)
    # GCC/Clang-style warning flags for the host compiler.
    set(TQ_WARNING_FLAGS -Wall -Wextra -Wpedantic -Wno-unused-parameter)
    # The generator expression withholds the flags from CUDA sources: target
    # options apply to every language in the target, and the CUDA compiler
    # driver is not guaranteed to accept host-style -W options.
    foreach(tq_warn_target IN ITEMS turboquant turboquant_shared)
        target_compile_options(${tq_warn_target} PRIVATE
            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:${TQ_WARNING_FLAGS}>")
    endforeach()
endif()
# Tests
#
# Each tests/*.cpp file becomes its own GoogleTest executable and CTest entry,
# named after the file. GoogleTest v1.14.0 is fetched at configure time.
if(TQ_BUILD_TESTS)
    enable_testing()
    include(FetchContent)

    # DOWNLOAD_EXTRACT_TIMESTAMP is only understood by CMake 3.24+, while this
    # project accepts 3.20; pass it only where it is a known keyword.
    set(tq_gtest_timestamp_args "")
    if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
        set(tq_gtest_timestamp_args DOWNLOAD_EXTRACT_TIMESTAMP TRUE)
    endif()
    FetchContent_Declare(googletest
        URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz
        ${tq_gtest_timestamp_args}
    )
    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
    FetchContent_MakeAvailable(googletest)

    file(GLOB TEST_SOURCES tests/*.cpp)
    foreach(test_src IN LISTS TEST_SOURCES)
        get_filename_component(test_name "${test_src}" NAME_WE)
        add_executable(${test_name} "${test_src}")
        target_link_libraries(${test_name} PRIVATE turboquant GTest::gtest_main)
        add_test(NAME ${test_name} COMMAND ${test_name})

        # Slow tests on MSVC (no auto-vectorization, weaker codegen) need more time.
        # The name is quoted so if() compares the string itself rather than
        # dereferencing a variable that happens to share the test's name.
        if(MSVC AND "${test_name}" MATCHES "test_(multihash_dim64|ops|unbiased|cumulative_error)")
            set_tests_properties(${test_name} PROPERTIES TIMEOUT 600)
        endif()

        # Pass Metal availability to test targets
        if(TQ_BUILD_METAL AND APPLE)
            target_compile_definitions(${test_name} PRIVATE TQ_HAS_METAL=1)
        endif()
    endforeach()

    # llama.cpp integration test
    # PROJECT_SOURCE_DIR keeps the include paths correct when this project is
    # consumed through add_subdirectory() from another build.
    add_executable(test_llamacpp_integration
        integrations/llamacpp/test_integration.cpp)
    target_include_directories(test_llamacpp_integration PRIVATE
        "${PROJECT_SOURCE_DIR}/include"
        "${PROJECT_SOURCE_DIR}/integrations/llamacpp")
    target_link_libraries(test_llamacpp_integration PRIVATE turboquant GTest::gtest_main)
    add_test(NAME test_llamacpp_integration COMMAND test_llamacpp_integration)
endif()
# Benchmarks
# Every .cpp under bench/ and bench/performance/ becomes an executable named
# after the file (without extension) and linked against the static library.
if(TQ_BUILD_BENCH)
    file(GLOB BENCH_SOURCES bench/*.cpp)
    file(GLOB BENCH_PERF_SOURCES bench/performance/*.cpp)
    list(APPEND BENCH_SOURCES ${BENCH_PERF_SOURCES})
    foreach(bench_src IN LISTS BENCH_SOURCES)
        get_filename_component(bench_name "${bench_src}" NAME_WE)
        add_executable(${bench_name} "${bench_src}")
        # Explicit PRIVATE: the keyword-less signature has legacy semantics.
        target_link_libraries(${bench_name} PRIVATE turboquant)
    endforeach()
endif()
# Command-line tools. All link the static library; PRIVATE is spelled out
# because the keyword-less target_link_libraries signature has legacy semantics.

# CLI inference tool
add_executable(quant tools/quant.c)
target_link_libraries(quant PRIVATE turboquant)

# Debug comparison tool
add_executable(debug_compare tools/debug_compare.c)
target_link_libraries(debug_compare PRIVATE turboquant)

# TQM converter tool (not on Windows — uses POSIX dirent/mmap)
if(NOT MSVC)
    add_executable(tq_convert tools/tq_convert.c)
    target_link_libraries(tq_convert PRIVATE turboquant)
endif()
# Examples
if(TQ_BUILD_EXAMPLES)
    file(GLOB EXAMPLE_C_SOURCES examples/*.c)
    file(GLOB EXAMPLE_CXX_SOURCES examples/*.cpp)

    # Library-based examples: one executable per source file, linked to turboquant.
    foreach(ex_src IN LISTS EXAMPLE_C_SOURCES EXAMPLE_CXX_SOURCES)
        get_filename_component(ex_name "${ex_src}" NAME_WE)
        # Skip single-header examples (built separately below — they use quant.h, not turboquant)
        if(ex_name MATCHES "^embed_" OR ex_name STREQUAL "single_header_example")
            continue()
        endif()
        add_executable(${ex_name} "${ex_src}")
        target_link_libraries(${ex_name} PRIVATE turboquant)
    endforeach()

    # Single-header examples (use quant.h directly — link only libm + threads).
    # They are not built with MSVC.
    if(NOT MSVC)
        foreach(tq_sh_example IN ITEMS
                embed_minimal
                embed_chat
                embed_kv_compare
                single_header_example)
            add_executable(${tq_sh_example} examples/${tq_sh_example}.c)
            target_link_libraries(${tq_sh_example} PRIVATE m Threads::Threads)
        endforeach()
    endif()
endif()
# OpenAI-compatible HTTP server (POSIX only — uses sys/socket.h)
# The outer condition already excludes MSVC, so libm and the GCC/Clang warning
# flags are applied unconditionally inside the block.
if(TQ_BUILD_SERVER AND NOT MSVC)
    add_executable(quant-server src/server/tq_server.c)
    # PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps these paths valid
    # when the project is built as a subdirectory of another project.
    target_include_directories(quant-server PRIVATE
        "${PROJECT_SOURCE_DIR}/src/server"
        "${PROJECT_SOURCE_DIR}/include")
    target_compile_definitions(quant-server PRIVATE TQ_SERVER_MAIN)
    target_link_libraries(quant-server PRIVATE turboquant Threads::Threads m)
    target_compile_options(quant-server PRIVATE
        -Wall -Wextra -Wpedantic -Wno-unused-parameter)
    message(STATUS "quant.cpp: HTTP server target enabled (quant-server)")
endif()