diff --git a/backends/cuda/Makefile.am b/backends/cuda/Makefile.am index 2fd2d163..342b869e 100644 --- a/backends/cuda/Makefile.am +++ b/backends/cuda/Makefile.am @@ -66,16 +66,15 @@ CLEANFILES = \ $(BTX_CUDA_GENERATED) &: $(top_srcdir)/xprof/btx_interval_model.yaml btx_cudamatching_model.yaml btx_cuda_model.yaml $(METABABEL) -u btx_cuda_model.yaml -d $(top_srcdir)/xprof/btx_interval_model.yaml -t FILTER -o btx_filter_cuda -p cudainterval -c interval --matching $(srcdir)/btx_cudamatching_model.yaml -i cuda.h.include -$(MODIFIED_CUDA_HDR) &: $(CUDA_HDR) $(srcdir)/headers.patch +$(MODIFIED_CUDA_HDR) &: $(CUDA_HDR) $(RM) -r modified_include/ cp -r $(srcdir)/include/ modified_include/ chmod -R u+w modified_include/ - cat $(srcdir)/headers.patch | patch -i - -d modified_include/ -s -p1 clean-local: $(RM) -r modified_include -EXTRA_DIST += $(srcdir)/include headers.patch +EXTRA_DIST += $(srcdir)/include CUDA_EXTRACT_H = $(srcdir)/extract/cuda_api.h $(srcdir)/extract/cudart_api.h EXTRA_DIST += $(CUDA_EXTRACT_H) diff --git a/backends/cuda/README.md b/backends/cuda/README.md new file mode 100644 index 00000000..e9e74ada --- /dev/null +++ b/backends/cuda/README.md @@ -0,0 +1,7 @@ +# CUDA Backend + +## VDPAU Stub + +The CUDA SDK includes interop headers (`cudaVDPAU.h`, `cuda_vdpau_interop.h`) that reference VDPAU types (`VdpDevice`, `VdpGetProcAddress`, etc.) from `<vdpau/vdpau.h>`. This system header is not always available. + +A stub is provided in `include/vdpau/vdpau.h` that defines the necessary VDPAU types so that the CUDA tracer can be compiled. 
diff --git a/backends/cuda/extract/cuda_api.h b/backends/cuda/extract/cuda_api.h index 303231d0..25cf1f82 100644 --- a/backends/cuda/extract/cuda_api.h +++ b/backends/cuda/extract/cuda_api.h @@ -1,17 +1,10 @@ #define __CUDA_API_VERSION_INTERNAL = 1 -#define THAPI_NO_INCLUDE #include #include -typedef int32_t VdpStatus; -typedef uint32_t VdpFuncId; -typedef uint32_t VdpDevice; -typedef VdpStatus -VdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer); -typedef uint32_t VdpVideoSurface; -typedef uint32_t VdpOutputSurface; +#include #include diff --git a/backends/cuda/extract/cudart_api.h b/backends/cuda/extract/cudart_api.h index d489c7eb..d987d097 100644 --- a/backends/cuda/extract/cudart_api.h +++ b/backends/cuda/extract/cudart_api.h @@ -1,5 +1,4 @@ #define __CUDA_API_VERSION_INTERNAL = 1 -#define THAPI_NO_INCLUDE #include @@ -7,14 +6,6 @@ #include <__cudart.h> -typedef int32_t VdpStatus; -typedef uint32_t VdpFuncId; -typedef uint32_t VdpDevice; -typedef VdpStatus -VdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer); -typedef uint32_t VdpVideoSurface; -typedef uint32_t VdpOutputSurface; - #include #include diff --git a/backends/cuda/gen_cuda_exports_extract.rb b/backends/cuda/gen_cuda_exports_extract.rb index 69643b0f..490484eb 100644 --- a/backends/cuda/gen_cuda_exports_extract.rb +++ b/backends/cuda/gen_cuda_exports_extract.rb @@ -5,7 +5,6 @@ puts <<~EOF #define __CUDA_API_VERSION_INTERNAL=1 - #define THAPI_NO_INCLUDE #include EOF diff --git a/backends/cuda/headers.patch b/backends/cuda/headers.patch deleted file mode 100644 index 66a2002a..00000000 --- a/backends/cuda/headers.patch +++ /dev/null @@ -1,109 +0,0 @@ -diff -u4 -r --new-file include/cuda.h modified_include/cuda.h ---- include/cuda.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/cuda.h 2026-03-30 22:10:41.000000000 +0000 -@@ -290,9 +290,11 @@ - - /** - * CUDA IPC handle size - */ -+#ifndef CU_IPC_HANDLE_SIZE - #define 
CU_IPC_HANDLE_SIZE 64 -+#endif - - /** - * CUDA IPC event handle - */ -@@ -499,9 +501,9 @@ - * Per-operation parameters for ::cuStreamBatchMemOp - */ - typedef union CUstreamBatchMemOpParams_union { - CUstreamBatchMemOpType operation; -- struct CUstreamMemOpWaitValueParams_st { -+ struct { - CUstreamBatchMemOpType operation; - CUdeviceptr address; - union { - cuuint32_t value; -@@ -509,9 +511,9 @@ - }; - unsigned int flags; - CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */ - } waitValue; -- struct CUstreamMemOpWriteValueParams_st { -+ struct { - CUstreamBatchMemOpType operation; - CUdeviceptr address; - union { - cuuint32_t value; -@@ -519,13 +521,13 @@ - }; - unsigned int flags; - CUdeviceptr alias; /**< For driver internal use. Initial value is unimportant. */ - } writeValue; -- struct CUstreamMemOpFlushRemoteWritesParams_st { -+ struct { - CUstreamBatchMemOpType operation; - unsigned int flags; - } flushRemoteWrites; -- struct CUstreamMemOpMemoryBarrierParams_st { /**< Only supported in the _v2 API */ -+ struct { /**< Only supported in the _v2 API */ - CUstreamBatchMemOpType operation; - unsigned int flags; - } memoryBarrier; - cuuint64_t pad[6]; -@@ -1338,9 +1340,11 @@ - - /* - * Indicates that compute device class supports accelerated features. - */ -+#ifndef CU_COMPUTE_ACCELERATED_TARGET_BASE - #define CU_COMPUTE_ACCELERATED_TARGET_BASE 0x10000 -+#endif - - /** - * Online compilation targets - */ -@@ -2851,9 +2855,11 @@ - - /** - * Size of tensor map descriptor - */ -+#ifndef CU_TENSOR_MAP_NUM_QWORDS - #define CU_TENSOR_MAP_NUM_QWORDS 16 -+#endif - - /** - * Tensor map descriptor. Requires compiler support for aligning to 64 bytes. 
- */ -diff -u4 -r --new-file include/cuda_vdpau_interop.h modified_include/cuda_vdpau_interop.h ---- include/cuda_vdpau_interop.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/cuda_vdpau_interop.h 2026-03-30 22:10:41.000000000 +0000 -@@ -49,11 +49,13 @@ - - #if !defined(__CUDA_VDPAU_INTEROP_H__) - #define __CUDA_VDPAU_INTEROP_H__ - -+#ifndef THAPI_NO_INCLUDE - #include "cuda_runtime_api.h" - - #include -+#endif - - #if defined(__cplusplus) - extern "C" { - #endif /* __cplusplus */ -diff -u4 -r --new-file include/driver_types.h modified_include/driver_types.h ---- include/driver_types.h 2026-03-27 20:57:07.000000000 +0000 -+++ modified_include/driver_types.h 2026-03-30 22:10:41.000000000 +0000 -@@ -2292,9 +2292,11 @@ - - /** - * CUDA IPC Handle Size - */ -+#ifndef CUDA_IPC_HANDLE_SIZE - #define CUDA_IPC_HANDLE_SIZE 64 -+#endif - - /** - * CUDA IPC event handle - */ diff --git a/backends/cuda/include/vdpau/vdpau.h b/backends/cuda/include/vdpau/vdpau.h new file mode 100644 index 00000000..bf49ccc0 --- /dev/null +++ b/backends/cuda/include/vdpau/vdpau.h @@ -0,0 +1,15 @@ +/* Stub for VDPAU types used by CUDA interop headers. */ +#ifndef THAPI_VDPAU_STUB_H +#define THAPI_VDPAU_STUB_H + +#include + +typedef int32_t VdpStatus; +typedef uint32_t VdpFuncId; +typedef uint32_t VdpDevice; +typedef VdpStatus VdpGetProcAddress(VdpDevice device, VdpFuncId function_id, + void **function_pointer); +typedef uint32_t VdpVideoSurface; +typedef uint32_t VdpOutputSurface; + +#endif diff --git a/utils/gen_library_base.rb b/utils/gen_library_base.rb index 693bf711..5b96413b 100644 --- a/utils/gen_library_base.rb +++ b/utils/gen_library_base.rb @@ -1,5 +1,9 @@ require_relative 'yaml_ast' +def has_typedef?(name) + $all_types.any? 
{ |t| t.type.respond_to?(:name) && t.type.name == name } +end + def to_ffi_name(name, default = true) case name when nil @@ -337,18 +341,27 @@ module Composite def to_ffi unamed_count = 0 members.map do |m| - mt = if m.type.is_a?(Array) + mt = case m.type + when Array m.type.to_ffi - elsif m.type.is_a?(Pointer) + when Pointer ':pointer' - elsif m.type.name - to_ffi_name(m.type.name) - elsif m.type.is_a?(Struct) - "(Class::new(#{FFI_STRUCT}) { layout #{gen_layout(m.type.to_ffi)} }.by_value)" - elsif m.type.is_a?(Union) - "(Class::new(#{FFI_UNION}) { layout #{gen_layout(m.type.to_ffi)} }.by_value)" + when Struct + if m.type.name && has_typedef?(m.type.name) + to_ffi_name(m.type.name) + else + s = m.type.name ? $all_structs.find { |st| st.name == m.type.name } : m.type + "(Class::new(#{FFI_STRUCT}) { layout #{gen_layout(s.to_ffi)} }.by_value)" + end + when Union + if m.type.name && has_typedef?(m.type.name) + to_ffi_name(m.type.name) + else + u = m.type.name ? $all_unions&.find { |un| un.name == m.type.name } : m.type + "(Class::new(#{FFI_UNION}) { layout #{gen_layout(u.to_ffi)} }.by_value)" + end else - raise "unknown type: #{m.type}" + m.type.name ? to_ffi_name(m.type.name) : raise("unknown type: #{m.type}") end [m.name ? m.name.to_sym.inspect : ":_unamed_#{unamed_count += 1}", mt] end