-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblast.c
More file actions
75 lines (58 loc) · 2.71 KB
/
blast.c
File metadata and controls
75 lines (58 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// RTLD_NEXT is only declared by glibc's <dlfcn.h> when _GNU_SOURCE is defined
// before the first system header is included.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <bits/time.h>
#include <stdio.h>
#include <dlfcn.h>
#include <time.h>
#include <cublas_v2.h>
#include <cuda_runtime_api.h>
// Prototype declarations

// Timing helpers.
// NOTE(review): init_notifier is declared here but no definition is visible in
// this part of the file - confirm it is still needed.
void init_notifier();
void notify_gpu_start(cudaStream_t stream);
void notify_gpu_finish(void* userData);

// Symbols this file defines with the same names and signatures as the library
// versions it wraps.
void* dlopen(const char* filename, int flags);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(
    cublasHandle_t handle,
    cublasOperation_t transa,
    cublasOperation_t transb,
    int m,
    int n,
    int k,
    const float* alpha,
    const float* A,
    int lda,
    const float* B,
    int ldb,
    const float* beta,
    float* C,
    int ldc);
// File-scope state

// Stream most recently obtained from cublasGetStream by the cublasSgemm_v2
// wrapper. Shared by every call that goes through the wrapper.
static cudaStream_t my_stream;

// Pointers to the next definitions of the wrapped symbols. They start out NULL
// and are filled in with dlsym(RTLD_NEXT, ...) the first time each wrapper runs.
static void* (*orig_dlopen)(const char *filename, int flags) = NULL;
static CUBLASAPI cublasStatus_t CUBLASWINAPI (* orig_cublasSgemm_v2)(
    cublasHandle_t handle,
    cublasOperation_t transa,
    cublasOperation_t transb,
    int m,
    int n,
    int k,
    const float* alpha,
    const float* A,
    int lda,
    const float* B,
    int ldb,
    const float* beta,
    float* C,
    int ldc) = NULL;
// Print the current CLOCK_REALTIME time as "[Blast] start: <sec>.<nsec>" on
// stdout. The cublasSgemm_v2 wrapper in this file calls it on the calling host
// thread immediately before forwarding to the real cublasSgemm_v2.
//
// stream: stream the GEMM is issued on. Currently unused; an earlier draft
//         looked up its id with cudaStreamGetId to tag the log line.
//
// If clock_gettime fails, the failure is reported on stderr and no timestamp
// line is printed (rather than printing an uninitialised timespec).
void notify_gpu_start(cudaStream_t stream) {
    struct timespec now;

    (void)stream;  // kept in the signature for future per-stream tagging

    if (clock_gettime(CLOCK_REALTIME, &now) != 0) {
        perror("[Blast] clock_gettime");
        return;
    }

    // time_t is not guaranteed to be `long`; widen explicitly so the format
    // specifier always matches the argument type.
    printf("[Blast] start: %lld.%09ld\n", (long long)now.tv_sec, (long)now.tv_nsec);
}
// Print the current CLOCK_REALTIME time as "[Blast] end: <sec>.<nsec>" on
// stdout. The cublasSgemm_v2 wrapper in this file enqueues it with
// cudaLaunchHostFunc on the GEMM's stream, right after the real GEMM call.
//
// userData: pointer supplied at enqueue time (the wrapper passes the address
//           of its stream variable). Currently unused.
//
// NOTE(review): an earlier draft called cudaStreamGetId here to tag the line.
// The CUDA runtime documentation says a host function launched with
// cudaLaunchHostFunc must not make CUDA API calls, so any stream id would have
// to be computed before enqueueing and passed in through userData.
//
// If clock_gettime fails, the failure is reported on stderr and no timestamp
// line is printed (rather than printing an uninitialised timespec).
void notify_gpu_finish(void* userData) {
    struct timespec now;

    (void)userData;  // see NOTE above

    if (clock_gettime(CLOCK_REALTIME, &now) != 0) {
        perror("[Blast] clock_gettime");
        return;
    }

    // time_t is not guaranteed to be `long`; widen explicitly so the format
    // specifier always matches the argument type.
    printf("[Blast] end: %lld.%09ld\n", (long long)now.tv_sec, (long)now.tv_nsec);
}
// Replacement for dlopen that forwards to the next dlopen in symbol-resolution
// order, with RTLD_GLOBAL added to the caller's flags (and any RTLD_LOCAL bits
// cleared).
// NOTE(review): presumably this is so that libraries the application loads at
// runtime (e.g. cuBLAS) expose their symbols globally - confirm intent.
//
// filename, flags: as for the standard dlopen.
// Returns whatever the next dlopen returns, or NULL if that dlopen cannot be
// located with dlsym.
void* dlopen(const char* filename, int flags) {
    // Resolve the underlying implementation once and cache it.
    if (!orig_dlopen) {
        orig_dlopen = dlsym(RTLD_NEXT, "dlopen");
    }
    if (!orig_dlopen) {
        return NULL;
    }

    const int global_flags = RTLD_GLOBAL | (flags & ~RTLD_LOCAL);
    return orig_dlopen(filename, global_flags);
}
// Replacement for cublasSgemm_v2 that logs a "[Blast] start" timestamp before
// forwarding to the next cublasSgemm_v2 in symbol-resolution order, then
// enqueues notify_gpu_finish on the handle's stream with cudaLaunchHostFunc so
// a "[Blast] end" timestamp is logged from that callback.
//
// All parameters are passed through unchanged to the real cublasSgemm_v2.
//
// Returns the status of the real cublasSgemm_v2, or
// CUBLAS_STATUS_INTERNAL_ERROR if the real symbol cannot be resolved.
// Failures of the timing instrumentation itself (stream query, callback
// enqueue) are reported on stderr and never change the returned status.
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc)
{
    // Resolve the real implementation on first use and cache it.
    if (orig_cublasSgemm_v2 == NULL) {
        orig_cublasSgemm_v2 = dlsym(RTLD_NEXT, "cublasSgemm_v2");
        if (orig_cublasSgemm_v2 == NULL) {
            // dlerror() may return NULL; never hand NULL to %s.
            const char* reason = dlerror();
            fprintf(stderr, "[Blast] Error: %s\n",
                    reason != NULL ? reason : "cublasSgemm_v2 could not be resolved");
            return CUBLAS_STATUS_INTERNAL_ERROR;
        }
    }

    // Query the stream into a local so the value used below cannot be replaced
    // by another call between the query and the enqueue.
    cudaStream_t stream;
    const int have_stream = (cublasGetStream(handle, &stream) == CUBLAS_STATUS_SUCCESS);

    if (have_stream) {
        // NOTE(review): my_stream is shared file-scope state; the pointer given
        // to the callback below therefore refers to "the most recent stream",
        // not necessarily this call's. notify_gpu_finish ignores it today.
        my_stream = stream;
        notify_gpu_start(stream);
    } else {
        fprintf(stderr, "[Blast] Error: cublasGetStream failed; this GEMM will not be timed\n");
    }

    cublasStatus_t res = orig_cublasSgemm_v2(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);

    if (have_stream) {
        // Enqueued even when the GEMM call reported an error, so every
        // "start" line gets a matching "end" line.
        cudaError_t launch_status = cudaLaunchHostFunc(stream, notify_gpu_finish, (void *)&my_stream);
        if (launch_status != cudaSuccess) {
            fprintf(stderr, "[Blast] Error: cudaLaunchHostFunc failed (cudaError %d); no end timestamp for this GEMM\n",
                    (int)launch_status);
        }
    }

    return res;
}