#include #include #include #include #include #include #include #include #include #include "CL/cl.h" #include "CL/cl_ext.h" #define ITERS 1l #define DATA_SIZE (16384) #define CL_CHECK_STATUS(error) { \ if ((error) != CL_SUCCESS) fprintf (stderr, "OpenCL error <%s:%i>: %i\n", __FILE__, __LINE__, (error)); } int sysfs_write(const char *path, const char *arg) { int err; FILE *f = fopen(path, "w"); if (!f) return -ENOENT; err = (fprintf(f, "%s", arg) < 1); fclose(f); return err; } int sysfs_read(const char *path, size_t size, char *arg) { int err = 0; size_t bytes; FILE *f = fopen(path, "r"); if (!f) return -ENOENT; bytes = fread((void*)arg, 1, size - 1, f); if (bytes == 0) err = EIO; else arg[bytes] = 0; fclose(f); return err; } int sysfs_write_int(const char *path, long arg) { char s[16]; sprintf(s, "%d", arg); return sysfs_write(path, s); } long sysfs_read_int(const char *path, int *reterr) { int err; char s[32]; long res = 0; err = sysfs_read(path, 32, s); if (!err) { if (s[1] == 'x') { err = (sscanf(s + 2, "%lx", &res) < 1); } else { err = (sscanf(s, "%lu", &res) < 1); } } if (reterr) *reterr = err; return res; } int main(void) { int i; size_t work_size = 1; cl_context context; cl_command_queue command_queue; cl_int err; cl_uint num_of_platforms=0; cl_platform_id platform_id; cl_device_id device_id; cl_uint num_of_devices=0; cl_mem input;//, output, host; cl_event event; cl_int status; cl_command_type type; size_t res_size; uint32_t check; CL_CHECK_STATUS(clGetPlatformIDs(1, &platform_id, &num_of_platforms)); CL_CHECK_STATUS(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id,&num_of_devices)); cl_context_properties properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id, 0 }; context = clCreateContext(properties, 1, &device_id, NULL,NULL, &err); CL_CHECK_STATUS(err); command_queue = clCreateCommandQueue(context, device_id, 0, &err); CL_CHECK_STATUS(err); // CL_MEM_ALLOC_HOST_PTR does not help to access the data directly sysfs_write_int("/sys/class/nvtrace/enable", 1); input = clCreateBuffer(context, CL_MEM_READ_WRITE/*|CL_MEM_ALLOC_HOST_PTR*/, DATA_SIZE, NULL, &err); CL_CHECK_STATUS(err); CL_CHECK_STATUS (clEnqueueWriteBuffer (command_queue, input, CL_TRUE, 0, 4, &i, 0, NULL, NULL)); sysfs_write_int("/sys/class/nvtrace/enable", 0); uint32_t nv_user = sysfs_read_int("/sys/class/nvtrace/user", NULL); uint32_t nv_buffer = sysfs_read_int("/sys/class/nvtrace/buffer", NULL); printf("Detected context & buffer: %x %x\n", nv_user, nv_buffer); int nvfd = open("/dev/nvidia0", O_RDWR); if (nvfd >= 0) { uint32_t gpu_method_param = nv_buffer; uint32_t gpu_method[8] = { nv_user, 0x5c00000eu, 0x503c0104u, 0x00000000u, 0, 0, 0x00000004u, 0x00000000u }; ((uint64_t*)gpu_method)[2] = (uint64_t)&gpu_method_param; sysfs_write_int("/sys/class/nvtrace/enable", 1); if (ioctl(nvfd, 0xc020462a, gpu_method) < 0) printf(" ==> Error communicating with nvidia device, errno %i\n", errno); sysfs_write_int("/sys/class/nvtrace/enable", 0); close(nvfd); } clReleaseMemObject(input); clReleaseCommandQueue(command_queue); clReleaseContext(context); }