18
static cl_platform_id platform = 0;
19
static cl_device_id device = 0;
21
static cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
22
static cl_context ctx = 0;
23
static cl_command_queue queue = 0;
25
static cl_mem dev_res, dev_a, dev_b;
27
static cl_program app;
28
static cl_kernel kernel;
30
static size_t matrix_size;
32
int exercise_required_alignment = PPT * BLOCK_SIZE;
34
int exercise_init(const char *name, size_t size) {
42
err = clGetPlatformIDs(1, &platform, NULL);
43
if (err != CL_SUCCESS) {
44
printf( "clGetPlatformIDs() failed with %d\n", err );
48
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
49
if (err != CL_SUCCESS) {
50
printf( "clGetDeviceIDs() failed with %d\n", err );
54
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(param), param, NULL);
56
printf("Using device: %s\n", param);
60
props[1] = (cl_context_properties)platform;
61
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
62
if (err != CL_SUCCESS) {
63
printf( "clCreateContext() failed with %d\n", err );
67
queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
68
if (err != CL_SUCCESS) {
69
printf( "clCreateCommandQueue() failed with %d\n", err );
70
clReleaseContext(ctx);
74
dev_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(unsigned char), NULL, &err);
75
if (err != CL_SUCCESS) {
76
printf("clCreateBuffer() failed with %d\n", err);
80
dev_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(unsigned char), NULL, &err);
81
if (err != CL_SUCCESS) {
82
printf("clCreateBuffer() failed with %d\n", err);
86
dev_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size * size * sizeof(unsigned char), NULL, &err);
87
if (err != CL_SUCCESS) {
88
printf("clCreateBuffer() failed with %d\n", err);
92
char *res = calloc(size * size, sizeof(unsigned char));
94
err = clEnqueueWriteBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(unsigned char), res, 0, NULL, NULL);
95
if (err != CL_SUCCESS) {
96
printf("clEnqueueWriteBuffer() failed with %d\n", err);
101
err = clFinish(queue);
102
if (err != CL_SUCCESS) {
103
printf("clFinish() failed with %d\n", err);
108
sprintf(param, "%s.cl", name);
109
f = fopen(param, "r");
111
printf("Can't open file with OpenCL kernels\n");
115
fseek(f, 0, SEEK_END);
117
fseek(f, 0, SEEK_SET);
119
source = (char*)malloc(len + 128);
121
printf("Can't allocate memory for OpenCL source\n");
125
sprintf(source, "#define BLOCK_SIZE %lu\n#define PPT %lu\n\n", BLOCK_SIZE, PPT);
126
if (fread(source + strlen(source), 1, len, f) != len) {
127
printf("Can't read OpenCL source\n");
132
len = strlen(source);
134
app = clCreateProgramWithSource(ctx, 1, (const char**)&source, &len, &err);
135
if (err != CL_SUCCESS) {
136
printf("clCreateProgramWithSource() failed with %d\n", err);
140
err = clBuildProgram(app, 1, &device, "", NULL, NULL);
141
if (err != CL_SUCCESS) {
142
printf("clBuildProgram() failed with %d\n", err);
148
kernel = clCreateKernel(app, "multiply", &err);
149
if (err != CL_SUCCESS) {
150
printf("clCreateKernel() failed with %d\n", err);
154
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_res);
155
clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_a);
156
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dev_b);
158
clSetKernelArg(kernel, 3, sizeof(unsigned long), &matrix_size);
159
// printf("%lu %lu %lu\n", PPT, PPT, 2 * PPT * PPT * BLOCK_SIZE * BLOCK_SIZE * sizeof(unsigned char) / 1024);
160
err = clSetKernelArg(kernel, 4, 2 * SHMEM * BLOCK_SIZE * BLOCK_SIZE * sizeof(unsigned char), NULL);
165
int exercise_allocate(unsigned char **res, unsigned char **a, unsigned char **b, size_t size) {
170
void exercise_free() {
171
clReleaseKernel(kernel);
172
clReleaseProgram(app);
174
clReleaseCommandQueue(queue);
175
clReleaseContext(ctx);
177
clReleaseMemObject(dev_res);
178
clReleaseMemObject(dev_b);
179
clReleaseMemObject(dev_a);
182
size_t exercise(unsigned char *res, unsigned char *a, unsigned char *b, size_t size, int iterations) {
185
cl_event event = NULL;
189
size_t local_size[] = {BLOCK_SIZE, BLOCK_SIZE};
190
size_t global_size[] = {size/PPT, size/PPT};
192
for (i = 0; i < iterations; i++) {
193
err = clEnqueueWriteBuffer(queue, dev_a, CL_TRUE, 0, size * size * sizeof(unsigned char), a, 0, NULL, NULL);
194
if (err != CL_SUCCESS) {
195
printf("clEnqueueWriteBuffer() failed with %d\n", err);
199
err = clEnqueueWriteBuffer(queue, dev_b, CL_TRUE, 0, size * size * sizeof(unsigned char), b, 0, NULL, NULL);
200
if (err != CL_SUCCESS) {
201
printf("clEnqueueWriteBuffer() failed with %d\n", err);
205
err = clEnqueueNDRangeKernel(queue, kernel, 2, 0, global_size, local_size, 0, NULL, &event);
206
if (err != CL_SUCCESS) {
207
printf("clEnqueueNDRangeKernel() failed with %d\n", err);
211
err = clEnqueueReadBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(unsigned char), res, 0, NULL, NULL);
212
if (err != CL_SUCCESS) {
213
printf("clEnqueueReadBuffer() failed with %d\n", err);
217
err = clFinish(queue);
218
if (err != CL_SUCCESS) {
219
printf("clFinish() failed with %d\n", err);
223
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
224
if (err != CL_SUCCESS) {
225
printf("clGetEventProfilingInfo() failed with %d\n", err);
229
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
230
if (err != CL_SUCCESS) {
231
printf("clGetEventProfilingInfo() failed with %d\n", err);
235
runtime += end - start;