18
static cl_platform_id platform = 0;
19
static cl_device_id device = 0;
21
static cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
22
static cl_context ctx = 0;
23
static cl_command_queue queue = 0;
25
static cl_mem dev_res, dev_a, dev_b;
26
static cl_mem pinned_mem_res, pinned_mem_a, pinned_mem_b;
27
float *pinned_res, *pinned_a, *pinned_b;
34
int exercise_required_alignment = PPT * BLOCK_SIZE;
36
int exercise_init(const char *name, size_t size) {
44
err = clGetPlatformIDs(1, &platform, NULL);
45
if (err != CL_SUCCESS) {
46
printf( "clGetPlatformIDs() failed with %d\n", err );
50
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
51
if (err != CL_SUCCESS) {
52
printf( "clGetDeviceIDs() failed with %d\n", err );
56
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(param), param, NULL);
58
printf("Using device: %s\n", param);
62
props[1] = (cl_context_properties)platform;
63
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
64
if (err != CL_SUCCESS) {
65
printf( "clCreateContext() failed with %d\n", err );
69
queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
70
if (err != CL_SUCCESS) {
71
printf( "clCreateCommandQueue() failed with %d\n", err );
72
clReleaseContext(ctx);
76
dev_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(float), NULL, &err);
77
if (err != CL_SUCCESS) {
78
printf("clCreateBuffer() failed with %d\n", err);
82
dev_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(float), NULL, &err);
83
if (err != CL_SUCCESS) {
84
printf("clCreateBuffer() failed with %d\n", err);
88
dev_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size * size * sizeof(float), NULL, &err);
89
if (err != CL_SUCCESS) {
90
printf("clCreateBuffer() failed with %d\n", err);
94
char *res = calloc(size * size, sizeof(float));
96
err = clEnqueueWriteBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(float), res, 0, NULL, NULL);
97
if (err != CL_SUCCESS) {
98
printf("clEnqueueWriteBuffer() failed with %d\n", err);
103
err = clFinish(queue);
104
if (err != CL_SUCCESS) {
105
printf("clFinish() failed with %d\n", err);
110
sprintf(param, "%s.cl", name);
111
f = fopen(param, "r");
113
printf("Can't open file with OpenCL kernels\n");
117
fseek(f, 0, SEEK_END);
119
fseek(f, 0, SEEK_SET);
121
source = (char*)malloc(len + 128);
123
printf("Can't allocate memory for OpenCL source\n");
127
sprintf(source, "#define BLOCK_SIZE %lu\n#define PPT %lu\n\n", BLOCK_SIZE, PPT, PPT);
128
if (fread(source + strlen(source), 1, len, f) != len) {
129
printf("Can't read OpenCL source\n");
134
len = strlen(source);
136
app = clCreateProgramWithSource(ctx, 1, (const char**)&source, &len, &err);
137
if (err != CL_SUCCESS) {
138
printf("clCreateProgramWithSource() failed with %d\n", err);
142
err = clBuildProgram(app, 1, &device, "", NULL, NULL);
143
if (err != CL_SUCCESS) {
144
printf("clBuildProgram() failed with %d\n", err);
150
kernel = clCreateKernel(app, "multiply", &err);
151
if (err != CL_SUCCESS) {
152
printf("clCreateKernel() failed with %d\n", err);
156
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_res);
157
clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_a);
158
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dev_b);
160
clSetKernelArg(kernel, 3, sizeof(unsigned long), &matrix_size);
161
err = clSetKernelArg(kernel, 4, 2 * SHMEM * BLOCK_SIZE * BLOCK_SIZE * sizeof(float), NULL);
166
int exercise_allocate(float **res, float **a, float **b, size_t size) {
167
pinned_mem_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size * size * sizeof(float), NULL, &err);
168
if (err != CL_SUCCESS) {
169
printf("clCreateBuffer() for pinned memory failed with %d\n", err);
173
pinned_a = (cl_float*)clEnqueueMapBuffer(queue, pinned_mem_a, CL_TRUE, CL_MAP_WRITE, 0, size * size * sizeof(float), 0, NULL, NULL, &err);
174
if (err != CL_SUCCESS) {
175
printf("clEnqueueMapBuffer() failed with %d\n", err);
179
pinned_mem_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size * size * sizeof(float), NULL, &err);
180
if (err != CL_SUCCESS) {
181
printf("clCreateBuffer() for pinned memory failed with %d\n", err);
185
pinned_b = (cl_float*)clEnqueueMapBuffer(queue, pinned_mem_b, CL_TRUE, CL_MAP_WRITE, 0, size * size * sizeof(float), 0, NULL, NULL, &err);
186
if (err != CL_SUCCESS) {
187
printf("clEnqueueMapBuffer() failed with %d\n", err);
191
pinned_mem_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size * size * sizeof(float), NULL, &err);
192
if (err != CL_SUCCESS) {
193
printf("clCreateBuffer() for pinned memory failed with %d\n", err);
197
pinned_res = (cl_float*)clEnqueueMapBuffer(queue, pinned_mem_res, CL_TRUE, CL_MAP_READ, 0, size * size * sizeof(float), 0, NULL, NULL, &err);
198
if (err != CL_SUCCESS) {
199
printf("clEnqueueMapBuffer() failed with %d\n", err);
211
void exercise_free() {
212
clReleaseKernel(kernel);
213
clReleaseProgram(app);
215
clEnqueueUnmapMemObject(queue, pinned_mem_a, pinned_a, 0, NULL, NULL);
216
clEnqueueUnmapMemObject(queue, pinned_mem_b, pinned_b, 0, NULL, NULL);
217
clEnqueueUnmapMemObject(queue, pinned_mem_res, pinned_res, 0, NULL, NULL);
219
clReleaseMemObject(pinned_mem_a);
220
clReleaseMemObject(pinned_mem_b);
221
clReleaseMemObject(pinned_mem_res);
223
clReleaseCommandQueue(queue);
224
clReleaseContext(ctx);
226
clReleaseMemObject(dev_res);
227
clReleaseMemObject(dev_b);
228
clReleaseMemObject(dev_a);
231
size_t exercise(float *res, float *a, float *b, size_t size, int iterations) {
234
cl_event event = NULL;
238
size_t local_size[] = {BLOCK_SIZE, BLOCK_SIZE};
239
size_t global_size[] = {size/PPT, size/PPT};
241
for (i = 0; i < iterations; i++) {
242
err = clEnqueueWriteBuffer(queue, dev_a, CL_TRUE, 0, size * size * sizeof(float), a, 0, NULL, NULL);
243
if (err != CL_SUCCESS) {
244
printf("clEnqueueWriteBuffer() failed with %d\n", err);
248
err = clEnqueueWriteBuffer(queue, dev_b, CL_TRUE, 0, size * size * sizeof(float), b, 0, NULL, NULL);
249
if (err != CL_SUCCESS) {
250
printf("clEnqueueWriteBuffer() failed with %d\n", err);
254
err = clEnqueueNDRangeKernel(queue, kernel, 2, 0, global_size, local_size, 0, NULL, &event);
255
if (err != CL_SUCCESS) {
256
printf("clEnqueueNDRangeKernel() failed with %d\n", err);
260
err = clEnqueueReadBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(float), res, 0, NULL, NULL);
261
if (err != CL_SUCCESS) {
262
printf("clEnqueueReadBuffer() failed with %d\n", err);
266
err = clFinish(queue);
267
if (err != CL_SUCCESS) {
268
printf("clFinish() failed with %d\n", err);
272
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
273
if (err != CL_SUCCESS) {
274
printf("clGetEventProfilingInfo() failed with %d\n", err);
278
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
279
if (err != CL_SUCCESS) {
280
printf("clGetEventProfilingInfo() failed with %d\n", err);
284
runtime += end - start;