15
static cl_platform_id platform = 0;
16
static cl_device_id device = 0;
18
static cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
19
static cl_context ctx = 0;
20
static cl_command_queue queue = 0;
22
static cl_mem dev_res, dev_a, dev_b, scratch;
24
static cl_program app;
25
static cl_kernel kernel;
27
static int vector_size;
29
int exercise_required_alignment = BLOCK_SIZE * USE_BLOCKS;
31
int exercise_init(const char *name, size_t size) {
39
err = clGetPlatformIDs(1, &platform, NULL);
40
if (err != CL_SUCCESS) {
41
printf( "clGetPlatformIDs() failed with %d\n", err );
45
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
46
if (err != CL_SUCCESS) {
47
printf( "clGetDeviceIDs() failed with %d\n", err );
51
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(param), param, NULL);
53
printf("Using device: %s\n", param);
57
props[1] = (cl_context_properties)platform;
58
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
59
if (err != CL_SUCCESS) {
60
printf( "clCreateContext() failed with %d\n", err );
64
queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
65
if (err != CL_SUCCESS) {
66
printf( "clCreateCommandQueue() failed with %d\n", err );
67
clReleaseContext(ctx);
71
dev_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * sizeof(float), NULL, &err);
72
if (err != CL_SUCCESS) {
73
printf("clCreateBuffer() failed with %d\n", err);
77
dev_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * sizeof(float), NULL, &err);
78
if (err != CL_SUCCESS) {
79
printf("clCreateBuffer() failed with %d\n", err);
83
dev_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float), NULL, &err);
84
if (err != CL_SUCCESS) {
85
printf("clCreateBuffer() failed with %d\n", err);
89
scratch = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size * sizeof(float), NULL, &err);
90
if (err != CL_SUCCESS) {
91
printf("clCreateBuffer() failed with %d\n", err);
97
err = clEnqueueWriteBuffer(queue, dev_res, CL_TRUE, 0, sizeof(float), &res, 0, NULL, NULL);
98
if (err != CL_SUCCESS) {
99
printf("clEnqueueWriteBuffer() failed with %d\n", err);
103
err = clFinish(queue);
104
if (err != CL_SUCCESS) {
105
printf("clFinish() failed with %d\n", err);
109
sprintf(param, "%s.cl", name);
110
f = fopen(param, "r");
112
printf("Can't open file with OpenCL kernels\n");
116
fseek(f, 0, SEEK_END);
118
fseek(f, 0, SEEK_SET);
120
source = (char*)malloc(len + 128);
122
printf("Can't allocate memory for OpenCL source\n");
126
sprintf(source, "#define BLOCK_SIZE %lu\n\n", BLOCK_SIZE);
127
if (fread(source + strlen(source), 1, len, f) != len) {
128
printf("Can't read OpenCL source\n");
133
len = strlen(source);
135
app = clCreateProgramWithSource(ctx, 1, (const char**)&source, &len, &err);
136
if (err != CL_SUCCESS) {
137
printf("clCreateProgramWithSource() failed with %d\n", err);
141
err = clBuildProgram(app, 1, &device, "", NULL, NULL);
142
if (err != CL_SUCCESS) {
143
printf("clBuildProgram() failed with %d\n", err);
149
kernel = clCreateKernel(app, "multiply", &err);
150
if (err != CL_SUCCESS) {
151
printf("clCreateKernel() failed with %d\n", err);
155
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_res);
156
clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_a);
157
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dev_b);
159
clSetKernelArg(kernel, 3, sizeof(unsigned int), &vector_size);
160
clSetKernelArg(kernel, 4, sizeof(cl_mem), &scratch);
161
clSetKernelArg(kernel, 5, BLOCK_SIZE * sizeof(float), NULL);
166
int exercise_allocate(float **res, float **a, float **b, size_t size) {
171
void exercise_free() {
172
clReleaseKernel(kernel);
173
clReleaseProgram(app);
175
clReleaseCommandQueue(queue);
176
clReleaseContext(ctx);
178
clReleaseMemObject(scratch);
179
clReleaseMemObject(dev_res);
180
clReleaseMemObject(dev_b);
181
clReleaseMemObject(dev_a);
184
size_t exercise(float *res, float *a, float *b, size_t size, int iterations) {
187
cl_event event = NULL;
188
struct timeval tv1, tv2;
189
unsigned long us = 0;
192
cl_ulong runtime = 0;
194
size_t local_size[] = {BLOCK_SIZE};
195
size_t global_size[] = {USE_BLOCKS * BLOCK_SIZE};
197
for (i = 0; i < iterations; i++) {
198
err = clEnqueueWriteBuffer(queue, dev_a, CL_TRUE, 0, size * sizeof(float), a, 0, NULL, &event);
199
if (err != CL_SUCCESS) {
200
printf("clEnqueueWriteBuffer() failed with %d\n", err);
203
err = clEnqueueWriteBuffer(queue, dev_b, CL_TRUE, 0, size * sizeof(float), b, 0, NULL, &event);
204
if (err != CL_SUCCESS) {
205
printf("clEnqueueWriteBuffer() failed with %d\n", err);
209
// We need this to avoid some optimizations preventing proper measurements of time
210
err = clEnqueueWriteBuffer(queue, dev_res, CL_TRUE, 0, sizeof(float), res, 0, NULL, &event);
211
if (err != CL_SUCCESS) {
212
printf("clEnqueueReadBuffer() failed with %d\n", err);
215
clWaitForEvents(1, &event);
218
gettimeofday(&tv1, NULL);
219
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_size, local_size, 0, NULL, &event);
220
if (err != CL_SUCCESS) {
221
printf("clEnqueueNDRangeKernel() failed with %d\n", err);
225
gettimeofday(&tv2, NULL);
226
us += (tv2.tv_sec - tv1.tv_sec)*1000000 + (tv2.tv_usec - tv1.tv_usec);
229
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
230
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
231
runtime += end - start;
233
err = clEnqueueReadBuffer(queue, dev_res, CL_TRUE, 0, sizeof(float), res, 0, NULL, NULL);
234
if (err != CL_SUCCESS) {
235
printf("clEnqueueReadBuffer() failed with %d\n", err);