15
static cl_platform_id platform = 0;
16
static cl_device_id device = 0;
18
static cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
19
static cl_context ctx = 0;
20
static cl_command_queue queue = 0;
22
static cl_mem dev_res, scratch;
24
static cl_program app;
25
static cl_kernel kernel;
27
static size_t vector_size;
29
int exercise_required_alignment = 2 * BLOCK_SIZE * USE_BLOCKS;
31
/*
 * Set up the OpenCL state used by this exercise: platform, GPU device,
 * context, profiling-enabled command queue, the result and scratch buffers,
 * and the "pi" kernel built from the source file "<name>.cl".
 *
 * name - base name of the kernel source file; ".cl" is appended to it.
 * size - problem size; size / (USE_BLOCKS * BLOCK_SIZE) is stored in
 *        vector_size and passed to the kernel as argument 1.
 *
 * Each failing OpenCL call prints a message containing the error code.
 *
 * NOTE(review): the local declarations (err, param, f, len, source, res),
 * the return statements and the closing braces are not visible in this
 * excerpt, so the return convention cannot be documented from here.
 */
int exercise_init(const char *name, size_t size) {
39
/* Ask for at most one platform. */
err = clGetPlatformIDs(1, &platform, NULL);
40
if (err != CL_SUCCESS) {
41
printf( "clGetPlatformIDs() failed with %d\n", err );
45
/* Ask for at most one GPU device on that platform. */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
46
if (err != CL_SUCCESS) {
47
printf( "clGetDeviceIDs() failed with %d\n", err );
51
/* Fetch the device name into param and print it. */
/* NOTE(review): the result of this call is not checked in the visible code. */
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(param), param, NULL);
53
printf("Using device: %s\n", param);
57
/* Fill the platform into the CL_CONTEXT_PLATFORM slot of the property list. */
props[1] = (cl_context_properties)platform;
58
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
59
if (err != CL_SUCCESS) {
60
printf( "clCreateContext() failed with %d\n", err );
64
/* Profiling is enabled on the queue. */
queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
65
if (err != CL_SUCCESS) {
66
printf( "clCreateCommandQueue() failed with %d\n", err );
67
/* The context created above is released on this failure path. */
clReleaseContext(ctx);
71
/* Result buffer: room for a single unsigned long. */
dev_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(unsigned long), NULL, &err);
72
if (err != CL_SUCCESS) {
73
printf("clCreateBuffer() failed with %d\n", err);
77
/* Scratch buffer: room for USE_BLOCKS unsigned longs. */
scratch = clCreateBuffer(ctx, CL_MEM_READ_WRITE, USE_BLOCKS * sizeof(unsigned long), NULL, &err);
78
if (err != CL_SUCCESS) {
79
printf("clCreateBuffer() failed with %d\n", err);
85
/* Blocking write (CL_TRUE) of the host value res into the result buffer. */
/* NOTE(review): the declaration and initial value of res are not visible here. */
err = clEnqueueWriteBuffer(queue, dev_res, CL_TRUE, 0, sizeof(unsigned long), &res, 0, NULL, NULL);
86
if (err != CL_SUCCESS) {
87
printf("clEnqueueWriteBuffer() failed with %d\n", err);
91
/* Wait until everything queued so far has completed. */
err = clFinish(queue);
92
if (err != CL_SUCCESS) {
93
printf("clFinish() failed with %d\n", err);
97
/* Reuse param as the kernel file name: "<name>.cl". */
/* NOTE(review): sprintf is unbounded; the size of param is not visible here. */
sprintf(param, "%s.cl", name);
98
f = fopen(param, "r");
100
printf("Can't open file with OpenCL kernels\n");
104
/* Seek to the end and back to the start of the file. */
/* NOTE(review): presumably len is set from ftell() in between -- that line is not visible. */
fseek(f, 0, SEEK_END);
106
fseek(f, 0, SEEK_SET);
108
/* len + 128 bytes: the buffer also receives the #define line written below. */
source = (char*)malloc(len + 128);
110
printf("Can't allocate memory for OpenCL source\n");
114
/* Prepend a BLOCK_SIZE definition to the kernel source. */
/* NOTE(review): %lu requires BLOCK_SIZE to expand to an unsigned long; its definition is not visible. */
sprintf(source, "#define BLOCK_SIZE %lu\n\n", BLOCK_SIZE);
115
/* Read len bytes of the file right after the #define line. */
if (fread(source + strlen(source), 1, len, f) != len) {
116
printf("Can't read OpenCL source\n");
121
/* len now becomes the length of the combined source text. */
/* NOTE(review): fread() does not NUL-terminate; no terminator write is visible before this strlen(). */
len = strlen(source);
123
app = clCreateProgramWithSource(ctx, 1, (const char**)&source, &len, &err);
124
if (err != CL_SUCCESS) {
125
printf("clCreateProgramWithSource() failed with %d\n", err);
129
/* Build for the selected device with empty build options. */
err = clBuildProgram(app, 1, &device, "", NULL, NULL);
130
if (err != CL_SUCCESS) {
131
printf("clBuildProgram() failed with %d\n", err);
137
kernel = clCreateKernel(app, "pi", &err);
138
if (err != CL_SUCCESS) {
139
printf("clCreateKernel() failed with %d\n", err);
143
/* Kernel arguments; the clSetKernelArg() results are not checked. */
/* Argument 0: result buffer. */
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_res);
144
vector_size = size / (USE_BLOCKS * BLOCK_SIZE);
145
/* Argument 1: vector_size. */
/* NOTE(review): vector_size is a size_t but is passed with sizeof(unsigned long); confirm both match the kernel's parameter type. */
clSetKernelArg(kernel, 1, sizeof(unsigned long), &vector_size);
146
/* Argument 2: scratch buffer. */
clSetKernelArg(kernel, 2, sizeof(cl_mem), &scratch);
147
/* Argument 3: size only, NULL value -- presumably a __local array of BLOCK_SIZE unsigned longs in the kernel; confirm against the .cl source. */
clSetKernelArg(kernel, 3, BLOCK_SIZE * sizeof(unsigned long), NULL);
153
void exercise_free() {
154
clReleaseKernel(kernel);
155
clReleaseProgram(app);
157
clReleaseCommandQueue(queue);
158
clReleaseContext(ctx);
160
clReleaseMemObject(scratch);
161
clReleaseMemObject(dev_res);
164
size_t exercise(unsigned long *res, size_t size, int iterations) {
167
cl_event event = NULL;
168
struct timeval tv1, tv2;
171
size_t local_size[] = {BLOCK_SIZE};
172
size_t global_size[] = {USE_BLOCKS * BLOCK_SIZE};
174
gettimeofday(&tv1, NULL);
175
for (i = 0; i < iterations; i++) {
176
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_size, local_size, 0, NULL, &event);
177
if (err != CL_SUCCESS) {
178
printf("clEnqueueNDRangeKernel() failed with %d\n", err);
183
gettimeofday(&tv2, NULL);
185
us = (tv2.tv_sec - tv1.tv_sec)*1000000 + (tv2.tv_usec - tv1.tv_usec);
187
err = clEnqueueReadBuffer(queue, dev_res, CL_TRUE, 0, sizeof(unsigned long), res, 0, NULL, NULL);
188
if (err != CL_SUCCESS) {
189
printf("clEnqueueReadBuffer() failed with %d\n", err);