18
/*
 * File-scope OpenCL state shared by exercise_init(), exercise() and
 * exercise_free(). Everything except exercise_required_alignment is
 * `static`, i.e. private to this translation unit.
 */

/* Platform handle; written by clGetPlatformIDs() in exercise_init(). */
static cl_platform_id platform = 0;
19
/* Device handle; exercise_init() asks for one CL_DEVICE_TYPE_GPU device. */
static cl_device_id device = 0;
21
/*
 * Zero-terminated context property list. Slot 1 is a placeholder that
 * exercise_init() overwrites with the platform handle before calling
 * clCreateContext().
 */
static cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
22
/* Context created over `device`. */
static cl_context ctx = 0;
23
/* Command queue on `device`; created with CL_QUEUE_PROFILING_ENABLE. */
static cl_command_queue queue = 0;
25
/*
 * Device buffers, each size * size floats: dev_a is created
 * CL_MEM_READ_ONLY and is written from the host array `a` in exercise();
 * dev_res is created CL_MEM_READ_WRITE and is read back into `res`.
 */
static cl_mem dev_res, dev_a;
27
/* Program object created from the "<name>.cl" source in exercise_init(). */
static cl_program app;
28
/* The "transpose" kernel obtained from `app`. */
static cl_kernel kernel;
30
/*
 * Passed to the kernel as argument 2.
 * NOTE(review): no assignment to matrix_size is visible in this view;
 * presumably it is set from `size` in exercise_init() - confirm.
 */
static size_t matrix_size;
32
/*
 * Exported (non-static) value PPT * BLOCK_SIZE; both macros are defined
 * outside this view.
 * NOTE(review): presumably callers must use matrix sizes that are a
 * multiple of this value - confirm against the caller.
 */
int exercise_required_alignment = PPT * BLOCK_SIZE;
34
/*
 * exercise_init - set up all OpenCL state used by this exercise.
 *
 * Steps visible here, in order: pick one platform and one GPU device,
 * create a context and a profiling-enabled command queue, allocate the two
 * device buffers, zero dev_res, load and build the kernel source from
 * "<name>.cl", create the "transpose" kernel and bind its arguments.
 *
 * name: base name of the kernel source file; ".cl" is appended.
 * size: matrix dimension; both device buffers hold size * size floats.
 *
 * NOTE(review): the local declarations (err, param, f, source, len), the
 * return statements and the closing braces of the error branches are not
 * visible in this view, so the return-value contract cannot be stated here.
 */
int exercise_init(const char *name, size_t size) {
42
/* Ask for a single platform ID. */
err = clGetPlatformIDs(1, &platform, NULL);
43
if (err != CL_SUCCESS) {
44
printf( "clGetPlatformIDs() failed with %d\n", err );
48
/* Ask for a single GPU device on that platform. */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
49
if (err != CL_SUCCESS) {
50
printf( "clGetDeviceIDs() failed with %d\n", err );
54
/* Fetch the device name into `param` for the informational message below.
 * NOTE(review): no check of `err` for this call is visible here. */
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(param), param, NULL);
56
printf("Using device: %s\n", param);
60
/* Fill the CL_CONTEXT_PLATFORM placeholder, then create the context. */
props[1] = (cl_context_properties)platform;
61
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
62
if (err != CL_SUCCESS) {
63
printf( "clCreateContext() failed with %d\n", err );
67
/* Profiling is enabled so exercise() can read kernel start/end timestamps. */
queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
68
if (err != CL_SUCCESS) {
69
printf( "clCreateCommandQueue() failed with %d\n", err );
70
/* Undo the context creation on this failure path. */
clReleaseContext(ctx);
74
/* Input matrix buffer; declared read-only for the device. */
dev_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(float), NULL, &err);
75
if (err != CL_SUCCESS) {
76
printf("clCreateBuffer() failed with %d\n", err);
80
/* Result matrix buffer. */
dev_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size * size * sizeof(float), NULL, &err);
81
if (err != CL_SUCCESS) {
82
printf("clCreateBuffer() failed with %d\n", err);
86
/* Zero-filled host buffer of size * size floats, used only to clear dev_res.
 * NOTE(review): no NULL check and no free() of `res` are visible in this
 * view - confirm the buffer is checked and released. */
char *res = calloc(size * size, sizeof(float));
88
/* Blocking write (CL_TRUE): copies the zeros into dev_res. */
err = clEnqueueWriteBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(float), res, 0, NULL, NULL);
89
if (err != CL_SUCCESS) {
90
printf("clEnqueueWriteBuffer() failed with %d\n", err);
95
err = clFinish(queue);
96
if (err != CL_SUCCESS) {
97
printf("clFinish() failed with %d\n", err);
102
/* Build the kernel file name "<name>.cl", reusing `param` as scratch space.
 * NOTE(review): sprintf() is unbounded; the size of `param` is not visible
 * here - confirm a long `name` cannot overflow it. */
sprintf(param, "%s.cl", name);
103
f = fopen(param, "r");
105
/* Message for the failed-open case; the guarding condition is not visible
 * in this view. */
printf("Can't open file with OpenCL kernels\n");
109
/* Seek to the end and back to the start; presumably the file length is
 * taken with ftell() on the line missing in between - confirm. */
fseek(f, 0, SEEK_END);
111
fseek(f, 0, SEEK_SET);
113
/* File length plus 128 bytes of headroom for the #define prelude written
 * below. */
source = (char*)malloc(len + 128);
115
printf("Can't allocate memory for OpenCL source\n");
119
/* Prepend BLOCK_SIZE and PPT as macros so the kernel source is compiled
 * with the same values as the host code.
 * NOTE(review): %u requires unsigned int arguments - confirm the macros
 * expand to unsigned values. */
sprintf(source, "#define BLOCK_SIZE %u\n#define PPT %u\n\n", BLOCK_SIZE, PPT);
120
/* Append the file contents after the prelude; anything short of `len`
 * bytes is treated as a read failure. */
if (fread(source + strlen(source), 1, len, f) != len) {
121
printf("Can't read OpenCL source\n");
126
/* Total length of prelude plus file text, passed to the program below.
 * NOTE(review): strlen() needs a NUL terminator after the bytes read by
 * fread(); the terminating store is not visible in this view - confirm. */
len = strlen(source);
128
app = clCreateProgramWithSource(ctx, 1, (const char**)&source, &len, &err);
129
if (err != CL_SUCCESS) {
130
printf("clCreateProgramWithSource() failed with %d\n", err);
134
/* Build for the single selected device with empty build options. */
err = clBuildProgram(app, 1, &device, "", NULL, NULL);
135
if (err != CL_SUCCESS) {
136
printf("clBuildProgram() failed with %d\n", err);
142
kernel = clCreateKernel(app, "transpose", &err);
143
if (err != CL_SUCCESS) {
144
printf("clCreateKernel() failed with %d\n", err);
148
/* Kernel arguments: 0 = result buffer, 1 = input buffer, 2 = matrix size,
 * 3 = local scratch memory. The return codes of the first three calls are
 * not captured; only the last call's result is stored in `err`. */
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_res);
149
clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_a);
151
/* NOTE(review): the size given is sizeof(unsigned long) while matrix_size
 * is a size_t; the two differ on some ABIs (e.g. 64-bit Windows) - confirm
 * against the kernel's parameter type. */
clSetKernelArg(kernel, 2, sizeof(unsigned long), &matrix_size);
152
/* NULL value with a non-zero size: the OpenCL form for a __local kernel
 * argument, here 2 * BLOCK_SIZE * BLOCK_SIZE floats. */
err = clSetKernelArg(kernel, 3, 2 * BLOCK_SIZE * BLOCK_SIZE * sizeof(float), NULL);
157
/*
 * exercise_allocate - takes pointers to two float pointers (res, a) and a
 * size.
 * NOTE(review): the body is not visible in this view; presumably it
 * allocates the host matrices for `*res` and `*a` and returns a status
 * code - confirm against the full source.
 */
int exercise_allocate(float **res, float **a, size_t size) {
162
/*
 * exercise_free - release the OpenCL objects held in the file-scope
 * variables: kernel, program, command queue, context, then the two device
 * buffers. The return codes of the release calls are not checked, and the
 * handles are not reset afterwards in the lines visible here.
 */
void exercise_free() {
163
clReleaseKernel(kernel);
164
clReleaseProgram(app);
166
clReleaseCommandQueue(queue);
167
clReleaseContext(ctx);
169
/* The buffers are released after the context that created them. */
clReleaseMemObject(dev_res);
170
clReleaseMemObject(dev_a);
173
size_t exercise(float *res, float *a, size_t size, int iterations) {
176
cl_event event = NULL;
180
size_t local_size[] = {BLOCK_SIZE, BLOCK_SIZE};
181
size_t global_size[] = {size/PPT, size/PPT};
183
for (i = 0; i < iterations; i++) {
184
err = clEnqueueWriteBuffer(queue, dev_a, CL_TRUE, 0, size * size * sizeof(float), a, 0, NULL, NULL);
185
if (err != CL_SUCCESS) {
186
printf("clEnqueueWriteBuffer() failed with %d\n", err);
190
err = clEnqueueNDRangeKernel(queue, kernel, 2, 0, global_size, local_size, 0, NULL, &event);
191
if (err != CL_SUCCESS) {
192
printf("clEnqueueNDRangeKernel() failed with %d\n", err);
196
err = clEnqueueReadBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(float), res, 0, NULL, NULL);
197
if (err != CL_SUCCESS) {
198
printf("clEnqueueReadBuffer() failed with %d\n", err);
202
err = clFinish(queue);
203
if (err != CL_SUCCESS) {
204
printf("clFinish() failed with %d\n", err);
208
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
209
if (err != CL_SUCCESS) {
210
printf("clGetEventProfilingInfo() failed with %d\n", err);
214
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
215
if (err != CL_SUCCESS) {
216
printf("clGetEventProfilingInfo() failed with %d\n", err);
220
runtime += end - start;