14
// File-scope OpenCL state shared by the exercise_* functions in this file.

// Platform handle; filled in by clGetPlatformIDs() in exercise_init().
static cl_platform_id platform = 0;
15
// Device handle; filled in by clGetDeviceIDs() in exercise_init().
static cl_device_id device = 0;
17
// Context property list passed to clCreateContext(). Slot [1] starts as 0 and
// is overwritten with the platform handle in exercise_init().
static cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
18
// Context created in exercise_init() and released in exercise_free().
static cl_context ctx = 0;
19
// `queues` holds the PARALLEL command queues created in exercise_init() and
// handed to clAmdBlasSgemm(). `queue` is the single queue used for the buffer
// writes/reads in exercise(). The `= {0}` initializer belongs to `queues` only.
// NOTE(review): no assignment to `queue` is visible in this excerpt -- confirm
// it is set (e.g. to one of queues[]) before it is used.
static cl_command_queue queue, queues[PARALLEL] = {0};
21
// Device buffers for the result matrix and the two inputs; created in
// exercise_init() and released in exercise_free().
static cl_mem dev_res, dev_a, dev_b;
23
// Non-static flag initialised to 0; it is not read anywhere in the visible
// lines. NOTE(review): presumably consumed by an external driver -- confirm.
int exercise_required_alignment = 0;
25
// Sets up the OpenCL / clAmdBlas state used by exercise().
//
// Visible steps, in order: query one platform, query one GPU device on it,
// print the device name, create a context, create PARALLEL command queues,
// call clAmdBlasSetup(), and create three device buffers of
// size * size * sizeof(float) bytes each (dev_a, dev_b, dev_res).
//
// Parameters:
//   name - not referenced in any visible line of this function.
//   size - matrix dimension; each buffer holds size * size floats.
//
// NOTE(review): this excerpt omits lines (local declarations, the statements
// that follow each error printf, closing braces, the return). The value
// returned on success or failure is therefore not visible here.
int exercise_init(const char *name, size_t size) {
30
// Ask for a single platform.
err = clGetPlatformIDs(1, &platform, NULL);
31
if (err != CL_SUCCESS) {
32
printf( "clGetPlatformIDs() failed with %d\n", err );
36
// Ask for a single GPU-type device on that platform.
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
37
if (err != CL_SUCCESS) {
38
printf( "clGetDeviceIDs() failed with %d\n", err );
42
// Fetch the device name into `param` so it can be printed below.
// NOTE(review): no check of `err` is visible between this call and the
// printf that uses `param` -- confirm whether the result is validated.
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(param), param, NULL);
44
printf("Using device: %s\n", param);
48
// Complete the property list with the platform handle before creating the context.
props[1] = (cl_context_properties)platform;
49
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
50
if (err != CL_SUCCESS) {
51
printf( "clCreateContext() failed with %d\n", err );
55
// Create PARALLEL queues on the same device, each with profiling enabled
// (exercise() reads profiling timestamps from the SGEMM event).
for (i = 0; i < PARALLEL; i++) {
56
queues[i] = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
57
if (err != CL_SUCCESS) {
58
printf( "clCreateCommandQueue() failed with %d\n", err );
59
// NOTE(review): only the context is released in the visible lines; queues
// created in earlier iterations are not visibly released -- confirm.
clReleaseContext(ctx);
65
err = clAmdBlasSetup();
66
if (err != CL_SUCCESS) {
67
printf("clAmdBlasSetup() failed with %d\n", err);
68
// NOTE(review): this releases `queue`, while the loop above fills queues[];
// no assignment to `queue` is visible in this excerpt -- confirm it is set.
clReleaseCommandQueue(queue);
69
clReleaseContext(ctx);
73
// First input matrix: created with the read-only flag, size * size floats.
// NOTE(review): size * size * sizeof(float) is computed in size_t with no
// visible overflow check.
dev_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(float), NULL, &err);
74
if (err != CL_SUCCESS) {
75
printf("clCreateBuffer() failed with %d\n", err);
79
// Second input matrix: created with the read-only flag, same byte size.
dev_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY, size * size * sizeof(float), NULL, &err);
80
if (err != CL_SUCCESS) {
81
printf("clCreateBuffer() failed with %d\n", err);
85
// Result matrix: created with the read-write flag, same byte size.
dev_res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size * size * sizeof(float), NULL, &err);
86
if (err != CL_SUCCESS) {
87
printf("clCreateBuffer() failed with %d\n", err);
94
// Takes the addresses of three float pointers (res, a, b) and a size, and
// returns an int.
// NOTE(review): the body is not visible in this excerpt; presumably it
// allocates the host-side matrices through these out-parameters -- confirm.
int exercise_allocate(float **res, float **a, float **b, size_t size) {
98
// Releases the OpenCL objects created by exercise_init(): every entry of
// queues[], then the context, then the three device buffers.
// NOTE(review): no clAmdBlas teardown call is visible in this excerpt, and
// the context is released before the buffers created in it -- confirm both
// are intended.
void exercise_free() {
102
// Release each of the PARALLEL command queues.
for (i = 0; i < PARALLEL; i++) {
103
clReleaseCommandQueue(queues[i]);
105
clReleaseContext(ctx);
107
// Release the device buffers: result first, then the two inputs.
clReleaseMemObject(dev_res);
108
clReleaseMemObject(dev_b);
109
clReleaseMemObject(dev_a);
112
// Runs the SGEMM benchmark `iterations` times and accumulates the profiled
// duration of each clAmdBlasSgemm() call into `runtime`.
//
// Parameters:
//   res        - host buffer that receives dev_res after each iteration.
//   a, b       - host input matrices uploaded to dev_a / dev_b each iteration.
//   size       - matrix dimension; all transfers move size * size floats.
//   iterations - number of upload / multiply / download rounds.
//
// NOTE(review): the function continues past the end of this excerpt and some
// lines are omitted (local declarations, error-branch bodies, the return), so
// the returned value is not visible here -- presumably `runtime`; confirm.
size_t exercise(float *res, float *a, float *b, size_t size, int iterations) {
115
// Event filled in by clAmdBlasSgemm(); queried for profiling timestamps below.
cl_event event = NULL;
119
// Scalars passed to SGEMM as alpha and beta.
float alpha = 1, beta = 0;
120
// Neither operand is transposed.
static const clAmdBlasTranspose transA = clAmdBlasNoTrans, transB = clAmdBlasNoTrans;
121
// Matrices are passed in row-major order.
static const clAmdBlasOrder order = clAmdBlasRowMajor;
123
for (i = 0; i < iterations; i++) {
124
// Upload `a` into dev_a on `queue`; CL_TRUE is passed as the blocking flag.
err = clEnqueueWriteBuffer(queue, dev_a, CL_TRUE, 0, size * size * sizeof(float), a, 0, NULL, NULL);
125
if (err != CL_SUCCESS) {
126
printf("clEnqueueWriteBuffer() failed with %d\n", err);
130
// Upload `b` into dev_b the same way.
err = clEnqueueWriteBuffer(queue, dev_b, CL_TRUE, 0, size * size * sizeof(float), b, 0, NULL, NULL);
131
if (err != CL_SUCCESS) {
132
printf("clEnqueueWriteBuffer() failed with %d\n", err);
136
// Memory corruption above 2 with current version of clBlas, i is set to 0
// NOTE(review): the comment above is kept verbatim; it is unclear what
// "above 2" refers to (possibly the PARALLEL queue count) -- confirm.
// The call passes size for every dimension and leading dimension, hands all
// PARALLEL queues to the library, and requests one output event.
err = clAmdBlasSgemm(order, transA, transB, size, size, size, alpha, dev_a, size, dev_b, size, beta, dev_res, size, PARALLEL, queues, 0, NULL, &event);
138
if (err != CL_SUCCESS) {
139
printf("clAmdBlasSgemm() failed with %d\n", err);
143
// Call clFinish() on every queue that was handed to SGEMM.
for (j = 0; j < PARALLEL; j++) {
144
err = clFinish(queues[j]);
145
if (err != CL_SUCCESS) {
146
printf("clFinish() failed with %d\n", err);
151
// Download dev_res into `res` on `queue`; CL_TRUE is passed as the blocking flag.
err = clEnqueueReadBuffer(queue, dev_res, CL_TRUE, 0, size * size * sizeof(float), res, 0, NULL, NULL);
152
if (err != CL_SUCCESS) {
153
printf("clEnqueueReadBuffer() failed with %d\n", err);
157
// Read the start timestamp of the SGEMM event into `start`.
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
158
if (err != CL_SUCCESS) {
159
printf("clGetEventProfilingInfo() failed with %d\n", err);
163
// Read the end timestamp of the same event into `end`.
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
164
if (err != CL_SUCCESS) {
165
printf("clGetEventProfilingInfo() failed with %d\n", err);
169
// Accumulate this iteration's SGEMM duration.
// NOTE(review): no release of `event` is visible in this excerpt -- confirm
// it is released once per iteration.
runtime += end - start;