7
// fail(code, reason): prints `reason` followed by a newline to stderr.
// NOTE(review): the remainder of the macro body is not visible in this listing;
// presumably it terminates with `code` and closes the brace -- confirm.
// The expansion opens a bare `{` block rather than using `do { ... } while (0)`.
#define fail(code, reason) {\
8
fprintf(stderr, "%s\n", reason); \
14
// Presumably the device-array member of device_info_t (load_app initializes one
// as `{ num_devices, devices }`); the enclosing declaration is not visible in
// this listing -- confirm.
cl_device_id *devices;
17
// load_app: reads the OpenCL source file `name`, creates a program from it in
// `ctx` and builds it for the `num_devices` entries of `devices`, printing the
// per-device build result to stdout.
// Returns NULL when the source buffer cannot be allocated; the other return
// paths are not visible in this listing (main treats a NULL result as failure).
static inline cl_program load_app(cl_context ctx, cl_uint num_devices, cl_device_id *devices, const char *name) {
21
// NOTE(review): how `info` is used is not visible in this listing.
device_info_t info = { num_devices, devices };
25
// Open the source file in binary mode.
f = fopen(name, "rb");
28
// Seek to the end and back to the start; presumably the length is taken with
// ftell() in between (that line is not visible here) -- confirm.
fseek(f, 0, SEEK_END);
30
fseek(f, 0, SEEK_SET);
32
// Buffer of exactly `len` bytes; the length is passed explicitly to
// clCreateProgramWithSource below.
buf = (char*)malloc(len*sizeof(char));
33
// NOTE(review): `f` is still open here and is not closed on this return path.
if (!buf) return NULL;
35
// A short read is handled in the branch below (its body is not visible here).
if (fread(buf, 1, len, f) != len) {
44
// List the target devices on stdout.
printf("Compiling for ");
45
for (i=0;i<num_devices;i++) {
47
// NOTE(review): sizeof(name) only yields a buffer size if `name` here is a
// local array declared inside this loop (not visible); if it is the
// `const char *name` parameter it is the size of a pointer -- confirm.
int err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name), name, NULL);
48
// puts() appends a newline, unlike the printf() used when the name is known.
if (err != CL_SUCCESS) puts("Unknown ");
49
else printf("%s ", name);
53
// Create the program from the single in-memory source string of `len` bytes.
// The error-code output argument is NULL, so a failure is visible only through
// the returned handle.
app = clCreateProgramWithSource(ctx, 1, (const char**)&buf, &len, NULL);
60
cl_build_status build_status;
62
// NVIDIA-specific build options, kept for reference:
//"-cl-nv-maxrregcount=48"
63
//"-cl-nv-opt-level=<N>" (0 - no optimizations, 3 - default)
64
//"-cl-nv-arch sm_XX" - selects the target CUDA Compute Level architecture to compile for (sm_10 for 1.0, sm_11 for 1.1, sm_12 for 1.2, sm_13 for 1.3 and sm_20 for 2.0 (Fermi))
65
//"--cl-nv-cstd=CLX.X" - selects the target OpenCL C language version (CL1.0 or CL1.1)
66
// Active options: sm_20 target, verbose output and relaxed floating-point math.
char *build_flags = "-cl-nv-arch sm_20 -cl-nv-verbose -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-unsafe-math-optimizations -cl-finite-math-only";
67
// No notification callback is passed, so per the OpenCL specification the call
// returns only after the build has finished.
int err = clBuildProgram(app, num_devices, devices, build_flags, NULL, NULL);
68
// Report a failed build together with its error code.
if (err != CL_SUCCESS) printf("Application build failed (%i)\n", err);
70
// Inspect the build outcome for each device.
for (i = 0; i < num_devices; i++) {
72
// Poll the build status until it is no longer CL_BUILD_IN_PROGRESS
// (the opening `do {` is not visible in this listing).
err = clGetProgramBuildInfo(app, devices[i], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &build_status, NULL);
73
} while (build_status == CL_BUILD_IN_PROGRESS);
75
// Fetch the build log; one byte of build_log is held back, presumably for a
// terminator (the declaration of build_log is not visible) -- confirm.
err = clGetProgramBuildInfo(app, devices[i], CL_PROGRAM_BUILD_LOG, sizeof(build_log) - 1, &build_log, &size);
79
// On success the log is printed only when it is longer than two characters.
if (build_status == CL_BUILD_SUCCESS) {
80
if (strlen(build_log) > 2) {
81
printf("Build successed for device %i:\n===================\n%s\n--------------------\n\n", i, build_log);
83
// On a build error the log is printed unconditionally.
} else if (build_status == CL_BUILD_ERROR) {
84
printf("Build failed for device %i:\n======================\n%s\n--------------------\n\n", i, build_log);
86
// Failure message without the log (its enclosing branch is not visible here).
printf("Build failed for device %i\n", i);
92
// Reported when program creation fails (the enclosing condition is not
// visible here).
printf("Program creation failed\n");
99
// main: compiles the OpenCL source given in argv[1] for the GPU devices of the
// selected platform and writes the resulting program binaries to files.
// Command line (from the usage string): <source> [device] [binary] [kernel].
int main(int argc, char *argv[]) {
103
// Fixed capacity: at most 16 devices, with one binary and one size per device.
cl_device_id devices[16];
107
unsigned char *binary[16];
108
size_t binary_size[16];
114
fprintf(stderr, "Usage: %s <source> [device] [binary] [kernel]\n", argv[0]);
119
// Device index from the command line; atoi() does not report malformed input.
devid = atoi(argv[2]);
123
cl_platform_id selected_platform;
124
cl_uint i, num_platforms, num_devices;
125
cl_platform_id platforms[4];
127
// Query up to 4 platforms; the return code is not checked.
// NOTE(review): num_platforms receives the number of platforms available, which
// may exceed the 4 slots of `platforms` that the loop below indexes -- confirm.
clGetPlatformIDs(4, platforms, &num_platforms);
129
// Choose a platform by GPU device count: a platform is selected when its count
// exceeds `max` (the update of `max` is not visible in this listing).
for (i = 0; i < num_platforms; i++) {
130
// Count-only query: no output array is supplied.
err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
131
if ((err == CL_SUCCESS)&&(num_devices > max)) {
133
selected_platform = platforms[i];
138
// Fetch up to 16 GPU device IDs from the selected platform; the return code is
// not checked.
// NOTE(review): num_devices receives the number of matching devices, which may
// exceed the 16 entries actually stored in `devices` -- confirm.
clGetDeviceIDs(selected_platform, CL_DEVICE_TYPE_GPU, 16, devices, &num_devices);
140
// Reject a device index beyond the devices that were found.
if (devid >= (int)num_devices) fail(-1, "Invalid device number is specified");
144
// Two context variants: one spanning all devices, one for the single device
// `devid`. The condition choosing between them is not visible here
// (presumably devid < 0 selects all devices -- confirm).
ctx = clCreateContext(0, num_devices, devices, NULL, NULL, &err);
146
ctx = clCreateContext(0, 1, &devices[devid], NULL, NULL, &err);
147
if (!ctx) fail(-1, "Failed to create OpenCL context");
149
// Disabled alternative: create the context with clCreateContextFromType and
// read the device list back from it.
// cl_context ctx = clCreateContextFromType(NULL, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
150
// if (!ctx) fail(-1, "Failed to create OpenCL context");
153
// err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 16 * sizeof(cl_device_id), &devices, &num_devices);
154
// if (err != CL_SUCCESS) fail(-1, "clGetContextInfo call is failed");
156
// num_devices /= sizeof(cl_device_id);
159
// Build for all devices or only for `devid`, mirroring the two context variants
// above (the selecting condition is not visible here).
app = load_app(ctx, num_devices, devices, argv[1]);
161
app = load_app(ctx, 1, &devices[devid], argv[1]);
162
if (!app) fail(-1, "Compilation failed");
164
// When a specific device was requested, only one device (and hence one binary)
// is processed from here on.
if (devid >= 0) num_devices = 1;
165
// Query the size of each device binary.
err = clGetProgramInfo(app, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), &binary_size, &real_size);
167
// NOTE(review): the sizes are printed before `err` is checked, and `%li` is
// used for size_t values (`%zu` is the matching conversion).
printf("Status: %i, Sizes: ", err);
168
for (i = 0; i < num_devices; i++) printf (" %li", binary_size[i]);
171
// The call must succeed and report exactly one size_t per device.
if ((err != CL_SUCCESS)||(real_size != num_devices*sizeof(size_t)))
172
fail(-1, "Failed to get binary size");
175
// Allocate one buffer per binary, each with one spare byte.
for (i = 0; i < num_devices; i++) {
176
binary[i] = malloc(binary_size[i] + 1);
178
// Roll back the earlier allocations on failure (the failure check itself is
// not visible here).
// NOTE(review): if `i` is the cl_uint declared above it is unsigned, so `i>=0`
// is always true and this loop cannot end through its condition -- confirm.
for (i--; i>=0; i--) free(binary[i]);
179
fail(-1, "allocation failed");
183
// Copy the program binaries into the allocated buffers.
err = clGetProgramInfo(app, CL_PROGRAM_BINARIES, num_devices * sizeof(unsigned char*), &binary, &real_size);
184
if ((err == CL_SUCCESS)&&(real_size == num_devices * sizeof(unsigned char*))) {
185
for (i = 0; i < num_devices; i++) {
186
// Output name: argv[3] when given, otherwise "opencl.out", suffixed with
// ".<device index>".
// NOTE(review): sprintf() does not bound the write into `fname` (its size is
// not visible here).
sprintf(fname, "%s.%u", ((argc>3)?argv[3]:"opencl.out"), i);
187
f = fopen(fname, "wb");
189
// NOTE(review): the fwrite() result is discarded.
fwrite(binary[i], 1, binary_size[i], f);
194
// Continues from the current index; the loop body is not visible here
// (presumably it frees the remaining buffers -- confirm).
for (; i < num_devices; i++)
196
fail(-1, "Failed to create output file");
200
// Loop body not visible here (presumably frees every buffer -- confirm).
for (i = 0; i < num_devices; i++)
202
// No trailing semicolon: this relies on fail() expanding to a complete
// `{ ... }` block.
fail(-1, "Binary readout failed")
207
// Optional check: try to create the kernel named by argv[4] and release it
// again on success (the guard on argc is not visible here).
kern = clCreateKernel(app, argv[4], &err);
208
if (err == CL_SUCCESS) clReleaseKernel(kern);
209
else printf("Error creating kernel (%i)\n", err);
212
// Release the program and the context.
clReleaseProgram(app);
213
clReleaseContext(ctx);