/opencl/tools

To get this branch, use:
bzr branch http://darksoft.org/webbzr/opencl/tools
1 by Suren A. Chilingaryan
Initial commit
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <string.h>
4
5
#include <CL/cl.h>
6
7
/*
 * Print `reason` (a C string) followed by a newline to stderr and terminate
 * the process with exit status `code`.
 *
 * The expansion is a plain brace block, so it may be invoked with or without
 * a trailing semicolon. Because of that it must not be used as the unbraced
 * body of an `if` that is followed by `else`.
 */
#define fail(code, reason) { \
    fprintf(stderr, "%s\n", (reason)); \
    exit((code)); \
}
11
12
typedef struct  {
13
    cl_uint num_devices;
14
    cl_device_id *devices;
15
} device_info_t;
16
17
/*
 * Read the whole file `name` into a freshly allocated, NUL-terminated buffer.
 *
 * On success returns the buffer (to be released with free()) and stores the
 * number of bytes read, excluding the terminator, in `*len`.
 * Returns NULL if the file cannot be opened, sized, allocated or read.
 */
static char *load_app_read_source(const char *name, size_t *len)
{
    char *buf = NULL;
    long end;
    FILE *f = fopen(name, "rb");

    if (!f) return NULL;

    if (fseek(f, 0, SEEK_END) != 0) goto out;
    end = ftell(f);
    if (end < 0) goto out;
    if (fseek(f, 0, SEEK_SET) != 0) goto out;

    /*
     * One extra byte for a terminator: OpenCL treats a source length of 0 as
     * "string is NUL-terminated", so an empty file must still be terminated.
     */
    buf = malloc((size_t)end + 1);
    if (!buf) goto out;

    if (fread(buf, 1, (size_t)end, f) != (size_t)end) {
        free(buf);
        buf = NULL;
        goto out;
    }

    buf[end] = '\0';
    *len = (size_t)end;

out:
    fclose(f);
    return buf;
}

/*
 * Print the build outcome of `app` for one device. `index` is only used to
 * label the output. The build log is fetched into a buffer sized from the
 * runtime's own answer, so logs of any length are shown.
 */
static void load_app_report_build(cl_program app, cl_device_id device, cl_uint index)
{
    cl_build_status status = CL_BUILD_NONE;
    size_t size = 0;
    char *log;
    cl_int err;

    /* Stop polling as soon as the query itself fails; `status` would be stale. */
    do {
        err = clGetProgramBuildInfo(app, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL);
    } while (err == CL_SUCCESS && status == CL_BUILD_IN_PROGRESS);

    if (err != CL_SUCCESS) {
        printf("Failed to query build status for device %u (%i)\n", index, err);
        return;
    }

    err = clGetProgramBuildInfo(app, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
    if (err != CL_SUCCESS) {
        printf("Failed to read build log for device %u (%i)\n", index, err);
        return;
    }

    log = malloc(size + 1);
    if (!log) {
        printf("Failed to allocate build log for device %u\n", index);
        return;
    }

    if (size > 0) {
        err = clGetProgramBuildInfo(app, device, CL_PROGRAM_BUILD_LOG, size, log, NULL);
        if (err != CL_SUCCESS) {
            printf("Failed to read build log for device %u (%i)\n", index, err);
            free(log);
            return;
        }
    }
    log[size] = '\0';

    if (status == CL_BUILD_SUCCESS) {
        /* Logs of one or two characters are treated as empty and not shown. */
        if (strlen(log) > 2) {
            printf("Build successed for device %u:\n===================\n%s\n--------------------\n\n", index, log);
        }
    } else if (status == CL_BUILD_ERROR) {
        printf("Build failed for device %u:\n======================\n%s\n--------------------\n\n", index, log);
    } else {
        printf("Build failed for device %u\n", index);
    }

    free(log);
}

/*
 * Load OpenCL C source from the file `name`, create a program in `ctx` and
 * build it for the `num_devices` devices in `devices`, printing the device
 * names and per-device build results to stdout.
 *
 * Returns the program object, or NULL if the source file cannot be read or
 * the program object cannot be created. A program whose build failed is
 * still returned (the failure is only reported on stdout); the caller
 * releases it with clReleaseProgram().
 */
static inline cl_program load_app(cl_context ctx, cl_uint num_devices, cl_device_id *devices, const char *name) {
    size_t len = 0;
    char *buf;
    cl_program app;
    cl_uint i;

    buf = load_app_read_source(name, &len);
    if (!buf) return NULL;

    printf("Compiling for ");
    for (i = 0; i < num_devices; i++) {
        char dev_name[256];
        cl_int info_err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL);
        if (info_err != CL_SUCCESS) printf("Unknown ");
        else printf("%s ", dev_name);
    }
    printf("\n");

    app = clCreateProgramWithSource(ctx, 1, (const char **)&buf, &len, NULL);
    if (!app) {
        printf("Program creation failed\n");
        free(buf);
        return NULL;
    }

    /*
     * Other NVIDIA-specific options that may be useful here:
     *   -cl-nv-maxrregcount=48
     *   -cl-nv-opt-level=<N>   (0 - no optimizations, 3 - default)
     *   -cl-nv-arch sm_XX      target CUDA compute level (sm_10 for 1.0,
     *                          sm_11 for 1.1, sm_12 for 1.2, sm_13 for 1.3,
     *                          sm_20 for 2.0 (Fermi))
     *   --cl-nv-cstd=CLX.X     target OpenCL C language version (CL1.0 or CL1.1)
     */
    const char *build_flags = "-cl-nv-arch sm_20 -cl-nv-verbose -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-unsafe-math-optimizations -cl-finite-math-only";

    cl_int err = clBuildProgram(app, num_devices, devices, build_flags, NULL, NULL);
    if (err != CL_SUCCESS) printf("Application build failed (%i)\n", err);

    for (i = 0; i < num_devices; i++) {
        load_app_report_build(app, devices[i], i);
    }

    free(buf);
    return app;
}
98
99
int main(int argc, char *argv[]) {
100
    int devid = -1;
101
    
102
    cl_int err;
103
    cl_device_id devices[16];
104
    cl_program app;
105
106
    char fname[256];    
107
    unsigned char *binary[16];
108
    size_t binary_size[16];
109
    size_t real_size;
110
    
111
    FILE *f;
112
    
113
    if (argc < 2) {
114
	fprintf(stderr, "Usage: %s <source> [device] [binary] [kernel]\n", argv[0]);
115
	exit(0);	
116
    }
117
    
118
    if (argc > 2) {
119
	devid = atoi(argv[2]);	
120
    }
121
122
    cl_uint max = 0;
123
    cl_platform_id selected_platform;
124
    cl_uint i, num_platforms, num_devices;
125
    cl_platform_id platforms[4];
126
127
    clGetPlatformIDs(4, platforms, &num_platforms);
128
129
    for (i = 0; i < num_platforms; i++) {
130
	err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
131
	if ((err == CL_SUCCESS)&&(num_devices > max)) {
132
	    max = num_devices;
133
	    selected_platform = platforms[i];
134
	}
135
    }
136
    
137
138
    clGetDeviceIDs(selected_platform, CL_DEVICE_TYPE_GPU, 16, devices, &num_devices);
139
    
140
    if (devid >= (int)num_devices) fail(-1, "Invalid device number is specified");
141
142
    cl_context ctx;
143
    if (devid < 0)
144
        ctx = clCreateContext(0, num_devices, devices, NULL, NULL, &err);
145
    else
146
	ctx = clCreateContext(0, 1, &devices[devid], NULL, NULL, &err);
147
    if (!ctx) fail(-1, "Failed to create OpenCL context");
148
149
//    cl_context ctx = clCreateContextFromType(NULL, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
150
//    if (!ctx) fail(-1, "Failed to create OpenCL context");
151
152
  
153
//    err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 16 * sizeof(cl_device_id), &devices, &num_devices);
154
//    if (err != CL_SUCCESS) fail(-1, "clGetContextInfo call is failed");
155
156
//    num_devices /= sizeof(cl_device_id);
157
158
    if (devid < 0)
159
	app = load_app(ctx, num_devices, devices, argv[1]);
160
    else
161
	app = load_app(ctx, 1, &devices[devid], argv[1]);
162
    if (!app) fail(-1, "Compilation failed");
163
164
    if (devid >= 0) num_devices = 1;
165
    err = clGetProgramInfo(app, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), &binary_size, &real_size);
166
    
167
    printf("Status: %i, Sizes: ", err);
168
    for (i = 0; i < num_devices; i++) printf (" %li", binary_size[i]);
169
    printf("\n");
170
171
    if ((err != CL_SUCCESS)||(real_size != num_devices*sizeof(size_t)))
172
	fail(-1, "Failed to get binary size");
173
    
174
175
    for (i = 0; i < num_devices; i++) {
176
        binary[i] = malloc(binary_size[i] + 1);
177
	if (!binary[i]) {
178
	    for (i--; i>=0; i--) free(binary[i]);
179
	    fail(-1, "allocation failed");
180
	}
181
    }
182
    
183
    err = clGetProgramInfo(app, CL_PROGRAM_BINARIES, num_devices * sizeof(unsigned char*), &binary, &real_size);
184
    if ((err == CL_SUCCESS)&&(real_size == num_devices * sizeof(unsigned char*))) {
185
	for (i = 0; i < num_devices; i++) {
186
	    sprintf(fname, "%s.%u", ((argc>3)?argv[3]:"opencl.out"), i);
187
	    f = fopen(fname, "wb");
188
	    if (f) {
189
		fwrite(binary[i], 1, binary_size[i], f);
190
		fclose(f);
191
192
		free(binary[i]);
193
	    } else {
194
		for (; i < num_devices; i++)
195
		    free(binary[i]);
196
		fail(-1, "Failed to create output file");
197
	    }
198
	}
199
    } else {
200
	for (i = 0; i < num_devices; i++)
201
	    free(binary[i]);
202
	fail(-1, "Binary readout failed")
203
    }
204
205
    if (argc > 4) {
206
	cl_kernel kern;
207
	kern = clCreateKernel(app, argv[4], &err);
208
	if (err == CL_SUCCESS) clReleaseKernel(kern);
209
	else printf("Error creating kernel (%i)\n", err);
210
    }
211
212
    clReleaseProgram(app);
213
    clReleaseContext(ctx);
214
    
215
}