/* Byte offsets of the device registers accessed through the `bar` pointer. */
#define REG_PERF_COUNTER        0x20
#define REG_DESCRIPTOR_ADDRESS  0x50

/*
 * Register access helpers.
 *
 * `addr` is a byte offset from the start of `bar`; it is converted to an
 * element index by dividing by the access width (4 or 8 bytes).
 *
 * All three macros expect a pointer named `bar` to be in scope at the point
 * of use. Arguments and expansions are fully parenthesized and carry no
 * trailing semicolon, so the macros behave like ordinary expressions and the
 * caller supplies the terminating `;`.
 */
#define RD32(addr)        (((__global volatile uint *)bar)[(addr) / 4])
#define WR32(addr, value) (((__global volatile uint *)bar)[(addr) / 4] = (value))
#define WR64(addr, value) (((__global volatile ulong *)bar)[(addr) / 8] = (value))

/*
 * Writes the value 1 to the 32-bit register at REG_PERF_COUNTER.
 *
 * iter, input, output and bus_addr are not used by this kernel; they are kept
 * so that the argument list is identical to that of `measure`.
 * NOTE(review): presumably the write starts or resets a performance counter
 * on the device -- confirm against the hardware documentation.
 */
__kernel void process(uint iter,
                      __global uint *input,
                      __global uint *output,
                      __global volatile uint *bar,
                      ulong bus_addr)
{
    WR32(REG_PERF_COUNTER, 1);
}

/*
 * Records one reading of the REG_PERF_COUNTER register.
 *
 *   output[0]        receives `iter`
 *   output[iter + 1] receives the 32-bit value read from REG_PERF_COUNTER
 *
 * Afterwards `bus_addr` is written as a single 64-bit store to
 * REG_DESCRIPTOR_ADDRESS.
 *
 * `output` must therefore hold at least iter + 2 elements. `input` is unused.
 * The kernel does not use any work-item index, so every work-item performs
 * the same accesses.
 * NOTE(review): looks like this is meant to be launched with a single
 * work-item -- confirm against the host code.
 */
__kernel void measure(uint iter,
                      __global uint *input,
                      __global uint *output,
                      __global volatile uint *bar,
                      ulong bus_addr)
{
    output[0] = iter;
    output[iter + 1] = RD32(REG_PERF_COUNTER);
    WR64(REG_DESCRIPTOR_ADDRESS, bus_addr);
}

/* Empty kernel: takes no arguments and performs no work. */
__kernel void nil()
{
}