diff options
author | Roman Shkarin <mathii2009@gmail.com> | 2014-03-07 16:36:25 +0100 |
---|---|---|
committer | Roman Shkarin <mathii2009@gmail.com> | 2014-03-08 23:46:35 +0100 |
commit | 4c5aeea55719627e8dca47b102200f84279d8d4b (patch) | |
tree | d4590f87143458a15de2eb19502ea5d62db5db81 /deps/oclfft | |
parent | 67b75f1c40a19071693fdcfa1ff4816cd4ae42fd (diff) | |
download | ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.tar.gz ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.tar.bz2 ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.tar.xz ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.zip |
Add profiling to oclfft library
Diffstat (limited to 'deps/oclfft')
-rw-r--r-- | deps/oclfft/CMakeLists.txt | 3 | ||||
-rw-r--r-- | deps/oclfft/clFFT.h | 20 | ||||
-rw-r--r-- | deps/oclfft/fft_execute.cpp | 70 |
3 files changed, 80 insertions, 13 deletions
diff --git a/deps/oclfft/CMakeLists.txt b/deps/oclfft/CMakeLists.txt index 194ffd0..abda5b8 100644 --- a/deps/oclfft/CMakeLists.txt +++ b/deps/oclfft/CMakeLists.txt @@ -1,5 +1,6 @@ project(oclfft CXX) -include_directories(${OPENCL_INCLUDE_DIRS}) +include_directories(${OPENCL_INCLUDE_DIRS} + ${UFO_INCLUDE_DIRS}) add_library(oclfft SHARED fft_execute.cpp diff --git a/deps/oclfft/clFFT.h b/deps/oclfft/clFFT.h index e893d95..6f91c15 100644 --- a/deps/oclfft/clFFT.h +++ b/deps/oclfft/clFFT.h @@ -55,6 +55,7 @@ extern "C" { #include <CL/cl.h> #include <stdio.h> +#include <ufo/ufo.h> // XForm type typedef enum @@ -107,18 +108,31 @@ void clFFT_DestroyPlan( clFFT_Plan plan ); cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_mem data_in, cl_mem data_out, - cl_int num_events, cl_event *event_list, cl_event *event ); + cl_int num_events, cl_event *event_list, cl_event *event); + +cl_int clFFT_ExecuteInterleaved_Ufo( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, + cl_mem data_in, cl_mem data_out, + cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler); cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, - cl_int num_events, cl_event *event_list, cl_event *event ); + cl_int num_events, cl_event *event_list, cl_event *event); + +cl_int clFFT_ExecutePlannar_Ufo( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, + cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, + cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler); cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir); - + +cl_int clFFT_1DTwistInterleaved_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, + size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler); cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir); + +cl_int clFFT_1DTwistPlannar_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, + size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler); void clFFT_DumpPlan( clFFT_Plan plan, FILE *file); diff --git a/deps/oclfft/fft_execute.cpp b/deps/oclfft/fft_execute.cpp index 64dacdf..67686fe 100644 --- a/deps/oclfft/fft_execute.cpp +++ b/deps/oclfft/fft_execute.cpp @@ -124,7 +124,15 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, cl_mem data_in, cl_mem data_out, - cl_int num_events, cl_event *event_list, cl_event *event ) + cl_int num_events, cl_event *event_list, cl_event *event) +{ + return clFFT_ExecuteInterleaved_Ufo(queue, Plan, batchSize, dir, data_in, data_out, num_events, event_list, event, NULL); +} + +cl_int +clFFT_ExecuteInterleaved_Ufo( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, + cl_mem data_in, cl_mem data_out, + cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler) { int s; cl_fft_plan *plan = (cl_fft_plan *) Plan; @@ -181,7 +189,11 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir); err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s); - err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if (profiler) + ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems); + else + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) return err; @@ -203,8 +215,12 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]); err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir); err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s); - - err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + + if (profiler) + ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems); + else + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) return err; @@ -222,6 +238,14 @@ cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, cl_int num_events, cl_event *event_list, cl_event *event) +{ + return clFFT_ExecutePlannar_Ufo(queue, Plan, batchSize, dir, data_in_real, data_in_imag, data_out_real, data_out_imag, num_events, event_list, event, NULL); +} + +cl_int +clFFT_ExecutePlannar_Ufo( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, + cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, + cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler) { int s; cl_fft_plan *plan = (cl_fft_plan *) Plan; @@ -286,7 +310,11 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir); err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s); - err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if (profiler) + ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems); + else + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) return err; @@ -309,8 +337,12 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_mem), &memObj_imag[currWrite]); err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir); err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s); - - err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + + if (profiler) + ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems); + else + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) return err; @@ -328,6 +360,13 @@ cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir) { + return clFFT_1DTwistInterleaved_Ufo(Plan, queue, array, numRows, numCols, startRow, rowsToProcess, dir, NULL); +} + +cl_int +clFFT_1DTwistInterleaved_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, + size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler) +{ cl_fft_plan *plan = (cl_fft_plan *) Plan; unsigned int N = numRows*numCols; @@ -358,7 +397,10 @@ clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &rToProcess); err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(int), &d); - err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); + if (profiler) + ufo_profiler_call (profiler, queue, plan->twist_kernel, 1, numGlobalThreads, numLocalThreads); + else + err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); return err; } @@ -367,6 +409,13 @@ cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir) { + return clFFT_1DTwistPlannar_Ufo(Plan, queue, array_real, array_imag, numRows, numCols, startRow, rowsToProcess, dir, NULL); +} + +cl_int +clFFT_1DTwistPlannar_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, + size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler) +{ cl_fft_plan *plan = (cl_fft_plan *) Plan; unsigned int N = numRows*numCols; @@ -398,7 +447,10 @@ clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(unsigned int), &rToProcess); err |= clSetKernelArg(plan->twist_kernel, 6, sizeof(int), &d); - err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); + if (profiler) + ufo_profiler_call (profiler, queue, plan->twist_kernel, 1, numGlobalThreads, numLocalThreads); + else + err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); return err; } |