summary | refs | log | tree | commit | diff | stats
path: root/deps/oclfft
diff options
context:
space:
mode:
author: Roman Shkarin <mathii2009@gmail.com> 2014-03-07 16:36:25 +0100
committer: Roman Shkarin <mathii2009@gmail.com> 2014-03-08 23:46:35 +0100
commit: 4c5aeea55719627e8dca47b102200f84279d8d4b (patch)
tree: d4590f87143458a15de2eb19502ea5d62db5db81 /deps/oclfft
parent: 67b75f1c40a19071693fdcfa1ff4816cd4ae42fd (diff)
download: ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.tar.gz
ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.tar.bz2
ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.tar.xz
ufo-filters-4c5aeea55719627e8dca47b102200f84279d8d4b.zip
Add profiling to oclfft library
Diffstat (limited to 'deps/oclfft')
-rw-r--r-- deps/oclfft/CMakeLists.txt 3
-rw-r--r-- deps/oclfft/clFFT.h 20
-rw-r--r-- deps/oclfft/fft_execute.cpp 70
3 files changed, 80 insertions, 13 deletions
diff --git a/deps/oclfft/CMakeLists.txt b/deps/oclfft/CMakeLists.txt
index 194ffd0..abda5b8 100644
--- a/deps/oclfft/CMakeLists.txt
+++ b/deps/oclfft/CMakeLists.txt
@@ -1,5 +1,6 @@
project(oclfft CXX)
-include_directories(${OPENCL_INCLUDE_DIRS})
+include_directories(${OPENCL_INCLUDE_DIRS}
+ ${UFO_INCLUDE_DIRS})
add_library(oclfft SHARED
fft_execute.cpp
diff --git a/deps/oclfft/clFFT.h b/deps/oclfft/clFFT.h
index e893d95..6f91c15 100644
--- a/deps/oclfft/clFFT.h
+++ b/deps/oclfft/clFFT.h
@@ -55,6 +55,7 @@ extern "C" {
#include <CL/cl.h>
#include <stdio.h>
+#include <ufo/ufo.h>
// XForm type
typedef enum
@@ -107,18 +108,31 @@ void clFFT_DestroyPlan( clFFT_Plan plan );
cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out,
- cl_int num_events, cl_event *event_list, cl_event *event );
+ cl_int num_events, cl_event *event_list, cl_event *event);
+
+cl_int clFFT_ExecuteInterleaved_Ufo( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
+ cl_mem data_in, cl_mem data_out,
+ cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler);
cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
- cl_int num_events, cl_event *event_list, cl_event *event );
+ cl_int num_events, cl_event *event_list, cl_event *event);
+
+cl_int clFFT_ExecutePlannar_Ufo( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
+ cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
+ cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler);
cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
-
+
+cl_int clFFT_1DTwistInterleaved_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
+ size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler);
cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
+
+cl_int clFFT_1DTwistPlannar_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
+ size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler);
void clFFT_DumpPlan( clFFT_Plan plan, FILE *file);
diff --git a/deps/oclfft/fft_execute.cpp b/deps/oclfft/fft_execute.cpp
index 64dacdf..67686fe 100644
--- a/deps/oclfft/fft_execute.cpp
+++ b/deps/oclfft/fft_execute.cpp
@@ -124,7 +124,15 @@ getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_in
cl_int
clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in, cl_mem data_out,
- cl_int num_events, cl_event *event_list, cl_event *event )
+ cl_int num_events, cl_event *event_list, cl_event *event)
+{
+ return clFFT_ExecuteInterleaved_Ufo(queue, Plan, batchSize, dir, data_in, data_out, num_events, event_list, event, NULL);
+}
+
+cl_int
+clFFT_ExecuteInterleaved_Ufo( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
+ cl_mem data_in, cl_mem data_out,
+ cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler)
{
int s;
cl_fft_plan *plan = (cl_fft_plan *) Plan;
@@ -181,7 +189,11 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s);
- err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+ if (profiler)
+ ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems);
+ else
+ err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+
if(err)
return err;
@@ -203,8 +215,12 @@ clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchS
err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s);
-
- err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+
+ if (profiler)
+ ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems);
+ else
+ err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+
if(err)
return err;
@@ -222,6 +238,14 @@ cl_int
clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
cl_int num_events, cl_event *event_list, cl_event *event)
+{
+ return clFFT_ExecutePlannar_Ufo(queue, Plan, batchSize, dir, data_in_real, data_in_imag, data_out_real, data_out_imag, num_events, event_list, event, NULL);
+}
+
+cl_int
+clFFT_ExecutePlannar_Ufo( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir,
+ cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
+ cl_int num_events, cl_event *event_list, cl_event *event, UfoProfiler *profiler)
{
int s;
cl_fft_plan *plan = (cl_fft_plan *) Plan;
@@ -286,7 +310,11 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s);
- err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+ if (profiler)
+ ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems);
+ else
+ err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+
if(err)
return err;
@@ -309,8 +337,12 @@ clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize,
err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_mem), &memObj_imag[currWrite]);
err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir);
err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s);
-
- err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+
+ if (profiler)
+ ufo_profiler_call (profiler, queue, kernelInfo->kernel, 1, &gWorkItems, &lWorkItems);
+ else
+ err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event);
+
if(err)
return err;
@@ -328,6 +360,13 @@ cl_int
clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir)
{
+ return clFFT_1DTwistInterleaved_Ufo(Plan, queue, array, numRows, numCols, startRow, rowsToProcess, dir, NULL);
+}
+
+cl_int
+clFFT_1DTwistInterleaved_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
+ size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler)
+{
cl_fft_plan *plan = (cl_fft_plan *) Plan;
unsigned int N = numRows*numCols;
@@ -358,7 +397,10 @@ clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &rToProcess);
err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(int), &d);
- err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
+ if (profiler)
+ ufo_profiler_call (profiler, queue, plan->twist_kernel, 1, numGlobalThreads, numLocalThreads);
+ else
+ err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
return err;
}
@@ -367,6 +409,13 @@ cl_int
clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir)
{
+ return clFFT_1DTwistPlannar_Ufo(Plan, queue, array_real, array_imag, numRows, numCols, startRow, rowsToProcess, dir, NULL);
+}
+
+cl_int
+clFFT_1DTwistPlannar_Ufo(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
+ size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir, UfoProfiler *profiler)
+{
cl_fft_plan *plan = (cl_fft_plan *) Plan;
unsigned int N = numRows*numCols;
@@ -398,7 +447,10 @@ clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real,
err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(unsigned int), &rToProcess);
err |= clSetKernelArg(plan->twist_kernel, 6, sizeof(int), &d);
- err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
+ if (profiler)
+ ufo_profiler_call (profiler, queue, plan->twist_kernel, 1, numGlobalThreads, numLocalThreads);
+ else
+ err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL);
return err;
}