From 1028504583fe0ba72ecce004610e50a6eca42474 Mon Sep 17 00:00:00 2001 From: Tomas Farago Date: Mon, 5 Aug 2019 08:58:37 +0200 Subject: NLM: Put common things to src/common --- src/CMakeLists.txt | 4 ++ src/common/ufo-common.c | 104 ++++++++++++++++++++++++++++++++++++++++ src/common/ufo-common.h | 41 ++++++++++++++++ src/common/ufo-math.c | 9 ++++ src/common/ufo-math.h | 3 ++ src/meson.build | 15 +++++- src/ufo-non-local-means-task.c | 105 +++++++---------------------------------- 7 files changed, 191 insertions(+), 90 deletions(-) create mode 100644 src/common/ufo-common.c create mode 100644 src/common/ufo-common.h (limited to 'src') diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d4bc70c..6f69f19 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -127,6 +127,10 @@ set(general_backproject_aux_SRCS common/ufo-scarray.c common/ufo-ctgeometry.c) +set(non_local_means_aux_SRCS + common/ufo-math.c + common/ufo-common.c) + file(GLOB ufofilter_KERNELS "kernels/*.cl") #}}} #{{{ Variables diff --git a/src/common/ufo-common.c b/src/common/ufo-common.c new file mode 100644 index 0000000..f157cf4 --- /dev/null +++ b/src/common/ufo-common.c @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2015-2019 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include +#include +#include "ufo-math.h" +#include "ufo-common.h" +/* + * NOTE(review): the two bare #include directives above carry no header + * name in this copy of the patch; they presumably lost a <...> argument + * and need to be restored before this compiles. + * + * ufo_common_estimate_sigma: + * + * Launches convolution_kernel over the 2D range global_size with + * input_image, sampler and out_mem as arguments 0-2, then launches + * sum_kernel over a 1D range to reduce out_mem into one partial sum per + * work group. The partial sums are read back, added on the host and + * scaled: the return value is + * sqrt (pi / 2) / (6 (W - 2) (H - 2)) times the sum, with W and H being + * global_size[0] and global_size[1]. + * + * cmd_queue is used for both launches and the read-back; the OpenCL + * context for the temporary buffer is queried from it. profiler is + * handed to ufo_profiler_call for both launches. max_work_group_size + * bounds the 1D work-group size, which is reduced to a power of two. + * global_size must point to at least two elements. out_mem is handed to + * both kernels, so its previous contents should presumably be treated + * as lost. + * + * NOTE(review): the results of clGetCommandQueueInfo and + * clEnqueueReadBuffer are not checked, and a global_size entry equal to + * 2 makes the divisor zero. + */ +gfloat +ufo_common_estimate_sigma (cl_kernel convolution_kernel, + cl_kernel sum_kernel, + cl_command_queue cmd_queue, + cl_sampler sampler, + UfoProfiler *profiler, + cl_mem input_image, + cl_mem out_mem, + const gsize max_work_group_size, + const gsize *global_size) +{ + gsize n = global_size[0] * global_size[1]; + gsize local_size, num_groups, global_size_1D; + gint num_group_iterations; + gfloat *result, sum = 0.0f; + cl_int err; + cl_mem group_sums; + cl_context context; + + clGetCommandQueueInfo (cmd_queue, CL_QUEUE_CONTEXT, sizeof (cl_context), &context, NULL); + + /* First compute the convolution of the input with the difference of + * laplacians. + */ + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 0, sizeof (cl_mem), &input_image)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 1, sizeof (cl_sampler), &sampler)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (convolution_kernel, 2, sizeof (cl_mem), &out_mem)); + ufo_profiler_call (profiler, cmd_queue, convolution_kernel, 2, global_size, NULL); + + /* Now compute partial sums of the convolved image. */ + /* Compute global and local dimensions for the cumsum kernel */ + /* Make sure local_size is a power of 2 */ + local_size = ufo_math_compute_closest_smaller_power_of_2 (max_work_group_size); + /* Number of iterations of every group is given by the number of pixels + * divided by the number of pixels *num_groups* can process. */ + num_groups = MIN (local_size, UFO_MATH_NUM_CHUNKS (n, local_size)); + num_group_iterations = UFO_MATH_NUM_CHUNKS (n, local_size * num_groups); + /* The real number of groups is given by the number of pixels + * divided by the group size and the number of group iterations. 
+ */ + num_groups = UFO_MATH_NUM_CHUNKS (n, num_group_iterations * local_size); + global_size_1D = num_groups * local_size; + /* NOTE(review): %lu is used for gsize arguments below; G_GSIZE_FORMAT is the portable GLib specifier. */ + g_debug (" n: %lu", n); + g_debug (" num groups: %lu", num_groups); + g_debug (" group iterations: %d", num_group_iterations); + g_debug ("kernel global size: %lu", global_size_1D); + g_debug (" kernel local size: %lu", local_size); + /* One float per work group, once on the host (result) and once on the device (group_sums). */ + result = g_malloc0 (sizeof (cl_float) * num_groups); + group_sums = clCreateBuffer (context, + CL_MEM_READ_WRITE, + sizeof (cl_float) * num_groups, + NULL, + &err); + UFO_RESOURCES_CHECK_CLERR (err); + /* Argument 3 below passes NULL with a size, which reserves local_size floats of local memory for the kernel. */ + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 0, sizeof (cl_mem), &out_mem)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 1, sizeof (cl_mem), &group_sums)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 2, sizeof (cl_mem), &out_mem)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 3, sizeof (cl_float) * local_size, NULL)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 4, sizeof (gsize), &n)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (sum_kernel, 5, sizeof (gint), &num_group_iterations)); + ufo_profiler_call (profiler, cmd_queue, sum_kernel, 1, &global_size_1D, &local_size); + /* Blocking read (CL_TRUE): result is filled once the call returns. NOTE(review): the return code is ignored. */ + clEnqueueReadBuffer (cmd_queue, + group_sums, + CL_TRUE, + 0, sizeof (cl_float) * num_groups, + result, + 0, NULL, NULL); + UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (group_sums)); + + /* Sum partial sums computed by the groups. */ + for (gsize i = 0; i < num_groups; i++) { + sum += result[i]; + } + g_free (result); + /* G_PI_2 is pi / 2; the divisor uses each dimension reduced by 2. */ + return sqrt (G_PI_2) / (6 * (global_size[0] - 2.0f) * (global_size[1] - 2.0f)) * sum; +} diff --git a/src/common/ufo-common.h b/src/common/ufo-common.h new file mode 100644 index 0000000..f6e2349 --- /dev/null +++ b/src/common/ufo-common.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2015-2019 Karlsruhe Institute of Technology + * + * This file is part of Ufo. 
+ * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + */ + +#ifndef UFO_COMMON_H +#define UFO_COMMON_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +/* Runs the convolution and sum kernels on the given queue and returns the scaled sum as the sigma estimate; defined in ufo-common.c. global_size must point to at least two elements. */ +gfloat ufo_common_estimate_sigma (cl_kernel convolution_kernel, + cl_kernel sum_kernel, + cl_command_queue cmd_queue, + cl_sampler sampler, + UfoProfiler *profiler, + cl_mem input_image, + cl_mem out_mem, + const gsize max_work_group_size, + const gsize *global_size); + +#endif diff --git a/src/common/ufo-math.c b/src/common/ufo-math.c index 78b7dda..725b7e6 100644 --- a/src/common/ufo-math.c +++ b/src/common/ufo-math.c @@ -140,3 +140,12 @@ ufo_array_minimum (gdouble *array, gint num_values) { return find_extremum (array, num_values, 1); } +/* + * ufo_math_compute_closest_smaller_power_of_2: + * + * Returns 2 raised to the integer part of log2 (value), i.e. the largest + * power of two that does not exceed value, for value >= 1. + * + * NOTE(review): the computation goes through double, so the result may be + * off for values that a double cannot represent exactly; value == 0 is + * presumably not expected by callers. + */ +gsize +ufo_math_compute_closest_smaller_power_of_2 (gsize value) +{ + gdouble integer; + modf (log2 (value), &integer); + + return (gsize) pow (2, integer); +} diff --git a/src/common/ufo-math.h b/src/common/ufo-math.h index 70084ce..8b283e1 100644 --- a/src/common/ufo-math.h +++ b/src/common/ufo-math.h @@ -24,6 +24,8 @@ #define UFO_MATH_EPSILON 1e-7 #define UFO_MATH_ARE_ALMOST_EQUAL(a, b) (ABS ((a) - (b)) < UFO_MATH_EPSILON) +#define UFO_MATH_NUM_CHUNKS(n, k) (((n) - 1) / (k) + 1) /* number of chunks of size k needed to cover n items (n / k rounded up), valid for n >= 1 */ + typedef struct { gdouble x, y, z; @@ -54,5 +56,6 @@ gdouble ufo_array_minimum (gdouble *array, gdouble ufo_clip_value (gdouble value, gdouble minimum, gdouble maximum); +gsize 
ufo_math_compute_closest_smaller_power_of_2 (gsize value); #endif diff --git a/src/meson.build b/src/meson.build index eb981b2..4cd4bdc 100644 --- a/src/meson.build +++ b/src/meson.build @@ -48,7 +48,6 @@ plugins = [ 'merge', 'metaballs', 'monitor', - 'non-local-means', 'null', 'opencl', 'opencl-reduce', @@ -171,6 +170,20 @@ shared_module('conebeamprojectionweight', install_dir: plugin_install_dir, ) +# non local means + +shared_module('nonlocalmeans', + sources: [ + 'ufo-non-local-means-task.c', + 'common/ufo-math.c', + 'common/ufo-common.c', + ], + dependencies: deps, + name_prefix: 'libufofilter', + install: true, + install_dir: plugin_install_dir, +) + # fft plugins have_clfft = clfft_dep.found() diff --git a/src/ufo-non-local-means-task.c b/src/ufo-non-local-means-task.c index 3a9b1a7..a0eff2c 100644 --- a/src/ufo-non-local-means-task.c +++ b/src/ufo-non-local-means-task.c @@ -25,10 +25,11 @@ #include #include "ufo-non-local-means-task.h" +#include "common/ufo-math.h" #include "common/ufo-addressing.h" +#include "common/ufo-common.h" #define PIXELS_PER_THREAD 4 -#define NUM_CHUNKS(n, k) (((n) - 1) / (k) + 1) struct _UfoNonLocalMeansTaskPrivate { guint search_radius; @@ -71,15 +72,6 @@ enum { static GParamSpec *properties[N_PROPERTIES] = { NULL, }; -static gsize -compute_closest_smaller_power_of_2 (gsize value) -{ - gdouble integer; - modf (log2 (value), &integer); - - return (gsize) pow (2, integer); -} - static gint compute_cumsum_local_width (UfoNonLocalMeansTaskPrivate *priv) { @@ -88,7 +80,7 @@ compute_cumsum_local_width (UfoNonLocalMeansTaskPrivate *priv) /* Compute global and local dimensions for the cumsum kernel */ /* First make sure local_width is a power of 2 */ - local_width = (gint) compute_closest_smaller_power_of_2 (priv->max_work_group_size); + local_width = (gint) ufo_math_compute_closest_smaller_power_of_2 (priv->max_work_group_size); if (local_width > 4) { /* Empirically determined value on NVIDIA cards */ local_width /= 4; @@ -253,19 +245,19 
@@ compute_cumsum (UfoNonLocalMeansTaskPrivate *priv, * This is not be the final number of groups, it's just used to compute the * number of iterations of every group. */ - num_groups = MIN (local_width, NUM_CHUNKS (width, local_width)); + num_groups = MIN (local_width, UFO_MATH_NUM_CHUNKS (width, local_width)); /* Number of iterations of every group is given by the number of pixels * divided by the number of pixels *num_groups* can process. */ - num_group_iterations = NUM_CHUNKS (width, local_width * num_groups); + num_group_iterations = UFO_MATH_NUM_CHUNKS (width, local_width * num_groups); /* Finally, the real number of groups is given by the number of pixels * divided by the group size and the number of group iterations. */ - num_groups = NUM_CHUNKS (width, num_group_iterations * local_width); + num_groups = UFO_MATH_NUM_CHUNKS (width, num_group_iterations * local_width); /* Cache size must be larger by *local_size* / 16 because of the bank * conflicts avoidance. Additionally, +1 is needed because of the shifted * access to the local memory. */ - cache_size = sizeof (cl_float) * (local_width + NUM_CHUNKS (local_width, 16) + 1); + cache_size = sizeof (cl_float) * (local_width + UFO_MATH_NUM_CHUNKS (local_width, 16) + 1); cumsum_global_size[0] = num_groups * local_width / 2; cumsum_global_size[1] = height; block_sums_global_size[0] = local_width / 2; @@ -559,79 +551,6 @@ ufo_non_local_means_task_get_mode (UfoTask *task) return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU; } -static gfloat -compute_sigma (UfoNonLocalMeansTaskPrivate *priv, - cl_command_queue cmd_queue, - UfoProfiler *profiler, - cl_mem input_image, - cl_mem out_mem) -{ - gsize n = priv->cropped_size[0] * priv->cropped_size[1]; - gsize local_size, num_groups, num_group_iterations, global_size; - gfloat *result, sum = 0.0f; - cl_int err; - cl_mem group_sums; - - /* First compute the convolution of the input with the difference of - * laplacians. 
- */ - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 0, sizeof (cl_mem), &input_image)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 1, sizeof (cl_sampler), &priv->sampler)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->convolution_kernel, 2, sizeof (cl_mem), &out_mem)); - ufo_profiler_call (profiler, cmd_queue, priv->convolution_kernel, 2, priv->cropped_size, NULL); - - /* Now compute partial sums of the convolved image. */ - /* Compute global and local dimensions for the cumsum kernel */ - /* Make sure local_size is a power of 2 */ - local_size = compute_closest_smaller_power_of_2 (priv->max_work_group_size); - /* Number of iterations of every group is given by the number of pixels - * divided by the number of pixels *num_groups* can process. */ - num_groups = MIN (local_size, NUM_CHUNKS (n, local_size)); - num_group_iterations = NUM_CHUNKS (n, local_size * num_groups); - /* The real number of groups is given by the number of pixels - * divided by the group size and the number of group iterations. 
*/ - num_groups = NUM_CHUNKS (n, num_group_iterations * local_size); - global_size = num_groups * local_size; - - g_debug (" n: %lu", n); - g_debug (" num groups: %lu", num_groups); - g_debug (" group iterations: %lu", num_group_iterations); - g_debug ("kernel global size: %lu", global_size); - g_debug (" kernel local size: %lu", local_size); - - result = g_malloc0 (sizeof (cl_float) * num_groups); - group_sums = clCreateBuffer (priv->context, - CL_MEM_READ_WRITE, - sizeof (cl_float) * num_groups, - NULL, - &err); - UFO_RESOURCES_CHECK_CLERR (err); - - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 0, sizeof (cl_mem), &out_mem)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 1, sizeof (cl_mem), &group_sums)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 2, sizeof (cl_mem), &out_mem)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 3, sizeof (cl_float) * local_size, NULL)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 4, sizeof (gsize), &n)); - UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->sum_kernel, 5, sizeof (gint), &num_group_iterations)); - ufo_profiler_call (profiler, cmd_queue, priv->sum_kernel, 1, &global_size, &local_size); - - clEnqueueReadBuffer (cmd_queue, - group_sums, - CL_TRUE, - 0, sizeof (cl_float) * num_groups, - result, - 0, NULL, NULL); - UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (group_sums)); - - /* Sum partial sums computed by the groups. */ - for (gsize i = 0; i < num_groups; i++) { - sum += result[i]; - } - g_free (result); - - return sqrt (G_PI_2) / (6 * (priv->cropped_size[0] - 2.0f) * (priv->cropped_size[1] - 2.0f)) * sum; -} - static gboolean ufo_non_local_means_task_process (UfoTask *task, UfoBuffer **inputs, @@ -665,7 +584,15 @@ ufo_non_local_means_task_process (UfoTask *task, /* Use out_mem for the convolution, it's not necessary after the * computation and can be re-used by the de-noising itself. 
*/ - estimated_sigma = compute_sigma (priv, cmd_queue, profiler, in_mem, out_mem); + estimated_sigma = ufo_common_estimate_sigma (priv->convolution_kernel, + priv->sum_kernel, + cmd_queue, + priv->sampler, + profiler, + in_mem, + out_mem, + priv->max_work_group_size, + priv->cropped_size); g_debug ("Estimated sigma: %g", estimated_sigma); if (priv->h <= 0.0f) { priv->h = estimated_sigma; -- cgit v1.2.1