/* * Gathering statistics on a image stream, copying input to output * This file is part of ufo-serge filter set. * Copyright (C) 2016 Serge Cohen * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * Serge Cohen */ #include "config.h" #include #include #include #include "ufo-stat-monitor-task.h" #include "ufo-sxc-common.h" struct _UfoStatMonitorTaskPrivate { FILE * stat_file; gchar * stat_fn; gboolean trace_count; gboolean be_quiet; gboolean node_has_fp64; cl_kernel kernel; cl_kernel kernel_final; gsize im_index; guint n_items; cl_ulong max_local_mem; size_t local_scratch_size; cl_uint wg_size; cl_uint wg_num; cl_mem stat_out_buff; /* The buffer used by the kernel to output its results */ cl_mem stat_out_red; /* The buffer used by the final reduction kernel */ guint sm_index; }; static void ufo_task_interface_init (UfoTaskIface *iface); G_DEFINE_TYPE_WITH_CODE (UfoStatMonitorTask, ufo_stat_monitor_task, UFO_TYPE_TASK_NODE, G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK, ufo_task_interface_init)) #define UFO_STAT_MONITOR_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_STAT_MONITOR_TASK, UfoStatMonitorTaskPrivate)) enum { PROP_0, PROP_NUM_ITEMS, PROP_STAT_FN, PROP_TRACE, PROP_QUIET, N_PROPERTIES }; static GParamSpec *properties[N_PROPERTIES] = { NULL, }; UfoNode * ufo_stat_monitor_task_new (void) { return UFO_NODE (g_object_new (UFO_TYPE_STAT_MONITOR_TASK, NULL)); } static void ufo_stat_monitor_task_setup (UfoTask *task, UfoResources *resources, GError **error) { UfoStatMonitorTaskPrivate *priv; UfoGpuNode *node; cl_command_queue cmd_queue; cl_context context_cl; cl_device_id dev_cl; cl_int err_code; cl_uint num_cu; size_t max_wgs, max_wis[3]; size_t ker_pref_wgs; node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task))); cmd_queue = ufo_gpu_node_get_cmd_queue (node); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetCommandQueueInfo (cmd_queue, CL_QUEUE_DEVICE, sizeof (cl_device_id), &dev_cl, NULL), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetCommandQueueInfo (cmd_queue, CL_QUEUE_CONTEXT, sizeof (cl_context), &context_cl, NULL), error); priv = UFO_STAT_MONITOR_TASK_GET_PRIVATE (task); priv->node_has_fp64 = device_has_extension (node, "cl_khr_fp64"); /* Error : kernel compilation is buggy is one device has fp64 and another has not ! */ if (priv->node_has_fp64) { priv->kernel = ufo_resources_get_kernel (resources, "stat-monitor.cl", "stat_monitor_f64", NULL, error); priv->kernel_final = ufo_resources_get_kernel (resources, "stat-monitor.cl", "stat_monitor_f64_fin", NULL, error); } else { priv->kernel = ufo_resources_get_kernel (resources, "stat-monitor.cl", "stat_monitor_f32", NULL, error); priv->kernel_final = ufo_resources_get_kernel (resources, "stat-monitor.cl", "stat_monitor_f32_fin", NULL, error); } if (priv->kernel == NULL || priv->kernel_final == NULL) return; UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->kernel), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->kernel_final), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetDeviceInfo (dev_cl, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &(priv->max_local_mem), NULL), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetDeviceInfo (dev_cl, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cu, NULL), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetDeviceInfo (dev_cl, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_wgs, NULL), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetDeviceInfo (dev_cl, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_wis), max_wis, NULL), error); UFO_RESOURCES_CHECK_SET_AND_RETURN (clGetKernelWorkGroupInfo (priv->kernel, dev_cl, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &ker_pref_wgs, NULL), error); /* 4 work-groups per comput unit. */ priv->wg_num = num_cu << 2; if (priv->wg_num > max_wis[0]) { /* * We have to reduce the number of workgroup so that last reduction can * be done in a single workgroup (hence 1-value per workroup should * eventually fit within a single workgroup. */ priv->wg_num = max_wis[0]; } priv->wg_size = ker_pref_wgs; if (priv->wg_size > max_wis[0]) { priv->wg_size = max_wis[0]; } if (priv->wg_size < priv->wg_num) { // To ensure the final reduction step. priv->wg_num = priv->wg_size; } /* dividing by 2 the total local memory. */ priv->local_scratch_size = (size_t) (priv->max_local_mem >> 1); if (priv->node_has_fp64) { /* * Each workgroup needs 4 items * 8 bytes per work-item, that is * (wg_size << 5) B of local memory. */ if (priv->local_scratch_size > (priv->wg_size << 5)) priv->local_scratch_size = (priv->wg_size << 5); else priv->wg_size = (priv->local_scratch_size) >> 5; } else { /* * Each workgroup needs 4 items * 4 bytes per work-item, that is * (wg_size << 4) B of local memory. */ if (priv->local_scratch_size > (priv->wg_size << 4)) priv->local_scratch_size = (priv->wg_size << 4); else priv->wg_size = (priv->local_scratch_size) >> 4; } priv->im_index = 0; /* Opening (if required) the statistic file */ if (strcmp ("-", priv->stat_fn)) { priv->stat_file = fopen (priv->stat_fn, "a"); fprintf (stdout, "stat-monitor (%u) will outputs its results to file '%s'\n", priv->sm_index, priv->stat_fn); fprintf (priv->stat_file, "# index min max sum sum-of-sq mean var\n"); } else { priv->stat_file = stdout; fprintf (stdout, "stat-monitor (%u) will outputs its results to stdout\n", priv->sm_index); } /* Allocating once for all the output buffer that will be used for statistcis output. */ if ( priv->node_has_fp64 ) { /* min, max, mean, sd (one 4-tuple per work-group) */ priv->stat_out_buff = clCreateBuffer (context_cl, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, priv->wg_num << 5, NULL, &err_code); UFO_RESOURCES_CHECK_SET_AND_RETURN (err_code, error); /* min, max, mean, sd (one 4-tuple once only) */ priv->stat_out_red = clCreateBuffer (context_cl, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, 1 << 5, NULL, &err_code); UFO_RESOURCES_CHECK_SET_AND_RETURN (err_code, error); } else { /* min, max, mean, sd (one 4-tuple per work-group) */ priv->stat_out_buff = clCreateBuffer (context_cl, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, priv->wg_num << 4, NULL, &err_code); UFO_RESOURCES_CHECK_SET_AND_RETURN (err_code, error); /* min, max, mean, sd (one 4-tuple once only) */ priv->stat_out_red = clCreateBuffer (context_cl, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, 1 << 4, NULL, &err_code); UFO_RESOURCES_CHECK_SET_AND_RETURN (err_code, error); } } static void ufo_stat_monitor_task_get_requisition (UfoTask *task, UfoBuffer **inputs, UfoRequisition *requisition, GError **error) { /* In the current version the statistics are NEVER the output of the filter. * Indeed is behaving as a /pass-through/ filter, doing nothing to the * image. */ ufo_buffer_get_requisition (inputs[0], requisition); } static guint ufo_stat_monitor_task_get_num_inputs (UfoTask *task) { return 1; } static guint ufo_stat_monitor_task_get_num_dimensions (UfoTask *task, guint input) { return 2; } static UfoTaskMode ufo_stat_monitor_task_get_mode (UfoTask *task) { /* We are still needing the GPU (OpenCL device indeed) to perform the * statistics computation. */ return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU; } /* Copied over from the monitor task, as we are trying to mimick it (plus * statistics gathering/printing) */ static gchar * join_list (GList *list, const gchar *sep) { gchar **array; GList *it; gchar *result; guint i = 0; array = g_new0 (gchar *, g_list_length (list) + 1); g_list_for (list, it) array[i++] = it->data; result = g_strjoinv (sep, array); g_free (array); return result; } static gboolean ufo_stat_monitor_task_process (UfoTask *task, UfoBuffer **inputs, UfoBuffer *output, UfoRequisition *requisition) { UfoStatMonitorTaskPrivate *priv; priv = UFO_STAT_MONITOR_TASK_GET_PRIVATE (task); UfoGpuNode *node; UfoProfiler *profiler; cl_command_queue cmd_queue; cl_mem in_mem; cl_uint img_size; UfoRequisition img_req; UfoBufferLocation location; GList *keys; GList *sizes; gchar *keystring; gchar *dimstring; /* Getting information from the buffer, before computing statistics */ location = ufo_buffer_get_location (inputs[0]); keys = ufo_buffer_get_metadata_keys (inputs[0]); sizes = NULL; /* Launching the kernel first, so that it has a bit of extra time while CPU is running */ node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task))); cmd_queue = ufo_gpu_node_get_cmd_queue (node); in_mem = ufo_buffer_get_device_array (inputs[0], cmd_queue); ufo_buffer_get_requisition (inputs[0], &img_req); img_size = 1; for (guint i = 0; i < img_req.n_dims; i++) img_size *= img_req.dims[i]; UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &in_mem)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &(priv->stat_out_buff))); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 2, sizeof (cl_uint), &img_size)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 3, priv->local_scratch_size, NULL)); gsize total_wi = priv->wg_num * priv->wg_size; gsize wg_size = (gsize)(priv->wg_size); gsize true_wi = (total_wi < (gsize)img_size) ? total_wi : (gsize)img_size; gsize true_wg_num = ((true_wi-1) / priv->wg_size) + 1; /* Making sure that true_wi is a mulitple of work group size. */ true_wi = priv->wg_size * true_wg_num; profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task)); /* First reduction step : */ ufo_profiler_call (profiler, cmd_queue, priv->kernel, 1, &true_wi, &wg_size); /* At this time, we need to have a second kernel to further reduce the * results of the previous results that where produced at the rate of one * per work-group. */ UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel_final, 0, sizeof (cl_mem), &(priv->stat_out_buff))); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel_final, 1, sizeof (cl_mem), &(priv->stat_out_red))); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel_final, 2, sizeof (cl_uint), &(true_wg_num))); if (priv->node_has_fp64) { UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel_final, 3, sizeof(cl_double)*4*true_wg_num, NULL)); } else { UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel_final, 3, sizeof(cl_float)*4*true_wg_num, NULL)); } total_wi = (true_wg_num & 0x1) + (true_wg_num >> 1); /* Second reduction step : */ ufo_profiler_call(profiler, cmd_queue, priv->kernel_final, 1, &total_wi, &total_wi); if (priv->node_has_fp64) { double stat_res[6]; double img_size_f = (double) img_size; UFO_RESOURCES_CHECK_CLERR (clEnqueueReadBuffer (cmd_queue, priv->stat_out_red, CL_TRUE, 0, 4<<3, stat_res, 0, NULL, NULL)); stat_res[4] = stat_res[2] / img_size_f; stat_res[5] = (stat_res[3] - img_size_f * stat_res[4] * stat_res[4]) / (img_size_f - 1.0); if (stdout == priv->stat_file) { fprintf (priv->stat_file, "(%u) ", priv->sm_index); } fprintf (priv->stat_file, "%zu %le %le %le %le %le %le\n", priv->im_index, stat_res[0], stat_res[1], stat_res[2], stat_res[3], stat_res[4], stat_res[5]); } else { float stat_res[6]; float img_size_f = (float)img_size; UFO_RESOURCES_CHECK_CLERR (clEnqueueReadBuffer(cmd_queue, priv->stat_out_red, CL_TRUE, 0, 4<<2, stat_res, 0, NULL, NULL)); stat_res[4] = stat_res[2] / img_size_f; stat_res[5] = (stat_res[3] - img_size_f * stat_res[4] * stat_res[4]) / (img_size_f - 1.0f); if (stdout == priv->stat_file) { fprintf (priv->stat_file, "(%u) ", priv->sm_index); } fprintf (priv->stat_file, "%zu %e %e %e %e %e %e\n", priv->im_index, stat_res[0], stat_res[1], stat_res[2], stat_res[3], stat_res[4], stat_res[5]); } if (priv->trace_count) fprintf (stdout, "stat-monitor (%u) : done frame %zu\n", priv->sm_index, priv->im_index); ++(priv->im_index); if (!priv->be_quiet) { for (guint i = 0; i < requisition->n_dims; i++) sizes = g_list_append (sizes, g_strdup_printf ("%zu", requisition->dims[i])); dimstring = join_list (sizes, " "); keystring = join_list (keys, ", "); g_print ("stat-monitor: dims=[%s] keys=[%s] location=", dimstring, keystring); switch (location) { case UFO_BUFFER_LOCATION_HOST: g_print ("host"); break; case UFO_BUFFER_LOCATION_DEVICE: g_print ("device"); break; case UFO_BUFFER_LOCATION_DEVICE_IMAGE: g_print ("image"); break; case UFO_BUFFER_LOCATION_INVALID: g_print ("invalid"); break; } g_print ("\n"); g_free (dimstring); g_free (keystring); g_list_free (keys); g_list_free_full (sizes, (GDestroyNotify) g_free); } if (priv->n_items > 0) { gfloat *data_f32; data_f32 = (gfloat *) ufo_buffer_get_host_array (inputs[0], NULL); g_print (" "); for (guint i = 0; i < priv->n_items; i++) { g_print ("%e ", data_f32[i]); if ((i != 0) && (((i + 1) % 8) == 0)) g_print ("\n "); } if ((priv->n_items % 8) != 0) g_print ("\n"); } ufo_buffer_copy (inputs[0], output); return TRUE; } static void ufo_stat_monitor_task_set_property (GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) { UfoStatMonitorTaskPrivate *priv = UFO_STAT_MONITOR_TASK_GET_PRIVATE (object); switch (property_id) { case PROP_NUM_ITEMS: priv->n_items = g_value_get_uint (value); break; case PROP_STAT_FN: g_free (priv->stat_fn); priv->stat_fn = g_value_dup_string (value); break; case PROP_TRACE: priv->trace_count = g_value_get_boolean (value); break; case PROP_QUIET: priv->be_quiet = g_value_get_boolean (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; } } static void ufo_stat_monitor_task_get_property (GObject *object, guint property_id, GValue *value, GParamSpec *pspec) { UfoStatMonitorTaskPrivate *priv = UFO_STAT_MONITOR_TASK_GET_PRIVATE (object); switch (property_id) { case PROP_NUM_ITEMS: g_value_set_uint (value, priv->n_items); break; case PROP_STAT_FN: g_value_set_string (value, priv->stat_fn); break; case PROP_TRACE: g_value_set_boolean (value, priv->trace_count); break; case PROP_QUIET: g_value_set_boolean (value, priv->be_quiet); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; } } static void ufo_stat_monitor_task_finalize (GObject *object) { UfoStatMonitorTaskPrivate *priv = UFO_STAT_MONITOR_TASK_GET_PRIVATE (object); if (stdout != priv->stat_file) { fclose(priv->stat_file); priv->stat_file = NULL; } if (priv->trace_count) { fprintf (stdout, "stat-monitor (%u) finalising after processing %zu frames.\n", priv->sm_index, priv->im_index); } g_free (priv->stat_fn); priv->stat_fn = NULL; if (priv->kernel) UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->kernel)); if (priv->kernel_final) UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->kernel_final)); if (priv->stat_out_buff) UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->stat_out_buff)); if (priv->stat_out_red) UFO_RESOURCES_CHECK_CLERR (clReleaseMemObject (priv->stat_out_red)); G_OBJECT_CLASS (ufo_stat_monitor_task_parent_class)->finalize (object); } static void ufo_task_interface_init (UfoTaskIface *iface) { iface->setup = ufo_stat_monitor_task_setup; iface->get_num_inputs = ufo_stat_monitor_task_get_num_inputs; iface->get_num_dimensions = ufo_stat_monitor_task_get_num_dimensions; iface->get_mode = ufo_stat_monitor_task_get_mode; iface->get_requisition = ufo_stat_monitor_task_get_requisition; iface->process = ufo_stat_monitor_task_process; } static void ufo_stat_monitor_task_class_init (UfoStatMonitorTaskClass *klass) { GObjectClass *oclass = G_OBJECT_CLASS (klass); oclass->set_property = ufo_stat_monitor_task_set_property; oclass->get_property = ufo_stat_monitor_task_get_property; oclass->finalize = ufo_stat_monitor_task_finalize; properties[PROP_STAT_FN] = g_param_spec_string("filename", "Filename for the statistics output file.", "If provided with a '-' it will output statistcis to standard output of the process", "-", G_PARAM_READWRITE); properties[PROP_TRACE] = g_param_spec_boolean("trace", "When turned to true, will print processed frame index on stdout", "Defaulting to 'false', that is mimicking the 'monitor' filter", FALSE, G_PARAM_READWRITE); properties[PROP_QUIET] = g_param_spec_boolean("quiet", "When turned to true, will not print frame monitoring information on stdout", "Defaulting to 'false', that is mimicking the 'monitor' filter", FALSE, G_PARAM_READWRITE); properties[PROP_NUM_ITEMS] = g_param_spec_uint ("print", "Number of items to print", "Number of items to print", 0, G_MAXUINT, 0, G_PARAM_READWRITE); for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++) g_object_class_install_property (oclass, i, properties[i]); g_type_class_add_private (oclass, sizeof(UfoStatMonitorTaskPrivate)); } static void ufo_stat_monitor_task_init(UfoStatMonitorTask *self) { static guint sm_next_index = 0; self->priv = UFO_STAT_MONITOR_TASK_GET_PRIVATE(self); self->priv->stat_file = stdout; self->priv->stat_fn = g_strdup ("-"); self->priv->trace_count = FALSE; self->priv->be_quiet = FALSE; self->priv->n_items = 0; self->priv->sm_index = sm_next_index++; }