summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@suren.me>2016-06-19 05:11:21 +0200
committerSuren A. Chilingaryan <csa@suren.me>2016-06-19 05:11:21 +0200
commit04ef44def02c76029dc91d1eb17d1532e2714a5c (patch)
treefd527b3769a6cfacecdffd7ef1f7bb6531d31871
parent151541b83d540c3476965368f819e48a7b289cad (diff)
downloaddgmatest-04ef44def02c76029dc91d1eb17d1532e2714a5c.tar.gz
dgmatest-04ef44def02c76029dc91d1eb17d1532e2714a5c.tar.bz2
dgmatest-04ef44def02c76029dc91d1eb17d1532e2714a5c.tar.xz
dgmatest-04ef44def02c76029dc91d1eb17d1532e2714a5c.zip
StreamingHEADmaster
-rw-r--r--kernel.cl5
-rw-r--r--test.c112
2 files changed, 63 insertions, 54 deletions
diff --git a/kernel.cl b/kernel.cl
index 26a0009..258ea5f 100644
--- a/kernel.cl
+++ b/kernel.cl
@@ -17,3 +17,8 @@ __kernel void measure(uint iter, __global uint *input, __global uint *output, __
output[iter + 1] = RD32(REG_PERF_COUNTER);
WR64 (REG_DESCRIPTOR_ADDRESS, bus_addr);
}
+
+__kernel void nil()
+{
+ ;
+}
diff --git a/test.c b/test.c
index ae92c57..3327102 100644
--- a/test.c
+++ b/test.c
@@ -12,12 +12,15 @@
#define KERNEL_CONTROL
//#define OPENCL_TIMINGS
+#define CPU_WAIT
+//#define CPU_MARKER
#define BAR PCILIB_BAR0
#define TLP_SIZE 64
#define PAGE_SIZE 4096
#define NUM_PAGES 16
+#define ITERATIONS 100
#define KMEM_DEFAULT_FLAGS (pcilib_kmem_flags_t)(PCILIB_KMEM_FLAG_HARDWARE | PCILIB_KMEM_FLAG_PERSISTENT | PCILIB_KMEM_FLAG_EXCLUSIVE)
@@ -184,6 +187,8 @@ int main(void)
CL_CHECK_STATUS (clSetKernelArg (measure_kernel, 3, sizeof (cl_mem), &bar_cl));
CL_CHECK_STATUS (clSetKernelArg (measure_kernel, 4, sizeof (ulong), &bus_address.surface_bus_address));
+ cl_kernel nil_kernel = clCreateKernel (program, "nil", &err);
+
pcilib_kmem_handle_t *desc_kmem = pcilib_alloc_kernel_memory (pci, PCILIB_KMEM_TYPE_CONSISTENT, 1, 128, 4096, KMEM_USE_RING, KMEM_DEFAULT_FLAGS);
uintptr_t desc_bus = pcilib_kmem_get_block_ba (pci, desc_kmem, 0);
volatile void *desc = (uint32_t *) pcilib_kmem_get_block_ua (pci, desc_kmem, 0);
@@ -227,44 +232,6 @@ int main(void)
struct timespec tss, tse, tsk;
-
- for (i = 0; i < 100; i++) {
- WR32_sleep (REG_DMA, 0);
- WR32_sleep (REG_PERF_COUNTER, 0);
- WR64_sleep (REG_DESCRIPTOR_ADDRESS, bus_address.surface_bus_address);
-
- *(volatile uint32_t*)marker = 0;
- *hwaddr = 0;
-
- clock_gettime(CLOCK_REALTIME, &tss);
- WR32 (REG_DMA, 1);
-
- if (i < 50) {
- while ((*hwaddr) == 0) {
- }
- } else {
- while ((*(volatile uint32_t*)marker) < 1) {
- }
- }
- clock_gettime(CLOCK_REALTIME, &tse);
-
- CL_CHECK_STATUS (clEnqueueNDRangeKernel (command_queue, process_kernel, 1, NULL, &work_size, NULL, 0, NULL, &event));
- CL_CHECK_STATUS (clWaitForEvents (1, &event));
- clock_gettime(CLOCK_REALTIME, &tsk);
-
- double lath = 4. * RD32 (0x20) / 1000;
-
- double lat = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.;
- double latk = (tsk.tv_sec - tss.tv_sec)*1000000 + 1. * (tsk.tv_nsec - tss.tv_nsec) / 1000.;
-
- printf(" iteration %u, hw: %6.3lf us, sw: %6.3lf us, +krn: %6.3lf us, maker: %u\n", i, lath, lat, latk, *(volatile uint32_t*)marker);
-
- CL_CHECK_STATUS (clReleaseEvent (event));
- }
-
- usleep(100000);
-
-
#ifdef KERNEL_CONTROL
cl_kernel kernel = measure_kernel;
#else
@@ -272,13 +239,14 @@ int main(void)
#endif /* KERNEL_CONTROL */
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < ITERATIONS; i++) {
cl_event wevent[NUM_PAGES], kevent[NUM_PAGES];
printf("Iteration %i\n", i);
WR32_sleep (REG_DMA, 0);
WR32_sleep (REG_PERF_COUNTER, 0);
*(volatile uint32_t*)marker = 0;
+ *hwaddr = 0;
clock_gettime(CLOCK_REALTIME, &tss);
// we rather need to trigger it every few milliseconds and see what happens.
@@ -287,11 +255,13 @@ int main(void)
#ifndef KERNEL_CONTROL
WR64_sleep (REG_DESCRIPTOR_ADDRESS, bus_address.surface_bus_address + j * PAGE_SIZE);
#endif
-// if (j) {
-// CL_CHECK_STATUS (clEnqueueWaitSignalAMD (command_queue, input, j + 1, 1, &wevent[j-1], &wevent[j]));
-// } else {
- CL_CHECK_STATUS (clEnqueueWaitSignalAMD (command_queue, input, j + 1, 0, NULL, &wevent[j]));
-// }
+
+#ifdef CPU_WAIT
+ wevent[j] = clCreateUserEvent(context, &err); CL_CHECK_STATUS(err);
+#else
+ CL_CHECK_STATUS (clEnqueueWaitSignalAMD (command_queue, input, j + 1, 0, NULL, &wevent[j]));
+#endif
+
CL_CHECK_STATUS (clSetKernelArg (kernel, 0, sizeof (uint), &j));
CL_CHECK_STATUS (clEnqueueNDRangeKernel (command_queue, kernel, 1, NULL, &work_size, NULL, 1, &wevent[j], &kevent[j]));
CL_CHECK_STATUS (clFlush(command_queue));
@@ -299,7 +269,9 @@ int main(void)
#ifdef KERNEL_CONTROL
// we write one extra in the end.
+# ifndef CPU_WAIT
CL_CHECK_STATUS (clEnqueueWaitSignalAMD (command_queue, input, NUM_PAGES + 1, 0, NULL, NULL));
+# endif
CL_CHECK_STATUS (clFlush(command_queue));
#endif
clock_gettime(CLOCK_REALTIME, &tse);
@@ -309,27 +281,53 @@ int main(void)
clock_gettime(CLOCK_REALTIME, &tss);
double lat_flush = (tss.tv_sec - tse.tv_sec)*1000000 + 1. * (tss.tv_nsec - tse.tv_nsec) / 1000.;
+
#ifdef KERNEL_CONTROL
WR64 (REG_DESCRIPTOR_ADDRESS, bus_address.surface_bus_address);
#endif
+
+ clock_gettime(CLOCK_REALTIME, &tss);
WR32 (REG_DMA, 1);
- int cur;
+ printf(" Markers (us):");
+ double lastlat = 0;
+#ifdef CPU_WAIT
+ for (j = 0; j < NUM_PAGES; j++) {
+# ifdef CPU_MARKER
+ while ((*hwaddr) == 0) {
+ }
+ *hwaddr = 0;
+# else
+ while ((*(volatile uint32_t*)marker) < (j + 1)) {
+ }
+# endif
+ clock_gettime(CLOCK_REALTIME, &tse);
+ CL_CHECK_STATUS (clSetUserEventStatus(wevent[j], CL_COMPLETE));
+ double latm = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.;
+ printf(" %8.3lf", latm - lastlat);
+ lastlat = latm;
+ }
+#else
+ int cur, curupd;
for (cur = 0; (*(volatile uint32_t*)marker) < NUM_PAGES;) {
if (cur != (*(volatile uint32_t*)marker)) {
clock_gettime(CLOCK_REALTIME, &tse);
- cur = (*(volatile uint32_t*)marker);
+ curupd = (*(volatile uint32_t*)marker);
double latm = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.;
- printf(" Marker %u after %6.3lf us\n", cur, latm);
+ for (cur++; cur < curupd; cur++) printf(" -");
+ printf(" %8.3lf", latm - lastlat);
+ cur = curupd;
+ lastlat = latm;
}
-
}
+#endif
+ printf("\n");
CL_CHECK_STATUS (clWaitForEvents (1, &kevent[NUM_PAGES - 1]));
CL_CHECK_STATUS (clFinish(command_queue));
clock_gettime(CLOCK_REALTIME, &tse);
- double lat = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.;
+ double lat = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.; lat /= NUM_PAGES;
printf(" Markers: 0x%lx %u\n", *hwaddr, *(volatile uint32_t*)marker);
@@ -350,18 +348,23 @@ int main(void)
cl_ulong end, endk, endw, startw, startk;
CL_CHECK_STATUS (clGetEventProfilingInfo (kevent[j - 1], CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &endk, NULL));
- CL_CHECK_STATUS (clGetEventProfilingInfo (wevent[j], CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &startw, NULL));
- CL_CHECK_STATUS (clGetEventProfilingInfo (wevent[j], CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &endw, NULL));
CL_CHECK_STATUS (clGetEventProfilingInfo (kevent[j], CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &startk, NULL));
CL_CHECK_STATUS (clGetEventProfilingInfo (kevent[j], CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL));
+#ifdef CPU_WAIT
+ startw = endk;
+ endw = startk;
+#else
+ CL_CHECK_STATUS (clGetEventProfilingInfo (wevent[j], CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &startw, NULL));
+ CL_CHECK_STATUS (clGetEventProfilingInfo (wevent[j], CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &endw, NULL));
+#endif
printf("k-%.3lf-w-%.3lf-w-%.3lf-k-%.3lf ", 1. * (startw - endk) / 1000, 1. * (endw - startw) / 1000, 1. * (startk - endw) / 1000, 1. * (end - startk) / 1000);
}
printf("\n");
- double lath = 4. * RD32 (0x20) / 1000;
- printf(" fpga: %6.3lf us, software: %6.3lf us, sched: %6.3lf us, flush: %6.3lf us\n", lath, lat, lat_sched, lat_flush);
+ double lath = 4. * RD32 (0x20) / 1000; lath /= NUM_PAGES;
+ printf(" fpga: %8.3lf us, software: %8.3lf us, sched: %8.3lf us, flush: %8.3lf us\n", lath, lat, lat_sched, lat_flush);
/*
CL_CHECK_STATUS(clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &res_size));
@@ -383,7 +386,7 @@ int main(void)
#ifdef KERNEL_CONTROL
uint data[1024];
CL_CHECK_STATUS (clEnqueueReadBuffer (command_queue, output, CL_TRUE, 0, 4096, data, 0, NULL, NULL));
- printf("\nLatencies: ");
+ printf(" FPGA Latencies: ");
for (j = 0; j < NUM_PAGES + 1; j++) {
/* if (j)
printf("%6.3lf ", 4. * (data[j] - data[j - 1]) / 1000);
@@ -411,6 +414,7 @@ int main(void)
pcilib_close(pci);
+ clReleaseKernel (nil_kernel);
clReleaseKernel (process_kernel);
clReleaseKernel (measure_kernel);
clReleaseProgram (program);