12
#include "nwl_private.h"
14
#include "nwl_defines.h"
16
#define NWL_BUG_EXTRA_DATA
19
/*
 * Start the NWL loopback / traffic generator for the requested direction.
 *
 * Visible steps: force a stop of any previous loopback, write the packet
 * size register, and - only for PCILIB_DMA_MODIFICATION_DEFAULT - write
 * the TX and/or RX configuration register selected by `direction`.
 * Finally the context is marked as having loopback running.
 *
 * ctx         - NWL DMA context; its base_addr, type and loopback_started
 *               fields are used here.
 * direction   - selects which of the TX/RX config registers are written.
 * packet_size - presumably the value programmed into PKT_SIZE_ADDRESS;
 *               the assignment of `val` is not visible here - TODO confirm.
 */
int dma_nwl_start_loopback(nwl_dma_t *ctx, pcilib_dma_direction_t direction, size_t packet_size) {
22
// Set the flag first: dma_nwl_stop_loopback() returns early when the flag
// is clear, so this guarantees that the stop below really touches hardware.
ctx->loopback_started = 1;
23
dma_nwl_stop_loopback(ctx);
26
// Program the packet size register.
nwl_write_register(val, ctx, ctx->base_addr, PKT_SIZE_ADDRESS);
28
// The generator configuration registers are only written for the default
// modification.
if (ctx->type == PCILIB_DMA_MODIFICATION_DEFAULT) {
30
// NOTE(review): the listing shows TX config under BIDIRECTIONAL and RX
// config under FROM_DEVICE; whether the cases fall through or break cannot
// be determined from the visible lines - confirm.
case PCILIB_DMA_BIDIRECTIONAL:
32
nwl_write_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
34
case PCILIB_DMA_TO_DEVICE:
36
case PCILIB_DMA_FROM_DEVICE:
38
nwl_write_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
43
// Remember that loopback is active so a later stop is not skipped.
ctx->loopback_started = 1;
48
/*
 * Stop the NWL loopback / traffic generator.
 *
 * Returns 0 immediately when ctx->loopback_started is not set. Otherwise,
 * for PCILIB_DMA_MODIFICATION_DEFAULT, both the TX and the RX configuration
 * registers are written (presumably with a "disabled" value; the assignment
 * of `val` is not visible here - TODO confirm) and the flag is cleared.
 */
int dma_nwl_stop_loopback(nwl_dma_t *ctx) {
51
// Nothing to do unless a loopback has been recorded as started. Callers
// that need an unconditional stop set the flag beforehand.
if (!ctx->loopback_started) return 0;
53
/* Stop in any case, otherwise we can have problems in benchmark due to
54
engine initialized in previous run, and benchmark is only actual usage.
55
Otherwise, we should detect current loopback status during initialization */
57
// Only the default modification has its config registers written here.
if (ctx->type == PCILIB_DMA_MODIFICATION_DEFAULT) {
58
nwl_write_register(val, ctx, ctx->base_addr, TX_CONFIG_ADDRESS);
59
nwl_write_register(val, ctx, ctx->base_addr, RX_CONFIG_ADDRESS);
62
// Mark loopback as stopped so repeated calls become no-ops.
ctx->loopback_started = 0;
67
/*
 * Benchmark DMA throughput of an NWL engine pair.
 *
 * vctx       - DMA context; cast to nwl_dma_t.
 * dma        - engine address used to look up the FROM_DEVICE (read) and
 *              TO_DEVICE (write) engine ids.
 * addr       - address forwarded unchanged to pcilib_write_dma() and
 *              pcilib_read_dma().
 * size       - transfer size in bytes on entry; converted below to a count
 *              of 32-bit words (rounded up).
 * iterations - number of write/read rounds that are timed.
 * direction  - PCILIB_DMA_TO_DEVICE / FROM_DEVICE / BIDIRECTIONAL bit mask.
 *
 * Returns -1. when a pure TO_DEVICE benchmark is requested on the default
 * modification; otherwise the final expression computes
 * bytes * iterations * 1e6 / (2^20 * us), i.e. MiB per second given that
 * `us` accumulates microseconds. Other early-exit paths may exist in lines
 * not visible here.
 */
double dma_nwl_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
72
// Set to a message when a transfer or comparison fails; reported via
// pcilib_warning() further down.
const char *error = NULL;
73
size_t packet_size, blocks;
76
// Timestamps bracketing each timed transfer.
struct timeval start, cur;
78
nwl_dma_t *ctx = (nwl_dma_t*)vctx;
80
// Resolve the engine ids for both directions from the same engine address.
pcilib_dma_engine_t readid = pcilib_find_dma_by_addr(ctx->pcilib, PCILIB_DMA_FROM_DEVICE, dma);
81
pcilib_dma_engine_t writeid = pcilib_find_dma_by_addr(ctx->pcilib, PCILIB_DMA_TO_DEVICE, dma);
83
// Convert size from bytes to 32-bit words, rounding up. From here on
// `size * sizeof(uint32_t)` is the byte length actually transferred.
if (size%sizeof(uint32_t)) size = 1 + size / sizeof(uint32_t);
84
else size /= sizeof(uint32_t);
87
// A write-only benchmark is rejected for the default modification.
if (ctx->type == PCILIB_DMA_MODIFICATION_DEFAULT) {
88
if (direction == PCILIB_DMA_TO_DEVICE) return -1.;
90
// else if ((direction == PCILIB_DMA_FROM_DEVICE)&&(ctx->type != PCILIB_DMA_MODIFICATION_DEFAULT)) return -1.;
92
// Stop Generators and drain old data
93
if (ctx->type == PCILIB_DMA_MODIFICATION_DEFAULT) dma_nwl_stop_loopback(ctx);
94
// dma_nwl_stop_engine(ctx, readid); // DS: replace with something better
98
// Discard whatever is still queued on the read engine. The error message
// below is presumably emitted when this fails; the condition is not
// visible here - TODO confirm.
err = pcilib_skip_dma(ctx->pcilib, readid);
100
pcilib_error("Can't start benchmark, devices continuously writes unexpected data using DMA engine");
104
// Engine interrupts are enabled only when the build defines
// NWL_GENERATE_DMA_IRQ.
#ifdef NWL_GENERATE_DMA_IRQ
105
dma_nwl_enable_engine_irq(ctx, readid);
106
dma_nwl_enable_engine_irq(ctx, writeid);
107
#endif /* NWL_GENERATE_DMA_IRQ */
109
// Split the transfer into packets of at most NWL_MAX_PACKET_SIZE bytes;
// `blocks` is the byte length divided by packet_size, rounded up.
if (size * sizeof(uint32_t) > NWL_MAX_PACKET_SIZE) {
110
packet_size = NWL_MAX_PACKET_SIZE;
111
blocks = (size * sizeof(uint32_t)) / packet_size + (((size*sizeof(uint32_t))%packet_size)?1:0);
113
// Presumably the else branch: the whole transfer fits into one packet.
packet_size = size * sizeof(uint32_t);
117
dma_nwl_start_loopback(ctx, direction, packet_size);
119
// Allocate memory and prepare data
120
// NOTE(review): packet_size looks to be in bytes already (see the
// assignments above), so multiplying by sizeof(uint32_t) again appears to
// over-allocate by that factor - confirm whether this is intentional slack.
buf = malloc(blocks * packet_size * sizeof(uint32_t));
121
cmp = malloc(blocks * packet_size * sizeof(uint32_t));
122
if ((!buf)||(!cmp)) {
128
// IPECamera-specific setup via the "control" register. The meaning of the
// values 0x1e5 / 0x1e1 is not explained in the visible code.
if (ctx->type == PCILIB_NWL_MODIFICATION_IPECAMERA) {
129
pcilib_write_register(ctx->pcilib, NULL, "control", 0x1e5);
131
pcilib_write_register(ctx->pcilib, NULL, "control", 0x1e1);
133
// This way causes more problems with garbage
134
//pcilib_write_register(ctx->pcilib, NULL, "control", 0x3e1);
138
// Timed rounds. Each iteration uses a different fill byte so stale data
// from a previous round differs from the expected pattern.
for (iter = 0; iter < iterations; iter++) {
139
memset(cmp, 0x13 + iter, size * sizeof(uint32_t));
141
if (ctx->type == PCILIB_NWL_MODIFICATION_IPECAMERA) {
142
pcilib_write_register(ctx->pcilib, NULL, "control", 0x1e1);
145
// The write phase runs when data goes to the device, or for any
// non-default modification.
if ((direction&PCILIB_DMA_TO_DEVICE)||(ctx->type != PCILIB_DMA_MODIFICATION_DEFAULT)) {
146
memcpy(buf, cmp, size * sizeof(uint32_t));
148
// The write is timed only when TO_DEVICE is part of the requested direction.
if (direction&PCILIB_DMA_TO_DEVICE) {
149
gettimeofday(&start, NULL);
152
err = pcilib_write_dma(ctx->pcilib, writeid, addr, size * sizeof(uint32_t), buf, &bytes);
153
// Both an error code and a short write count as failure.
if ((err)||(bytes != size * sizeof(uint32_t))) {
154
error = "Write failed";
158
if (direction&PCILIB_DMA_TO_DEVICE) {
160
// Write-only run: wait for the engine to finish before stopping the clock.
if (direction == PCILIB_DMA_TO_DEVICE) {
161
dma_nwl_wait_completion(ctx, writeid, PCILIB_DMA_TIMEOUT);
163
gettimeofday(&cur, NULL);
164
// Accumulate elapsed microseconds for the write phase.
us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
168
if (ctx->type == PCILIB_NWL_MODIFICATION_IPECAMERA) {
169
pcilib_write_register(ctx->pcilib, NULL, "control", 0x3e1);
172
// Clear the buffer so the read-back cannot pass on leftover written data.
memset(buf, 0, size * sizeof(uint32_t));
174
if (direction&PCILIB_DMA_FROM_DEVICE) {
175
gettimeofday(&start, NULL);
178
// Read the data back one packet at a time; `bytes` is presumably advanced
// by `rbytes` in lines not visible here - TODO confirm.
for (i = 0, bytes = 0; i < blocks; i++) {
179
#ifdef NWL_BUG_EXTRA_DATA
183
// The destination is offset by bytes>>2, i.e. the byte count expressed in
// 32-bit elements. NOTE(review): the requested length again multiplies
// packet_size by sizeof(uint32_t) - confirm units.
err = pcilib_read_dma(ctx->pcilib, readid, addr, packet_size * sizeof(uint32_t), buf + (bytes>>2), &rbytes);
184
// A read that fails or is not a whole number of 32-bit words is an error.
if ((err)||(rbytes%sizeof(uint32_t))) {
187
#ifdef NWL_BUG_EXTRA_DATA
188
// With the extra-data workaround enabled, an 8-byte read is handled
// separately (handling not visible here).
else if (rbytes == 8) {
195
if (direction&PCILIB_DMA_FROM_DEVICE) {
196
gettimeofday(&cur, NULL);
197
// Accumulate elapsed microseconds for the read phase.
us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
199
#ifdef NWL_BUG_EXTRA_DATA
200
// Workaround build: accept either the exact byte count or 8 bytes more.
if ((err)||((bytes != size * sizeof(uint32_t))&&((bytes - 8) != size * sizeof(uint32_t)))) {
202
// Presumably the #else variant: require the exact byte count.
if ((err)||(bytes != size * sizeof(uint32_t))) {
204
printf("Expected: %zu bytes, but %zu read, error: %i\n", size * sizeof(uint32_t), bytes, err);
205
error = "Read failed";
209
// NWL_BUG_EXTRA_DATA is defined near the top of this file, so the code
// guarded by this #ifndef is compiled out in this configuration.
#ifndef NWL_BUG_EXTRA_DATA
210
// Verification only makes sense when the same data was written and read.
if (direction == PCILIB_DMA_BIDIRECTIONAL) {
211
if (memcmp(buf, cmp, size * sizeof(uint32_t))) {
212
// Locate the first mismatching element.
for (i = 0; i < size; i++)
213
if (buf[i] != cmp[i]) break;
216
// NOTE(review): the %lx / %lu specifiers may not match the types of
// `0x13 + iter` and `bytes` (declarations not visible; `bytes` is printed
// with %zu elsewhere) - confirm.
printf("Expected: *0x%lx, Written at dword %lu:", 0x13 + iter, bytes);
217
// Dump values starting from the first mismatch, 8 per output line.
for (; (i < size)&&(i < (bytes + 16)); i++) {
218
if (((i - bytes)%8)==0) printf("\n");
219
printf("% 10lx", buf[i]);
223
error = "Written and read values does not match";
230
if (ctx->type == PCILIB_NWL_MODIFICATION_IPECAMERA) {
231
pcilib_write_register(ctx->pcilib, NULL, "control", 0x1e1);
235
// Report the failure; presumably guarded by a check on `error` that is not
// visible here.
pcilib_warning("%s at iteration %i, error: %i, bytes: %zu", error, iter, err, bytes);
238
#ifdef NWL_GENERATE_DMA_IRQ
239
dma_nwl_disable_engine_irq(ctx, writeid);
240
dma_nwl_disable_engine_irq(ctx, readid);
241
#endif /* NWL_GENERATE_DMA_IRQ */
243
dma_nwl_stop_loopback(ctx);
245
// Full memory barrier (GCC builtin) issued after the generator is stopped.
__sync_synchronize();
247
// After a read-only run, drain any data still queued on the read engine.
if (direction == PCILIB_DMA_FROM_DEVICE) {
248
pcilib_skip_dma(ctx->pcilib, readid);
254
// Throughput in MiB/s. The error state does not affect the result because
// the `error?-1:` part is commented out. NOTE(review): no guard against
// us == 0 is visible - confirm.
return /*error?-1:*/(1. * size * sizeof(uint32_t) * iterations * 1000000) / (1024. * 1024. * us);