bzr branch
http://darksoft.org/webbzr/alps/pcitool
201
by Suren A. Chilingaryan
Xilinx benchmark |
1 |
#define _BSD_SOURCE
|
302
by Suren A. Chilingaryan
Fixes out-of-source builds and minor build issues |
2 |
#define _DEFAULT_SOURCE
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
3 |
#define _POSIX_C_SOURCE 199309L
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
4 |
#include <stdio.h> |
5 |
#include <stdlib.h> |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
6 |
#include <string.h> |
201
by Suren A. Chilingaryan
Xilinx benchmark |
7 |
#include <unistd.h> |
8 |
#include <stdarg.h> |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
9 |
#include <time.h> |
10 |
#include <sched.h> |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
11 |
#include <sys/time.h> |
12 |
||
13 |
#include "pcilib.h" |
|
14 |
#include "irq.h" |
|
15 |
#include "kmem.h" |
|
258
by Suren A. Chilingaryan
Split bar manipulation and fifo operations in stand-alone source and publish kmem and bar headers |
16 |
#include "bar.h" |
201
by Suren A. Chilingaryan
Xilinx benchmark |
17 |
|
18 |
#define DEVICE "/dev/fpga0"
|
|
19 |
#define BAR PCILIB_BAR0
|
|
20 |
#define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1)
|
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
21 |
#define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters |
22 |
#define BUFFERS 1
|
|
23 |
#define ITERATIONS 100
|
|
24 |
#define TLP_SIZE 64
|
|
25 |
#define HUGE_PAGE 4096 // number of pages per huge page |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
26 |
#define PAGE_SIZE 4096 // other values are not supported in the kernel |
201
by Suren A. Chilingaryan
Xilinx benchmark |
27 |
#define TIMEOUT 100000
|
28 |
||
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
29 |
/* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help,
|
30 |
overall we have only 3 checks on average. Check ready seems to be not needed and adds quite
|
|
31 |
much extra time */
|
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
32 |
#define USE_IRQ
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
33 |
//#define CHECK_READY
|
34 |
//#define REALTIME
|
|
35 |
//#define ADD_DELAYS
|
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
36 |
#define CHECK_RESULT
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
37 |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
38 |
//#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); }
|
39 |
//#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; }
|
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
40 |
/*
 * 32-bit register accessors for the mapped BAR.
 *
 * Both macros rely on two names being visible at the expansion site:
 *   bar    - base pointer of the mapped BAR
 *   offset - byte offset added to every register address
 *
 * The access goes through a volatile-qualified pointer so the compiler can
 * neither drop nor merge the load/store (required for polling loops), and
 * the address is computed on a char pointer instead of relying on void*
 * arithmetic.  Arguments are parenthesized so expressions can be passed.
 *
 * The expansion is deliberately a plain brace block (not do/while(0)):
 * existing call sites invoke the macros both with and without a trailing
 * semicolon.
 */
#define WR(addr, value) { *(volatile uint32_t*)((char*)bar + (addr) + offset) = (value); }
#define RD(addr, value) { (value) = *(volatile uint32_t*)((char*)bar + (addr) + offset); }
|
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
42 |
|
43 |
/*
 * Print a printf-style formatted message followed by a newline to stdout
 * and terminate the process with exit status -1.  Never returns.
 */
static void fail(const char *msg, ...) {
    va_list args;

    va_start(args, msg);
    vprintf(msg, args);
    va_end(args);

    putchar('\n');
    exit(-1);
}
|
|
53 |
||
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
54 |
/*
 * Busy-wait for at least `ns` nanoseconds.
 *
 * The deadline is computed from CLOCK_REALTIME and the function then spins
 * on clock_gettime() until the deadline has passed; the CPU is never
 * yielded.
 *
 * The delay is split into whole seconds and a sub-second remainder before
 * it is added, so tv_nsec is always left in [0, 1e9) - including when the
 * addition carries into the next second or when ns itself is >= 1 second.
 */
void hpsleep(size_t ns) {
    struct timespec deadline, now;

    clock_gettime(CLOCK_REALTIME, &deadline);

    deadline.tv_sec += (time_t)(ns / 1000000000);
    deadline.tv_nsec += (long)(ns % 1000000000);
    if (deadline.tv_nsec >= 1000000000L) {
        /* carry the overflow into the seconds field */
        deadline.tv_sec += 1;
        deadline.tv_nsec -= 1000000000L;
    }

    do {
        clock_gettime(CLOCK_REALTIME, &now);
    } while ((deadline.tv_sec > now.tv_sec) ||
             ((deadline.tv_sec == now.tv_sec) && (deadline.tv_nsec > now.tv_nsec)));
}
|
|
69 |
||
70 |
||
201
by Suren A. Chilingaryan
Xilinx benchmark |
71 |
int main() { |
72 |
int err; |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
73 |
long i, j; |
201
by Suren A. Chilingaryan
Xilinx benchmark |
74 |
pcilib_t *pci; |
75 |
pcilib_kmem_handle_t *kbuf; |
|
76 |
uint32_t status; |
|
77 |
struct timeval start, end; |
|
78 |
size_t size, run_time; |
|
79 |
void* volatile bar; |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
80 |
uintptr_t bus_addr[BUFFERS]; |
201
by Suren A. Chilingaryan
Xilinx benchmark |
81 |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
82 |
pcilib_bar_t bar_tmp = BAR; |
83 |
uintptr_t offset = 0; |
|
84 |
||
201
by Suren A. Chilingaryan
Xilinx benchmark |
85 |
pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE; |
86 |
||
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
87 |
#ifdef ADD_DELAYS
|
88 |
long rpt = 0, rpt2 = 0; |
|
89 |
size_t best_time; |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
90 |
best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024); |
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
91 |
#endif /* ADD_DELAYS */ |
92 |
||
93 |
#ifdef REALTIME
|
|
94 |
pid_t pid; |
|
95 |
struct sched_param sched = {0}; |
|
96 |
||
97 |
pid = getpid(); |
|
98 |
sched.sched_priority = sched_get_priority_min(SCHED_FIFO); |
|
99 |
if (sched_setscheduler(pid, SCHED_FIFO, &sched)) |
|
100 |
printf("Warning: not able to get real-time priority\n"); |
|
101 |
#endif /* REALTIME */ |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
102 |
|
103 |
pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT); |
|
104 |
if (!pci) fail("pcilib_open"); |
|
105 |
||
106 |
bar = pcilib_map_bar(pci, BAR); |
|
107 |
if (!bar) { |
|
108 |
pcilib_close(pci); |
|
109 |
fail("map bar"); |
|
110 |
}
|
|
111 |
||
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
112 |
pcilib_detect_address(pci, &bar_tmp, &offset, 1); |
113 |
||
114 |
// Reset
|
|
115 |
WR(0x00, 1) |
|
116 |
usleep(1000); |
|
117 |
WR(0x00, 0) |
|
118 |
||
201
by Suren A. Chilingaryan
Xilinx benchmark |
119 |
pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0); |
120 |
pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT); |
|
121 |
||
122 |
pcilib_clean_kernel_memory(pci, USE, clean_flags); |
|
123 |
||
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
124 |
#ifdef STATIC_REGION
|
125 |
kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, 0); |
|
126 |
#else /* STATIC_REGION */ |
|
127 |
kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0); |
|
128 |
#endif /* STATIC_REGION */ |
|
129 |
||
130 |
if (!kbuf) { |
|
131 |
printf("KMem allocation failed\n"); |
|
132 |
exit(0); |
|
133 |
}
|
|
134 |
||
135 |
||
136 |
#ifdef CHECK_RESULT
|
|
137 |
volatile uint32_t *ptr0 = pcilib_kmem_get_block_ua(pci, kbuf, 0); |
|
138 |
||
139 |
memset((void*)ptr0, 0, (HUGE_PAGE * PAGE_SIZE)); |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
140 |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
141 |
for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) { |
142 |
if (ptr0[i] != 0) break; |
|
143 |
}
|
|
144 |
if (i < (HUGE_PAGE * PAGE_SIZE / 4)) { |
|
145 |
printf("Initialization error in position %lu, value = %x\n", i * 4, ptr0[i]); |
|
146 |
}
|
|
147 |
#endif /* CHECK_RESULT */ |
|
148 |
||
201
by Suren A. Chilingaryan
Xilinx benchmark |
149 |
WR(0x04, 0) |
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
150 |
WR(0x0C, TLP_SIZE) |
151 |
WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))) |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
152 |
WR(0x14, 0x13131313) |
153 |
||
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
154 |
for (j = 0; j < BUFFERS; j++ ) { |
155 |
bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j); |
|
156 |
}
|
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
157 |
|
158 |
gettimeofday(&start, NULL); |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
159 |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
160 |
for (i = 0; i < ITERATIONS; i++) { |
161 |
for (j = 0; j < BUFFERS; j++ ) { |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
162 |
// uintptr_t ba = pcilib_kmem_get_block_ba(pci, kbuf, j);
|
163 |
// WR(0x08, ba)
|
|
164 |
WR(0x08, bus_addr[j]); |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
165 |
WR(0x04, 0x01) |
166 |
||
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
167 |
#ifdef USE_IRQ
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
168 |
err = pcilib_wait_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT, TIMEOUT, NULL); |
169 |
if (err) printf("Timeout waiting for IRQ, err: %i\n", err); |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
170 |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
171 |
RD(0x04, status); |
172 |
if ((status&0xFFFF) != 0x101) printf("Invalid status %x\n", status); |
|
173 |
// WR(0x04, 0x00);
|
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
174 |
#else /* USE_IRQ */ |
175 |
# ifdef ADD_DELAYS
|
|
176 |
// hpsleep(best_time);
|
|
177 |
do { |
|
178 |
rpt++; |
|
179 |
RD(0x04, status); |
|
180 |
} while (status != 0x101); |
|
181 |
# else /* ADD_DELAYS */ |
|
182 |
do { |
|
183 |
RD(0x04, status); |
|
184 |
} while (status != 0x101); |
|
185 |
# endif /* ADD_DELAYS */ |
|
186 |
#endif /* USE_IRQ */ |
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
187 |
|
188 |
WR(0x00, 1) |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
189 |
#ifdef CHECK_READY
|
201
by Suren A. Chilingaryan
Xilinx benchmark |
190 |
do { |
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
191 |
rpt2++; |
201
by Suren A. Chilingaryan
Xilinx benchmark |
192 |
RD(0x04, status); |
193 |
} while (status != 0); |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
194 |
#endif /* CHECK_READY */ |
201
by Suren A. Chilingaryan
Xilinx benchmark |
195 |
WR(0x00, 0) |
196 |
}
|
|
197 |
}
|
|
198 |
gettimeofday(&end, NULL); |
|
199 |
||
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
200 |
#ifdef CHECK_RESULT
|
201 |
pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0); |
|
202 |
||
203 |
for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) { |
|
204 |
// printf("%lx ", ptr0[i]);
|
|
205 |
if (ptr0[i] != 0x13131313) break; |
|
206 |
}
|
|
207 |
if (i < (HUGE_PAGE * PAGE_SIZE / 4)) { |
|
208 |
printf("Error in position %lu, value = %x\n", i * 4, ptr0[i]); |
|
209 |
}
|
|
210 |
#endif /* CHECK_RESULT */ |
|
211 |
||
201
by Suren A. Chilingaryan
Xilinx benchmark |
212 |
pcilib_free_kernel_memory(pci, kbuf, 0); |
213 |
pcilib_disable_irq(pci, 0); |
|
214 |
pcilib_unmap_bar(pci, BAR, bar); |
|
215 |
pcilib_close(pci); |
|
216 |
||
217 |
run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
218 |
size = (long long int)ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE; |
201
by Suren A. Chilingaryan
Xilinx benchmark |
219 |
|
220 |
printf("%.3lf GB/s: transfered %zu bytes in %zu us using %u buffers\n", 1000000. * size / run_time / 1024 / 1024 / 1024, size, run_time, BUFFERS); |
|
202
by Suren A. Chilingaryan
Some tests with Xilinx DMA benchmark |
221 |
|
222 |
# ifdef ADD_DELAYS
|
|
223 |
printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS)); |
|
224 |
#endif /* USE_IRQ */ |
|
223
by Suren A. Chilingaryan
Merge changes from xilinx_dma branch providing support of multipage kmem allocations and mapping memory regions reserved with memmap boot option |
225 |
|
226 |
||
201
by Suren A. Chilingaryan
Xilinx benchmark |
227 |
}
|