#define _POSIX_C_SOURCE 200809L #define _BSD_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include //#include //#include #define DEVICE "/dev/fpga0" #define BAR PCILIB_BAR0 #define USE_RING PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1) #define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 2) //#define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters #define BUFFERS 128 #define ITERATIONS 1000 #define DESC_THRESHOLD 1//BUFFERS/8 // Lorenzo: after how many desc the FPGA must update the "written descriptor counter" in PC mem // if set to 0, the update only happens when INT is received #define HUGE_PAGE 1 // number of pages per huge page #define TLP_SIZE 64 // TLP SIZE = 64 for 256B payload, 32 for 128B payload #define PAGE_SIZE 4096 // other values are not supported in the kernel #define USE_64 // Lorenzo: use 64bit addressing #define USE_STREAMING // Use alternative implementation with streaming sequence of memory pages #define ENABLE_COUNTER 0x9000 // If set wee need to write 1 to the specified register to enable counter //#define DUAL_CORE // Lorenzo: DUAL Core //#define SHARED_MEMORY // Lorenzo: Test for fast GUI #define CHECK_READY // Lorenzo: Check if PCI-Express is ready by reading 0x0 #define MEM_COPY // Lorenzo: CPY data //#define CHECK_RESULTS // Lorenzo: Check if data received is ok (only for counter!) //#define CHECK_RESULTS_LOG // Lorenzo: Check if data received is ok (only for counter!) #define PRINT_RESULTS // Lorenzo: Save the received data in "data.out" //#define EXIT_ON_EMPTY // Lorenzo: Exit if an "empty_detected" signal is received #define TIMEOUT 1000000 /* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help, otherall we have only 3 checks in average. Check ready seems to be not needed and adds quite much extra time */ //#define USE_IRQ //#define REALTIME //#define ADD_DELAYS #define FPGA_CLOCK 250 // Lorenzo: in MHz ! //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); } //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; } #define WR(addr, value) { *(uint32_t*)(bar + addr + offset) = value; } #define RD(addr, value) { value = *(uint32_t*)(bar + addr + offset); } // ************************************************************************************** // Progress BAR // Process has done x out of n rounds, // and we want a bar of width w and resolution r. static inline void loadBar(int x, int n, int r, int w) { // Only update r times. if ( x % (n/r +1) != 0 ) return; // Calculuate the ratio of complete-to-incomplete. float ratio = x/(float)n; int c = ratio * w; // Show the percentage complete. printf("%3d%% [", (int)(ratio*100) ); // Show the load bar. for (x=0; x 999999999) { wait.tv_sec += 1; wait.tv_nsec = 1000000000 - wait.tv_nsec; } do { clock_gettime(CLOCK_REALTIME, &tv); } while ((wait.tv_sec > tv.tv_sec)||((wait.tv_sec == tv.tv_sec)&&(wait.tv_nsec > tv.tv_nsec))); } // ************************************************************************************** int main() { int err; long i, j; pcilib_t *pci; pcilib_kmem_handle_t *kdesc; pcilib_kmem_handle_t *kbuf; struct timeval start, end; size_t run_time; long long int size_mb; void* volatile bar; uintptr_t bus_addr[BUFFERS]; uintptr_t kdesc_bus; volatile uint32_t *desc; typedef volatile uint32_t *Tbuf; Tbuf ptr[BUFFERS]; #ifdef SWITCH_GENERATOR int switch_generator = 0; #endif /* SWITCH_GENERATOR */ #if defined(CHECK_RESULTS)||defined(CHECK_RESULTS_LOG) long k; int mem_diff; #endif /* CHECK_RESULTS */ float performance, perf_counter; pcilib_bar_t bar_tmp = BAR; uintptr_t offset = 0; unsigned int temp; int iterations_completed, buffers_filled; // int shmid; printf("\n\n**** **** **** KIT-DMA TEST **** **** ****\n\n"); //size = ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE; size_mb = ITERATIONS * BUFFERS * HUGE_PAGE * 4 / 1024; printf("Total size of memory buffer: \t %.3lf GBytes\n", (float)size_mb/1024 ); printf("Using %d Buffers with %d iterations\n\n", BUFFERS, ITERATIONS ); #ifdef ADD_DELAYS long rpt = 0, rpt2 = 0; size_t best_time; best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024); #endif /* ADD_DELAYS */ pcilib_kmem_flags_t flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE/*|PCILIB_KMEM_FLAG_REUSE*/; // Lorenzo: if REUSE = 1, the re-allocation fails! pcilib_kmem_flags_t free_flags = PCILIB_KMEM_FLAG_HARDWARE/*|PCILIB_KMEM_FLAG_EXCLUSIVE|PCILIB_KMEM_FLAG_REUSE*/; pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE; pci = pcilib_open(DEVICE, "pci"); if (!pci) fail("pcilib_open"); bar = pcilib_map_bar(pci, BAR); if (!bar) { pcilib_close(pci); fail("map bar"); } pcilib_detect_address(pci, &bar_tmp, &offset, 1); pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0); pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT); pcilib_clean_kernel_memory(pci, USE, clean_flags); pcilib_clean_kernel_memory(pci, USE_RING, clean_flags); kdesc = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_CONSISTENT, 1, 128, 4096, USE_RING, flags); kdesc_bus = pcilib_kmem_get_block_ba(pci, kdesc, 0); desc = (uint32_t*)pcilib_kmem_get_block_ua(pci, kdesc, 0); memset((void*)desc, 0, 5*sizeof(uint32_t)); #ifdef REALTIME pid_t pid; struct sched_param sched = {0}; pid = getpid(); sched.sched_priority = sched_get_priority_min(SCHED_FIFO); if (sched_setscheduler(pid, SCHED_FIFO, &sched)) printf("Warning: not able to get real-time priority\n"); #endif /* REALTIME */ // ****************************************************************** // **** MEM: check 4k boundary ***** // ****************************************************************** do { printf("* Allocating KMem, "); #ifdef STATIC_REGION kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, flags); #else kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, flags); #endif if (!kbuf) { printf("KMem allocation failed\n"); exit(0); } // Pointers for Virtualized Mem for (j = 0; j < BUFFERS; j++) { ptr[j] = (volatile uint32_t*)pcilib_kmem_get_block_ua(pci, kbuf, j); memset((void*)(ptr[j]), 0, HUGE_PAGE * PAGE_SIZE); } err = 0; // Check if HW addresses satisfy 4k boundary condition, if not -> free (!!) and reallocate memory printf("4k boundary test: "); for (j = 0; j < BUFFERS; j++) { temp = (((unsigned int)pcilib_kmem_get_block_ba(pci, kbuf, j)) % 4096); //printf("%u", temp); if (temp != 0) { err = 1; } } if (err == 1) { pcilib_clean_kernel_memory(pci, USE, clean_flags); pcilib_clean_kernel_memory(pci, USE_RING, clean_flags); pcilib_free_kernel_memory(pci, kbuf, free_flags); printf("failed \xE2\x9C\x98\n"); } else printf("passed \xE2\x9C\x93\n"); } while (err == 1); // ****************************************************************** // **** Allocate RAM buffer Memory ***** // ****************************************************************** FILE * Output; FILE * error_log; #ifdef MEM_COPY uint32_t *temp_data[ITERATIONS][BUFFERS]; for (j=0; j < ITERATIONS; j++) { for (i=0; i < BUFFERS; i++) { temp_data[j][i] = (uint32_t *)malloc(HUGE_PAGE*PAGE_SIZE); if (temp_data[j][i] == 0) { printf("******* Error: could not allocate memory! ********\n"); exit(0); } memset((void*)(temp_data[j][i]), 0, HUGE_PAGE * PAGE_SIZE); } } #endif #ifdef SHARED_MEMORY // give your shared memory an id, anything will do key_t key = 123456; char *shared_memory; // Setup shared memory, 11 is the size /* if ((shmid = shmget(key, HUGE_PAGE*PAGE_SIZE, IPC_CREAT | 0666)) < 0) { printf("Error getting shared memory id"); exit(1); } // Attached shared memory if ((shared_memory = shmat(shmid, NULL, 0)) == (char *) -1) { printf("Error attaching shared memory id"); exit(1); } printf("* Shared memory created... Id:\t %d\n", key); //////////////// SHARED MEMORY TEST */ #endif Output = fopen ("data.out", "w"); fclose(Output); error_log = fopen ("error_log.txt", "w"); fclose(error_log); // ****************************************************************** // **** PCIe TEST ***** // ****************************************************************** // Reset DMA printf("* DMA: Reset...\n"); WR(0x00, 0x1); usleep(100000); WR(0x00, 0x0); usleep(100000); #ifdef CHECK_READY printf("* PCIe: Testing..."); RD(0x0, err); if (err == 335746816 || err == 335681280) { printf("\xE2\x9C\x93 \n"); } else { printf("\xE2\x9C\x98\n PCIe not ready!\n"); exit(0); } #endif // ****************************************************************** // **** DMA CONFIGURATION ***** // ****************************************************************** printf("* DMA: Send Data Amount\n"); #ifdef DUAL_CORE WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))/2); #else WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))); #endif printf("* DMA: Running mode: "); #ifdef USE_64 if (TLP_SIZE == 64) { WR(0x0C, 0x80040); printf ("64bit - 256B Payload\n"); } else if (TLP_SIZE == 32) { WR(0x0C, 0x80020); printf ("64bit - 128B Payload\n"); } #else if (TLP_SIZE == 64) { WR(0x0C, 0x0040); printf ("32bit - 256B Payload\n"); } else if (TLP_SIZE == 32) { WR(0x0C, 0x0020); printf ("32bit - 128B Payload\n"); } #endif printf("* DMA: Reset Desc Memory...\n"); WR(0x5C, 0x00); // RST Desc Memory printf("Writing SW Read Descriptor\n"); WR(0x58, BUFFERS-1); //WR(0x58, 0x01); printf("Writing the Descriptor Threshold\n"); WR(0x60, DESC_THRESHOLD); printf("Writing HW write Descriptor Address: %lx\n", kdesc_bus); WR(0x54, kdesc_bus); usleep(100000); printf("* DMA: Writing Descriptors\n"); for (j = 0; j < BUFFERS; j++ ) { bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j); // LEAVE THIS DELAY???!?!?!?! usleep(1000); //printf("Writing descriptor num. %ld: \t %08lx \n", j, bus_addr[j]); WR(0x50, bus_addr[j]); } // ****************************************************************** // **** START DMA ***** // ****************************************************************** //printf ("\n ---- Press ENTER to start DMA ---- \n"); //getchar(); printf("* DMA: Start \n"); WR(0x04, 0x1); #ifdef ENABLE_COUNTER printf("* Enable counter\n"); WR(ENABLE_COUNTER, 0x1); #endif /* ENABLE_COUNTER */ gettimeofday(&start, NULL); // ****************************************************************** // **** Handshaking DMA ***** // ****************************************************************** uint32_t curptr = 0, hwptr; uint32_t curbuf = 0; int empty = 0; i = 0; while (i < ITERATIONS) { j = 0; //printf("\ndesc0: %lx", desc[0]); //printf("\ndesc1: %lx", desc[1]); //printf("\ndesc2: %lx", desc[2]); //printf("\ndesc3: %lx", desc[3]); //printf("\ndesc4: %lx", desc[4]); // printf("\ndesc5: %lx", htonl(desc[5])); //printf("Iteration: %li of %li \r", i+1, ITERATIONS); //getchar(); //loadBar(i+1, ITERATIONS, ITERATIONS, 30); // printf("\nhwptr: %zu", hwptr); // printf("\ncurptr: %zu", curptr); do { #ifdef USE_64 hwptr = desc[3]; #else // 32-bit hwptr = desc[4]; #endif j++; //printf("curptr: %lx \t \t hwptr: %lx\n", curptr, hwptr); } while (hwptr == curptr); do { pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, curbuf); #ifdef MEM_COPY memcpy(temp_data[i][curbuf], (void*)ptr[curbuf], 4096); #endif #ifdef USE_STREAMING WR(0x50, bus_addr[curbuf]); // Now, we can check error in the register 0x68 (non-zero if there is an error) #endif /* USE_STREAMING */ #ifdef CHECK_RESULTS for (k = 0; k < 1024 ; k++) { mem_diff = (ptr[curbuf][k] - ptr[curbuf][k]); //if ((mem_diff == 1) || (mem_diff == (-7)) || (k == 1023) ) if (mem_diff == -1) {;} else { //fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j][k], temp_data[i][j][k+1], mem_diff); err++; } } #endif #ifdef SHARED_MEMORY memcpy(shared_memory, ptr[curbuf], 4096); #endif //printf("\ncurbuf: %08x", curbuf); //printf("\nbus_addr[curbuf]\n: %08x",bus_addr[curbuf]); // for (k = 0; k < 63; k++){ // if (k%16 == 0) printf("\n# %d # :", k); // printf(" %08x", ptr[curbuf][k]); // } //pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_TODEVICE, curbuf); curbuf++; if (curbuf == BUFFERS) { i++; curbuf = 0; #ifdef SWITCH_GENERATOR if (switch_generator == 1) { switch_generator = 0; WR(0x9040, 0x100007F0); } else { WR(0x9040, 0x180007F0); switch_generator = 1; } #endif if (i >= ITERATIONS) break; //if (i >= (ITERATIONS - 4) ) WR(0x04, 0x0f); } } while (bus_addr[curbuf] != hwptr); #ifdef EXIT_ON_EMPTY #ifdef USE_64 if (desc[1] != 0) #else // 32bit if (desc[2] != 0) #endif { if (bus_addr[curbuf] == hwptr) { empty = 1; break; } } #endif #ifndef USE_STREAMING // Generally it is safe to write register in any case, it is just unsed in streaming mode // Therefore, in the driver we can use it instead of software register so far WR(0x58, curbuf + 1); #endif /* ! USE_STREAMING */ //printf("WR %d\n", curbuf + 1); //printf("%u (%lu)\n", curbuf, j); curptr = hwptr; } // ****************************************************************** // **** Read performance and stop DMA ******* // ****************************************************************** gettimeofday(&end, NULL); WR(0x04, 0x00); usleep(100); RD(0x28, perf_counter); usleep(100); WR(0x00, 0x01); iterations_completed = i; buffers_filled = curbuf; if (empty) printf("* DMA: Empty FIFO! Last iteration: %li of %i\n", i+1, ITERATIONS); printf ("* DMA: Stop\n\n"); #ifdef MEM_COPY printf ("First value:\t %08x\n", temp_data[0][0][0]); printf ("Last value:\t %08x\n\n", temp_data[ITERATIONS-1][BUFFERS-1][(PAGE_SIZE/4)-4]); #endif // ****************************************************************** // **** Performance ******* // ****************************************************************** printf("Iterations done: %d\n", iterations_completed); printf("Buffers filled on last iteration: %d\n", buffers_filled); run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); //size = (long long int) (( BUFFERS * (iterations_completed) + buffers_filled) * HUGE_PAGE * PAGE_SIZE); size_mb = (long long int) (( BUFFERS * (iterations_completed) + buffers_filled) * HUGE_PAGE * 4 / 1024); printf("Performance: transfered %llu Mbytes in %zu us using %d buffers\n", (size_mb), run_time, BUFFERS); //printf("Buffers: \t %d \n", BUFFERS); //printf("Buf_Size: \t %d \n", PAGE_SIZE); //printf("Perf_counter: \t %f \n", perf_counter); performance = ((size_mb * FPGA_CLOCK * 1000000)/(perf_counter*256)); printf("DMA perf counter:\t%d\n", (int)perf_counter); printf("DMA side:\t\t%.3lf MB/s\n", performance); printf("PC side:\t\t%.3lf MB/s\n\n", 1000000. * size_mb / run_time ); // ****************************************************************** // **** Read Data ******* // ****************************************************************** #ifdef PRINT_RESULTS printf("Writing Data to HDD... \n"); for (i=0; i < iterations_completed; i++) { for (j=0; j < BUFFERS; j++) { Output = fopen("data.out", "a"); fwrite(temp_data[i][j], 4096, 1, Output); fclose(Output); } loadBar(i+1, ITERATIONS, ITERATIONS, 30); } // Save last partially filled iteration for (j=0; j < buffers_filled; j++) { Output = fopen("data.out", "a"); fwrite(temp_data[iterations_completed][j], 4096, 1, Output); fclose(Output); } printf("Data saved in data.out. \n"); #endif #ifdef CHECK_RESULTS_LOG err = 0; error_log = fopen ("error_log.txt", "a"); printf("\nChecking data ...\n"); for (i=0; i < iterations_completed; i++) { for (j = 0; j < BUFFERS; j++) { for (k = 0; k < 1024 ; k++) { mem_diff = ((uint32_t)temp_data[i][j][k] - (uint32_t)temp_data[i][j][k+1]); //if ((mem_diff == 1) || (mem_diff == (-7)) || (k == 1023) ) if ((mem_diff == -1) || (k == 1023) ) {;} else { fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j][k], temp_data[i][j][k+1], mem_diff); err++; } } if (j != BUFFERS-1) { // Check first and Last mem_diff = (uint32_t)(temp_data[i][j+1][0] - temp_data[i][j][1023]); if (mem_diff == (1)) {;} else { fprintf(error_log, "Error_2 in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", i, j, k, temp_data[i][j+1][0], temp_data[i][j][1023], mem_diff); err++; } } } loadBar(i+1, ITERATIONS, ITERATIONS, 30); } for (j = 0; j < buffers_filled; j++) { for (k = 0; k < 1024 ; k++) { mem_diff = ((uint32_t)temp_data[iterations_completed][j][k] - (uint32_t)temp_data[iterations_completed][j][k+1]); if ((mem_diff == -1) || (k == 1023) ) {;} else { fprintf(error_log, "Error in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", iterations_completed, j, k, temp_data[iterations_completed][j][k], temp_data[iterations_completed][j][k+1], mem_diff); err++; } } if (j != buffers_filled-1) { // Check first and Last mem_diff = (uint32_t)(temp_data[i][j+1][0] - temp_data[i][j][1023]); if (mem_diff == (1)) {;} else { fprintf(error_log, "Error_2 in: \t IT %li \t BUF : %li \t OFFSET: %li \t | %08x --> %08x - DIFF: %d \n", iterations_completed, j, k, temp_data[iterations_completed][j+1][0], temp_data[iterations_completed][j][1023], mem_diff); err++; } } } if (err != 0) printf("\rChecking data: \xE2\x9C\x98 %d errors found \n See \"error_log.txt\" for details \n\n", err); else printf("\rChecking data: \xE2\x9C\x93 no errors found \n\n"); fclose(error_log); #endif // *********** Free Memory #ifdef MEM_COPY for (i=0; i < ITERATIONS; i++) { for (j=0; j < BUFFERS; j++) { free(temp_data[i][j]); } } #endif pcilib_free_kernel_memory(pci, kbuf, free_flags); pcilib_free_kernel_memory(pci, kdesc, free_flags); pcilib_disable_irq(pci, 0); pcilib_unmap_bar(pci, BAR, bar); pcilib_close(pci); // shmdt(shmid); // shmctl(shmid, IPC_RMID, NULL); }