7
/* Default element count of each input array; main() starts from this value. */
#define DEFAULT_SIZE 1000000
9
/*
 * Benchmark kernel, defined elsewhere.
 * main() calls it as exercise(&res, a, b, size, iterations): `res` receives
 * the single float result, `a` and `b` are the two input arrays of `size`
 * floats.
 * main() treats a return value of (size_t)-1 specially and otherwise divides
 * the return value by 1e9 to print seconds, so it is presumably a run time
 * in nanoseconds -- TODO confirm against the implementation.
 */
size_t exercise(float *res, float *a, float *b, size_t size, int iterations);
10
/*
 * Set-up hook, defined elsewhere. main() passes the program name derived
 * from argv[0] and the (possibly padded) element count and stores the
 * result in `err`; the meaning of the return value is not visible here.
 */
int exercise_init(const char *name, size_t size);
11
/*
 * Allocation hook, defined elsewhere. main() passes the addresses of its
 * `a` and `b` pointers plus the element count and stores the result in
 * `err`. Presumably fills *a and *b on success -- TODO confirm.
 */
int exercise_allocate(float **a, float **b, size_t size);
13
/*
 * Defined elsewhere. When greater than 1, main() rounds the element count
 * up to a multiple of this value via get_padded_size().
 */
extern int exercise_required_alignment;
16
/*
 * Round `size` up to the nearest multiple of `block_size`.
 *
 * size        number of elements requested
 * block_size  granularity to pad to; 0 means "no padding" and returns
 *             `size` unchanged instead of dividing by zero
 *
 * Returns `size` itself when it is already a multiple of `block_size`,
 * otherwise the next larger multiple. The result wraps around like any
 * other size_t arithmetic if the padded value does not fit in size_t.
 */
size_t get_padded_size(size_t size, size_t block_size) {
    if (block_size == 0) {
        return size;
    }

    /* Elements already occupying the last, partially filled block. */
    size_t remainder = size % block_size;
    if (remainder == 0) {
        return size;
    }

    return size + (block_size - remainder);
}
24
/*
 * Benchmark driver: prepares two float arrays, runs exercise() on them and
 * prints the result together with throughput figures.
 *
 * NOTE(review): several statements of this function (declarations of `err`,
 * `i` and `runtime`, some `if` heads and all closing braces) are not visible
 * in this view; comments below describe only what the visible lines show.
 */
int main(int argc, char *argv[]) {
27
/* Element count per array; may be rounded up for alignment further down. */
size_t size = DEFAULT_SIZE;
28
/* ITERATIONS is defined outside this view. */
size_t iterations = ITERATIONS;
29
int standard_memory = 0;
30
/* Input arrays and the single-float result written by exercise(). */
float *a = NULL, *b = NULL, res;
33
/* `us`: elapsed wall-clock microseconds; `throughput`: bytes per pass. */
size_t us, throughput;
34
struct timeval tv1,tv2;
36
char *name, fname[255];
39
/*
 * Points at the last '/' of the program path.
 * NOTE(review): strrchr returns NULL when argv[0] contains no '/';
 * any handling of that case is not visible here -- confirm.
 */
name = strrchr(argv[0], '/');
48
/*
 * NOTE(review): no argc check is visible before argv[2] is read -- confirm
 * one exists. atoi returns int, so a negative argument converts to a very
 * large size_t, and non-numeric input yields 0.
 */
iterations = atoi(argv[2]);
49
/* A zero iteration count falls back to a single iteration. */
if (!iterations) iterations = 1;
52
/* Pad the element count to a multiple of the required alignment. */
if (exercise_required_alignment > 1) {
53
size = get_padded_size(size, exercise_required_alignment);
56
/* Bytes covered by one pass over both arrays: 2 arrays * size floats. */
throughput = 2 * size * sizeof(float);
57
/* Numeric locale for the thousands-grouping flag used in the next printf. */
setlocale(LC_NUMERIC, "en_US");
58
/*
 * NOTE(review): `size`, `iterations` and the throughput expression are all
 * size_t but are printed with %lu / %u; %zu would match the argument types.
 */
printf("Dot product of %'lu elements, Iterations %u, %lu Gthroughput required\n", size, iterations, (throughput/1000000000));
60
err = exercise_init(name, size);
63
err = exercise_allocate(&a, &b, size);
66
/* The `if` this `else` belongs to is not visible in this view. */
else printf("Allocation of fast memory failed, using standard slow mode...\n");
72
/* Zero-initialised heap arrays of `size` floats each. */
a = (float *)calloc( size, sizeof( float ) );
73
b = (float *)calloc( size, sizeof( float ) );
74
if(a == NULL || b == NULL) {
75
printf( "Can't allocate memory for arrays\n");
81
/* Fill both inputs with pseudo-random values in [0, 1]. */
for (i = 0; i < size; i++) {
82
a[i] = 1. * rand() / RAND_MAX;
83
b[i] = 1. * rand() / RAND_MAX;
86
/* Wall-clock timestamps bracket the whole exercise() call. */
gettimeofday(&tv1, NULL);
87
runtime = exercise(&res, a, b, size, iterations);
88
gettimeofday(&tv2, NULL);
90
/* (size_t)-1 is presumably the failure sentinel of exercise() -- confirm. */
if (runtime == (size_t)-1) {
94
/* `res` is a float; it is promoted to double for the variadic call. */
printf("Result: %lf\n", res);
98
if (standard_memory) {
103
/* Elapsed wall-clock time in microseconds. */
us = (tv2.tv_sec - tv1.tv_sec)*1000000 + (tv2.tv_usec - tv1.tv_usec);
105
/*
 * bytes / microsecond equals MB/s; the 0.001 factor converts to GB/s.
 * NOTE(review): `us` spans the entire exercise() call while `throughput`
 * counts a single pass; unlike `runtime` below it is not divided by
 * `iterations` -- confirm this is intended.
 */
printf("Throughput: %.2lf GB/s (%lf s)\n", ((0.001 * throughput) / us), us / 1000000.);
107
/* Per-iteration run time as reported by exercise(). */
runtime /= iterations;
108
/*
 * bytes / runtime is labelled GB/s and runtime / 1e9 is labelled seconds,
 * so `runtime` is presumably in nanoseconds -- confirm.
 * NOTE(review): a per-iteration runtime of 0 makes the first argument a
 * floating-point division by zero.
 */
printf("Throughput (excluding transfer): %.2lf GB/s (%lf s)\n", ((1. * throughput) / runtime), runtime / 1000000000.);