1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/* Iteration count passed to exercise(); main() also divides the measured
 * times by it to report per-iteration figures. */
#define ITERATIONS 10
/* Matrix dimension used by main() when no size is given on the command line. */
#define DEFAULT_SIZE 1024
/*
 * Hooks implemented outside this file.
 *
 * exercise(): runs the multiplication of a by b into res for size x size
 * matrices. main() treats a return of (size_t)-1 as failure and a non-zero
 * return as a measured run time (presumably nanoseconds, since main() divides
 * it by 1e9 to print seconds -- confirm against the implementation).
 */
size_t exercise(float *res, float *a, float *b, size_t size, int iterations);
/* Called once before allocation; a non-zero return makes main() exit with
 * that value. */
int exercise_init(const char *name, size_t size);
/* May provide the three matrices itself; a non-zero return makes main() exit
 * with that value. If *res is still NULL afterwards, main() falls back to
 * calloc() for all three buffers. */
int exercise_allocate(float **res, float **a, float **b, size_t size);
/* Called by main() once after the result file has been written. */
void exercise_free();
/* When greater than 1, main() rounds the matrix size up to a multiple of
 * this value. */
extern int exercise_required_alignment;
/*
 * Round size up to the nearest multiple of block_size.
 *
 * size:       value to pad.
 * block_size: required granularity; 0 is treated as "no alignment
 *             requirement" (the value is returned unchanged) instead of
 *             dividing by zero.
 *
 * Returns size itself when it is already a multiple of block_size, otherwise
 * the next larger multiple. The result wraps if that multiple does not fit
 * in size_t.
 */
size_t get_padded_size(size_t size, size_t block_size) {
    size_t remainder;

    if (block_size == 0) {
        return size;
    }

    remainder = size % block_size;
    if (remainder == 0) {
        return size;
    }
    return size + (block_size - remainder);
}
/*
 * Benchmark driver for the matrix-multiplication exercise.
 *
 * Usage: <program> [size]
 *   size: positive matrix dimension (default DEFAULT_SIZE); it is rounded up
 *         to a multiple of exercise_required_alignment when that is > 1.
 *
 * Fills two size x size matrices with pseudo-random values (fixed seed), runs
 * exercise() for ITERATIONS iterations, writes the raw result matrix to
 * "result-<program name>.out" and prints the achieved GFlops.
 *
 * Returns 0 on success, the error code of exercise_init()/exercise_allocate()
 * when those fail, and 1 on any other failure (bad size argument, out of
 * memory, exercise() reporting (size_t)-1).
 */
int main(int argc, char *argv[]) {
    int err;
    int status = 1;
    int len;
    size_t i, count;
    size_t size = DEFAULT_SIZE;
    int standard_memory = 0;
    float *a = NULL, *b = NULL, *res = NULL;
    size_t runtime;
    size_t us;
    double flops;
    struct timeval tv1, tv2;
    const char *name;
    char fname[255];
    FILE *f;

    /* Use the basename of the executable to label the run; argv[0] may be
     * absent when the program is exec'ed with an empty argument vector. */
    if (argc > 0 && argv[0] != NULL) {
        name = strrchr(argv[0], '/');
        name = name ? name + 1 : argv[0];
    } else {
        name = "exercise";
    }

    if (argc > 1) {
        /* strtol instead of atoi: reject garbage and non-positive sizes
         * rather than silently turning them into 0 or a huge size_t. */
        char *end;
        long requested = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || requested <= 0) {
            fprintf(stderr, "Invalid matrix size '%s'\n", argv[1]);
            return 1;
        }
        size = (size_t)requested;
    }
    if (exercise_required_alignment > 1) {
        size = get_padded_size(size, exercise_required_alignment);
    }

    /* Make sure size * size * sizeof(float) is representable before it is
     * used for allocation, memset and the fill loop. */
    if (size == 0 || size > ((size_t)-1 / sizeof(float)) / size) {
        fprintf(stderr, "Matrix size %zu is too large\n", size);
        return 1;
    }
    count = size * size;

    /* 2*N^3 floating point operations; kept as double so it cannot wrap on
     * targets with a 32-bit size_t. */
    flops = 2.0 * size * size * size;
    printf("Matrix multiplication %zux%zu by %zux%zu, Iterations %d, %zu GFlops required\n", size, size, size, size, ITERATIONS, (size_t)(flops / 1000000000.));

    err = exercise_init(name, size);
    if (err) return err;
    err = exercise_allocate(&res, &a, &b, size);
    if (err) return err;

    if (!res) {
        /* The exercise did not supply buffers: fall back to plain heap memory. */
        standard_memory = 1;
        a = calloc(count, sizeof *a);
        b = calloc(count, sizeof *b);
        res = calloc(count, sizeof *res);
        if (a == NULL || b == NULL || res == NULL) {
            fprintf(stderr, "\n Can't allocate memory for arrays\n");
            goto cleanup;
        }
    }

    /* Fixed seed keeps the inputs (and therefore the result file) reproducible. */
    srand(1);
    for (i = 0; i < count; i++) {
        a[i] = 1. * rand() / RAND_MAX;
        b[i] = 1. * rand() / RAND_MAX;
    }
    memset(res, 0, count * sizeof *res);

    gettimeofday(&tv1, NULL);
    runtime = exercise(res, a, b, size, ITERATIONS);
    gettimeofday(&tv2, NULL);
    if (runtime == (size_t)-1) {
        goto cleanup;
    }

    /* Dump the raw result matrix; failures here are reported but do not
     * abort the benchmark. */
    len = snprintf(fname, sizeof fname, "result-%s.out", name);
    if (len < 0 || (size_t)len >= sizeof fname) {
        fprintf(stderr, "Result file name for '%s' is too long, result not written\n", name);
    } else {
        f = fopen(fname, "wb");
        if (f == NULL) {
            perror(fname);
        } else {
            if (fwrite(res, sizeof *res, count, f) != count) {
                fprintf(stderr, "Short write to %s\n", fname);
            }
            if (fclose(f) != 0) {
                perror(fname);
            }
        }
    }
    status = 0;

cleanup:
    exercise_free();
    if (standard_memory) {
        free(res);
        free(b);
        free(a);
    }
    if (status != 0) {
        return status;
    }

    /* Wall-clock time per iteration in microseconds. */
    us = (tv2.tv_sec - tv1.tv_sec)*1000000 + (tv2.tv_usec - tv1.tv_usec);
    us /= ITERATIONS;
    printf("GFlops: %.2lf (%lf s)\n", ((0.001 * flops) / us), us / 1000000.);
    if (runtime) {
        /* Time reported by exercise() itself, per iteration; the seconds
         * conversion below treats it as nanoseconds. */
        runtime /= ITERATIONS;
        printf("GFlops (excluding transfer): %.2lf (%lf s)\n", (flops / runtime), runtime / 1000000000.);
    }
    return 0;
}
|