6
/* Default dimension of the square matrices: main() starts with size = DEFAULT_SIZE
 * and allocates size * size bytes for each of the three buffers. */
#define DEFAULT_SIZE 1024
8
/*
 * Benchmarked kernel, defined elsewhere.
 *
 * main() calls it with the result buffer, the two input buffers, the matrix
 * dimension and the iteration count. main() special-cases a return value of
 * (size_t)-1; otherwise it divides the return value by the iteration count
 * and by 1e9 when printing seconds.
 * NOTE(review): presumably the total runtime in nanoseconds, with (size_t)-1
 * signalling failure -- confirm against the implementation.
 */
size_t exercise(unsigned char *res, unsigned char *a, unsigned char *b, size_t size, int iterations);
9
/*
 * Initialization hook, defined elsewhere.
 *
 * main() calls it before exercise_allocate(), passing a name derived from
 * argv[0] and the matrix dimension, and stores the result in its err variable.
 * NOTE(review): the return convention (presumably 0 on success) is not
 * visible here -- confirm against the implementation.
 */
int exercise_init(const char *name, size_t size);
10
/*
 * Buffer allocation hook, defined elsewhere.
 *
 * main() passes the addresses of its res, a and b pointers together with the
 * matrix dimension, and stores the result in its err variable.
 * NOTE(review): presumably fills the three pointers with implementation-
 * specific ("fast") memory and reports failure through the return value, in
 * which case main() falls back to calloc() -- confirm against the caller and
 * the implementation.
 */
int exercise_allocate(unsigned char **res, unsigned char **a, unsigned char **b, size_t size);
12
/* Defined elsewhere. When greater than 1, main() rounds the matrix dimension
 * up to a multiple of this value using get_padded_size(). */
extern int exercise_required_alignment;
15
/*
 * Round `size` up to the nearest multiple of `block_size`.
 *
 * size:       value to pad.
 * block_size: required granularity.
 *
 * Returns `size` unchanged when it is already a multiple of `block_size`,
 * otherwise the next larger multiple. A `block_size` of 0 means "no
 * alignment requirement": `size` is returned as-is, which also avoids a
 * division by zero.
 */
size_t get_padded_size(size_t size, size_t block_size) {
    if (block_size == 0) {
        return size;
    }

    /* Use the remainder directly instead of relying on unsigned wraparound
     * of (size / block_size) * block_size - size to detect misalignment. */
    size_t remainder = size % block_size;
    if (remainder == 0) {
        return size;
    }

    return size + (block_size - remainder);
}
23
int main(int argc, char *argv[]) {
26
size_t size = DEFAULT_SIZE;
27
size_t iterations = ITERATIONS;
28
int standard_memory = 0;
29
unsigned char *a = NULL, *b = NULL, *res = NULL;
33
struct timeval tv1,tv2;
35
char *name, fname[255];
38
name = strrchr(argv[0], '/');
47
iterations = atoi(argv[2]);
48
if (!iterations) iterations = 1;
51
if (exercise_required_alignment > 1) {
52
size = get_padded_size(size, exercise_required_alignment);
55
flops = 2 * size * size * sizeof(char);
56
printf("Matrix multiplication %lux%lu by %lux%lu, Iterations %u\n", size, size, size, size, iterations);
58
err = exercise_init(name, size);
61
err = exercise_allocate(&res, &a, &b, size);
64
else printf("Allocation of fast memory failed, using standard slow mode...\n");
70
a = (unsigned char *)calloc( size * size, sizeof( unsigned char ) );
71
b = (unsigned char *)calloc( size * size, sizeof( unsigned char ) );
72
res = (unsigned char *)calloc( size * size, sizeof( unsigned char ) );
73
if(a == NULL || b == NULL || res == NULL) {
74
printf( "Can't allocate memory for arrays\n");
80
for (i = 0; i < size * size; i++) {
81
a[i] = 127. * rand() / RAND_MAX;
82
b[i] = 127. * rand() / RAND_MAX;
84
memset(res, 0, size * size * sizeof(unsigned char));
86
gettimeofday(&tv1, NULL);
87
runtime = exercise(res, a, b, size, iterations);
88
gettimeofday(&tv2, NULL);
90
if (runtime == (size_t)-1) {
94
sprintf(fname, "result-%s.out", name);
95
f = fopen(fname, "w");
97
fwrite(res, sizeof(unsigned char), size * size, f);
103
if (standard_memory) {
109
us = (tv2.tv_sec - tv1.tv_sec)*1000000 + (tv2.tv_usec - tv1.tv_usec);
111
printf("Bandwidth: %.2lf GB/s (%lf s)\n", ((0.001 * flops) / us), us / 1000000.);
113
runtime /= iterations;
114
printf("Bandwidth (excluding transfer): %.2lf GB/s (%lf s)\n", ((1. * flops) / runtime), runtime / 1000000000.);