/*
 * /perf/kseta
 *
 * To get this branch, use:
 *   bzr branch http://darksoft.org/webbzr/perf/kseta
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/* Iteration count that main() passes to exercise(); timings are averaged over it. */
#define ITERATIONS 10
/* Matrix dimension main() uses when no size is given on the command line. */
#define DEFAULT_SIZE 1024

/*
 * Benchmark hooks. They are only declared here; the notes below describe how
 * main() calls them and interprets their results.
 */

/*
 * Runs the benchmark on the matrices a and b, storing the product in res.
 * main() treats a return value of (size_t)-1 as failure; any other non-zero
 * value is reported as the total run time in nanoseconds across all
 * iterations ("excluding transfer"), and 0 suppresses that extra report.
 */
size_t exercise(float *res, float *a, float *b, size_t size, int iterations);
/*
 * Called once before allocation with the program's base name and the
 * (possibly padded) matrix dimension. A non-zero return makes main() exit
 * with that value.
 */
int exercise_init(const char *name, size_t size);
/*
 * Gives the implementation a chance to provide the three buffers itself.
 * A non-zero return makes main() exit with that value. If *res is still NULL
 * afterwards, main() allocates a, b and res with calloc() and frees them
 * itself at the end.
 */
int exercise_allocate(float **res, float **a, float **b, size_t size);
/* Called once by main() after the result file has been written. */
void exercise_free();
/* When greater than 1, main() rounds the matrix dimension up to a multiple of it. */
extern int exercise_required_alignment;


/*
 * Round `size` up to the nearest multiple of `block_size`.
 *
 * size       - value to pad.
 * block_size - required granularity.
 *
 * Returns `size` unchanged when it already is a multiple of `block_size`,
 * or when `block_size` is 0 (no alignment can be applied, and dividing by
 * zero would be undefined behaviour). Otherwise returns the next larger
 * multiple of `block_size`.
 */
size_t get_padded_size(size_t size, size_t block_size) {
    size_t rem;

    if (block_size == 0) return size;

    rem = size % block_size;
    if (rem == 0) return size;

    return size + (block_size - rem);
}


/*
 * Benchmark driver: multiplies two size x size float matrices ITERATIONS
 * times through exercise(), writes the result matrix to
 * "result-<program name>.out" and prints the achieved GFlops.
 *
 * Usage: <program> [size]
 *   size - matrix dimension (default DEFAULT_SIZE); must be a positive
 *          decimal integer. It is rounded up to a multiple of
 *          exercise_required_alignment when that value is greater than 1.
 *
 * Returns 0 on success, the error code of exercise_init() or
 * exercise_allocate() when one of them fails, and 1 on an invalid size,
 * an allocation failure or a failed benchmark run.
 */
int main(int argc, char *argv[]) {
    int err;
    size_t i, count;
    size_t size = DEFAULT_SIZE;
    int standard_memory = 0;
    float *a = NULL, *b = NULL, *res = NULL;

    size_t runtime;
    size_t us;
    double flops;
    struct timeval tv1, tv2;

    char *name, fname[255];
    FILE *f;

    /* Use the base name of the executable to label the output file. */
    name = strrchr(argv[0], '/');
    if (name) name += 1;
    else name = argv[0];

    if (argc > 1) {
        /* strtol (unlike atoi) lets us reject garbage, zero and negatives. */
        char *end;
        long requested = strtol(argv[1], &end, 10);

        if (end == argv[1] || *end != '\0' || requested <= 0) {
            fprintf(stderr, "Invalid matrix size '%s'\n", argv[1]);
            return 1;
        }
        size = (size_t)requested;
    }

    if (exercise_required_alignment > 1) {
        size = get_padded_size(size, exercise_required_alignment);
    }

    /* size * size * sizeof(float) bytes are needed per matrix; make sure
     * that product is representable before it is used below. */
    if (size == 0 || size > ((size_t)-1) / sizeof(float) / size) {
        fprintf(stderr, "Matrix size %lu is out of range\n", (unsigned long)size);
        return 1;
    }
    count = size * size;

    /* Kept in floating point: 2 * size^3 overflows a 32-bit size_t already
     * for moderately sized matrices. */
    flops = 2.0 * size * size * size;
    printf("Matrix multiplication %lux%lu by %lux%lu, Iterations %u, %lu GFlops required\n",
           (unsigned long)size, (unsigned long)size,
           (unsigned long)size, (unsigned long)size,
           (unsigned)ITERATIONS, (unsigned long)(flops / 1000000000));

    err = exercise_init(name, size);
    if (err) return err;

    err = exercise_allocate(&res, &a, &b, size);
    if (err) return err;

    if (!res) {
        /* The implementation did not supply buffers: fall back to the heap. */
        standard_memory = 1;

        a = (float *)calloc(count, sizeof(float));
        b = (float *)calloc(count, sizeof(float));
        res = (float *)calloc(count, sizeof(float));
        if (a == NULL || b == NULL || res == NULL) {
            fprintf(stderr, "\n Can't allocate memory for arrays\n");
            free(res);
            free(b);
            free(a);
            return 1;
        }
    }

    /* Fixed seed so that every run multiplies the same matrices. */
    srand(1);
    for (i = 0; i < count; i++) {
        a[i] = 1. * rand() / RAND_MAX;
        b[i] = 1. * rand() / RAND_MAX;
    }
    memset(res, 0, count * sizeof(float));

    gettimeofday(&tv1, NULL);
    runtime = exercise(res, a, b, size, ITERATIONS);
    gettimeofday(&tv2, NULL);

    if (runtime == (size_t)-1) {
        /* NOTE(review): buffers are not released on this path; confirm
         * whether exercise_free() may be called after a failed run. */
        return 1;
    }

    /* Dump the raw result matrix so that implementations can be compared. */
    snprintf(fname, sizeof fname, "result-%s.out", name);
    f = fopen(fname, "wb");
    if (f) {
        if (fwrite(res, sizeof(float), count, f) != count) {
            fprintf(stderr, "Warning: incomplete write to %s\n", fname);
        }
        if (fclose(f) != 0) {
            fprintf(stderr, "Warning: failed to close %s\n", fname);
        }
    } else {
        fprintf(stderr, "Warning: can't open %s for writing\n", fname);
    }

    exercise_free();

    if (standard_memory) {
        free(res);
        free(b);
        free(a);
    }

    /* Wall-clock time per iteration in microseconds. */
    us = (tv2.tv_sec - tv1.tv_sec)*1000000 + (tv2.tv_usec - tv1.tv_usec);
    us /= ITERATIONS;
    printf("GFlops: %.2lf (%lf s)\n", ((0.001 * flops) / us), us / 1000000.);
    if (runtime) {
        /* exercise() reported its own time (nanoseconds in total). */
        runtime /= ITERATIONS;
        printf("GFlops (excluding transfer): %.2lf (%lf s)\n", ((1. * flops) / runtime), runtime / 1000000000.);
    }

    return 0;
}