bzr branch
http://darksoft.org/webbzr/ani/mrses
1
by Suren A. Chilingaryan
Initial import |
1 |
#ifndef _MRSES_IMPL_H
|
2 |
#define _MRSES_IMPL_H
|
|
3 |
||
4 |
#include <stdlib.h> |
|
5 |
#include <math.h> |
|
6 |
||
7 |
#include "mrses.h" |
|
8 |
#include "hw_sched.h" |
|
9 |
#include "tools.h" |
|
10 |
||
11 |
#define HW_ALIGN 16 // Alignment unit; SPE support requires at least 16 (and it should be a multiple of the MRSESDataType size anyway) |
|
12 |
#define SPE_BLOCK 16 // Minimal block of work processed by the SPE code |
|
13 |
#define SIMD_BLOCK 16 // SIMD block size, in bytes |
|
14 |
// NOTE(review): presumably the number of blocks handled together in iterate mode (cf. iterate_size in MRSESContextT) - confirm against the users of this macro
#define HW_ITERATE_BLOCKS 16
|
|
2
by Suren A. Chilingaryan
Fix SPE crashes on big (above 4096) and non-power-of-2 number of properties |
15 |
// NOTE(review): presumably the largest single SPE transfer, in bytes (16384 matches the 16 KiB Cell DMA limit) - confirm
#define SPE_MAX_TRANSFER_SIZE 16384
|
1
by Suren A. Chilingaryan
Initial import |
16 |
|
17 |
//#define TRACE_TIMINGS 1
|
|
18 |
//#define USE_FAST_RANDOM 1 //* the default random/rand somehow serializes the threads
|
|
19 |
//#define FIX_RANDOM 1
|
|
20 |
//#define HW_USE_BLOCKED_MULTIPLY //* Alternative outdated way of processing in SPU (don't use)
|
|
21 |
||
22 |
||
23 |
/*
 * Precision-neutral aliases for the numerical back end: each blas_* name maps
 * to the single-precision (s-prefixed) CBLAS routine, and lapack_potrf maps to
 * the single-precision Fortran-style LAPACK symbol spotrf_.
 * NOTE(review): presumably these must stay in sync with the definition of
 * MRSESDataType (expected to be float) - confirm in mrses.h.
 */
#define blas_asum cblas_sasum
|
|
24 |
#define blas_scal cblas_sscal
|
|
25 |
#define blas_axpy cblas_saxpy
|
|
26 |
#define blas_gemv cblas_sgemv
|
|
27 |
#define blas_gemm cblas_sgemm
|
|
28 |
#define blas_syrk cblas_ssyrk
|
|
29 |
#define blas_trsm cblas_strsm
|
|
30 |
#define blas_trsv cblas_strsv
|
|
31 |
#define blas_dot cblas_sdot
|
|
32 |
#define lapack_potrf spotrf_
|
|
33 |
||
34 |
/**
 * Implementation-side MRSES context: the preprocessed A and B sample matrices,
 * the difference of their means, the scheduler handle, and the block/iteration
 * bookkeeping together with the client-visible index and result buffers.
 */
struct MRSESContextT { |
|
35 |
MRSESDistance dist; /**< Distance selector; MRSESDistance is declared outside this header */ |
|
36 |
||
2
by Suren A. Chilingaryan
Fix SPE crashes on big (above 4096) and non-power-of-2 number of properties |
37 |
int palloc; /**< Size of vectors in the index array (iterate mode only) */ |
1
by Suren A. Chilingaryan
Initial import |
38 |
int properties; /**< Number of properties */ |
39 |
int alloc; /**< Size of vectors in the A and B matrices, chosen for alignment */ |
|
40 |
int nA, nB; /**< Number of samples in A and in B, respectively */ |
|
41 |
||
42 |
MRSESDataType *A; /**< Actually stores (A - mean(A))/sqrt(nA) */ |
|
43 |
MRSESDataType *B; /**< Actually stores (B - mean(B))/sqrt(nB) */ |
|
44 |
||
45 |
MRSESDataType *mean; /**< Actually stores mean(A) - mean(B) */ |
|
46 |
||
47 |
HWSched sched; /**< Scheduler handle; HWSched is declared outside this header (presumably hw_sched.h) */ |
|
48 |
||
49 |
int iterate_size; /**< Number of blocks per block group */ |
|
50 |
||
51 |
int max_block_size; /**< NOTE(review): presumably the upper bound for block_size - confirm */ |
|
52 |
int block_size; /**< NOTE(review): presumably the block size currently in use - confirm */ |
|
53 |
int cur_chunk; /**< NOTE(review): presumably the index of the chunk being processed - confirm */ |
|
54 |
||
55 |
int width; /**< NOTE(review): meaning not visible in this header - confirm against the users of this field */ |
|
56 |
int iterations; /**< NOTE(review): presumably the number of iterations to perform - confirm */ |
|
57 |
MRSESIntType *index; /**< In compute mode: provided by the client application (not allocated here); in iterate mode: allocated here, and also indicates that iterations were already started */ |
|
58 |
MRSESIntType *ires; /**< Array of resulting indices */ |
|
59 |
MRSESDataType *result; /**< Provided by the client application (not allocated here) */ |
|
60 |
||
61 |
};
|
|
62 |
typedef struct MRSESContextT MRSESContextS; |
|
63 |
||
64 |
#endif /* _MRSES_IMPL_H */ |