1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
/* bitplex - Parallel bit stream module.
Copyright (c) 2007, 2008, Robert D. Cameron.
Licensed to the public under the Open Software License 3.0.
Licensed to International Characters, Inc., under the Academic
Free License 3.0.
Given a character stream of 8-bit code units, this module
produces a parallel bit stream representation.
*/
#include "bitplex.h"
#include "../lib/lib_simd.h"
#ifdef CODE_CLOCKING
/*
#include "../../code_clocker/clocker/code_clocker_session.h"
Code_Clocker * transpose_clocker;
*/
#endif
/*
 * s2p_step(s0, s1, hi_mask, shift, p0, p1)
 *
 * One step of the serial-to-parallel transposition.  The two inputs s0 and
 * s1 are packed twice with the 16-bit pack operations of the SIMD library
 * (the "h,h" and "l,l" variants); the two packed values are then blended
 * under hi_mask, with one side shifted by `shift` bits, to give the two
 * outputs p0 and p1.
 *
 * Usage notes:
 *   - s0 and s1 are each expanded twice, so they should be free of side
 *     effects.
 *   - p0 and p1 must be assignable expressions.
 *   - `shift` must be a compile-time constant when TEMPLATED_SIMD_LIB is
 *     defined, because it is used as a template argument there.
 *   - The expansion is a brace-enclosed compound statement.
 *
 * Exactly one of the two definitions below is active, selected by
 * TEMPLATED_SIMD_LIB.
 */
#ifdef TEMPLATED_SIMD_LIB
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
  {\
    BitBlock packed_hi = simd<16>::pack<h,h>(s0, s1);\
    BitBlock packed_lo = simd<16>::pack<l,l>(s0, s1);\
    p0 = simd_if(hi_mask, packed_hi, simd<16>::srli<shift>(packed_lo));\
    p1 = simd_if(hi_mask, simd<16>::slli<shift>(packed_hi), packed_lo);\
  }
#else
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
  {\
    BitBlock packed_hi = simd_pack_16_hh(s0, s1);\
    BitBlock packed_lo = simd_pack_16_ll(s0, s1);\
    p0 = simd_if(hi_mask, packed_hi, simd_srli_16(packed_lo, shift));\
    p1 = simd_if(hi_mask, simd_slli_16(packed_hi, shift), packed_lo);\
  }
#endif
/*
 * Transposes one block of byte data into eight bit streams.
 *
 *   s : input, eight consecutive BytePacks (s[0] .. s[7] are read).
 *   p : output, eight BitBlocks (p[0] .. p[7] are all written).
 *
 * The work is done in three rounds of s2p_step, using the 2-, 4- and 8-bit
 * high masks with shifts of 1, 2 and 4 respectively.  The local names record
 * the bit positions each intermediate is meant to carry (evens = 0,2,4,6;
 * odds = 1,3,5,7; b04 = 0 and 4; and so on) -- this reading follows the
 * mask/shift progression and should be confirmed against the SIMD library
 * definitions.
 */
static inline void s2p_bytepack(BytePack s[], BitBlock p[]) {
#ifdef TEMPLATED_SIMD_LIB
    BitBlock mask_2 = simd<2>::himask();
    BitBlock mask_4 = simd<4>::himask();
    BitBlock mask_8 = simd<8>::himask();
#else
    BitBlock mask_2 = simd_himask_2;
    BitBlock mask_4 = simd_himask_4;
    BitBlock mask_8 = simd_himask_8;
#endif

    /* Round 1 results: four "even" and four "odd" intermediates. */
    BitBlock evens_0, evens_1, evens_2, evens_3;
    BitBlock odds_0, odds_1, odds_2, odds_3;
    /* Round 2 results: two intermediates for each bit-position pair. */
    BitBlock b04_0, b26_0, b04_1, b26_1;
    BitBlock b15_0, b37_0, b15_1, b37_1;

    /* Round 1: combine the input packs pairwise.  The pairing order depends
       on the byte order of the target; the two tests are kept as separate
       #if blocks. */
#if (BYTE_ORDER == BIG_ENDIAN)
    s2p_step(s[0], s[1], mask_2, 1, evens_0, odds_0);
    s2p_step(s[2], s[3], mask_2, 1, evens_1, odds_1);
    s2p_step(s[4], s[5], mask_2, 1, evens_2, odds_2);
    s2p_step(s[6], s[7], mask_2, 1, evens_3, odds_3);
#endif
#if (BYTE_ORDER == LITTLE_ENDIAN)
    s2p_step(s[7], s[6], mask_2, 1, evens_0, odds_0);
    s2p_step(s[5], s[4], mask_2, 1, evens_1, odds_1);
    s2p_step(s[3], s[2], mask_2, 1, evens_2, odds_2);
    s2p_step(s[1], s[0], mask_2, 1, evens_3, odds_3);
#endif

    /* Round 2: split the even and odd intermediates again. */
    s2p_step(evens_0, evens_1, mask_4, 2, b04_0, b26_0);
    s2p_step(evens_2, evens_3, mask_4, 2, b04_1, b26_1);
    s2p_step(odds_0, odds_1, mask_4, 2, b15_0, b37_0);
    s2p_step(odds_2, odds_3, mask_4, 2, b15_1, b37_1);

    /* Round 3: final split, writing the eight output streams. */
    s2p_step(b04_0, b04_1, mask_8, 4, p[0], p[4]);
    s2p_step(b15_0, b15_1, mask_8, 4, p[1], p[5]);
    s2p_step(b26_0, b26_1, mask_8, 4, p[2], p[6]);
    s2p_step(b37_0, b37_1, mask_8, 4, p[3], p[7]);
}
/*
 * Sets x8basis to the buffer returned by simd_new(BUFFER_SIZE/PACKSIZE),
 * viewed as an array of BitBlockBasis.  The buffer is handed back to
 * simd_delete in the destructor.
 * NOTE(review): the unit in which simd_new counts its argument is not
 * visible here -- confirm the buffer is large enough for every block index
 * that TransposeToBitStreams may be asked to write.
 */
Bitplex::Bitplex() {
    x8basis = reinterpret_cast<BitBlockBasis *>(simd_new(BUFFER_SIZE / PACKSIZE));
#ifdef CODE_CLOCKING
    /* Disabled instrumentation hook:
    transpose_clocker = register_Code_Clocker("s2p", "transposition to parallel bit streams\n");
    */
#endif
}
/*
 * Hands the x8basis buffer back to simd_delete.
 * (No trailing ';' after the body: at namespace scope that would be an
 * empty declaration, which pedantic pre-C++11 builds reject.)
 */
Bitplex::~Bitplex() {
    simd_delete((SIMD_type *) x8basis);
}
/*
 * Transposes `blocks` consecutive blocks of byte data into x8basis.
 *
 *   pseudo_ASCII_stream : input BytePacks; block i occupies the eight packs
 *                         starting at index 8*i.
 *   blocks              : number of blocks to transpose; nothing is done
 *                         when it is zero or negative.
 *
 * Block i is written to x8basis[i].bit.  No check is made here that
 * `blocks` fits within the x8basis buffer or the input stream; the caller
 * is responsible for that.
 */
void Bitplex::TransposeToBitStreams(BytePack * pseudo_ASCII_stream, int blocks) {
#ifdef CODE_CLOCKING
    /* Disabled instrumentation hook:
    start_Interval(transpose_clocker);
    */
#endif
    const int packs_per_block = 8;
    for (int i = 0; i < blocks; ++i) {
        BytePack * block_input = pseudo_ASCII_stream + i * packs_per_block;
        s2p_bytepack(block_input, x8basis[i].bit);
    }
#ifdef CODE_CLOCKING
    /* Disabled instrumentation hook:
    end_Interval(transpose_clocker, BUFFER_SIZE);
    */
#endif
}
|