/xmlbench/trunk

To get this branch, use:
bzr branch http://darksoft.org/webbzr/xmlbench/trunk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*  bitplex - Parallel bit stream module.
    Copyright (c) 2007, 2008, Robert D. Cameron.
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters, Inc., under the Academic
    Free License 3.0.

    Given a character stream of 8-bit code units, this module
    produces a parallel bit stream representation.


*/

#include "bitplex.h"
#include "../lib/lib_simd.h"

#ifdef CODE_CLOCKING
/*
#include "../../code_clocker/clocker/code_clocker_session.h"
Code_Clocker * transpose_clocker;
*/
#endif
/*  s2p_step(s0, s1, hi_mask, shift, p0, p1)

    One step of the transposition performed by s2p_bytepack.

    s0, s1   - input blocks; each is read twice (once per pack call).
    hi_mask  - selector passed as the first argument of simd_if.
    shift    - shift count for the 16-bit-field shifts.  In the
               TEMPLATED_SIMD_LIB variant it is used as a template
               argument, so it must be a compile-time constant.
    p0, p1   - lvalues that receive the two results.

    t0 is the 16-bit "hh" pack of (s0, s1) and t1 is the "ll" pack.
    p0 = simd_if(hi_mask, t0, t1 shifted right by shift)
    p1 = simd_if(hi_mask, t0 shifted left by shift, t1)

    Both temporaries are computed before either output is written, so
    an output may name the same object as an input.  The temporaries
    are called t0 and t1; do not pass arguments with those names.

    The body is wrapped in do { ... } while (0) so that
    "s2p_step(...);" is exactly one statement and may be used as the
    unbraced body of an if/else.
*/
#ifdef TEMPLATED_SIMD_LIB
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
do {\
  BitBlock t0, t1;\
  t0 = simd<16>::pack<h,h>(s0, s1);\
  t1 = simd<16>::pack<l,l>(s0, s1);\
  p0 = simd_if(hi_mask, t0, simd<16>::srli<shift>(t1));\
  p1 = simd_if(hi_mask, simd<16>::slli<shift>(t0), t1);\
} while (0)
#else
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
do {\
  BitBlock t0, t1;\
  t0 = simd_pack_16_hh(s0, s1);\
  t1 = simd_pack_16_ll(s0, s1);\
  p0 = simd_if(hi_mask, t0, simd_srli_16(t1, shift));\
  p1 = simd_if(hi_mask, simd_slli_16(t0, shift), t1);\
} while (0)
#endif

/*  s2p_bytepack - transpose eight byte packs into eight bit blocks.

    s  - input; elements s[0] .. s[7] are read.
    p  - output; elements p[0] .. p[7] are written.

    The transposition runs as three stages of s2p_step calls:
      stage 1: pairs of input packs,  mask_2, shift 1  (4 calls)
      stage 2: pairs of stage-1 outputs, mask_4, shift 2  (4 calls)
      stage 3: pairs of stage-2 outputs, mask_8, shift 4  (4 calls)
    The intermediate names (bit00224466_*, bit11335577_*, ...) appear
    to record which bit positions each intermediate carries.

    Stage 1 pairs the inputs in ascending order (s[0],s[1] ...) when
    BYTE_ORDER == BIG_ENDIAN and in descending order (s[7],s[6] ...)
    when BYTE_ORDER == LITTLE_ENDIAN.
*/
static inline void s2p_bytepack(BytePack s[], BitBlock p[]) {
#ifdef TEMPLATED_SIMD_LIB
    BitBlock mask_2 = simd<2>::himask();
    BitBlock mask_4 = simd<4>::himask();
    BitBlock mask_8 = simd<8>::himask();
#else
    BitBlock mask_2 = simd_himask_2;
    BitBlock mask_4 = simd_himask_4;
    BitBlock mask_8 = simd_himask_8;
#endif
    BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3;
    BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3;
    BitBlock bit00004444_0, bit22226666_0, bit00004444_1, bit22226666_1;
    BitBlock bit11115555_0, bit33337777_0, bit11115555_1, bit33337777_1;

    /* Without this guard, a BYTE_ORDER that matches neither constant
       would compile out both stage-1 variants and the later stages
       would read uninitialised intermediates.  (If none of the three
       macros is defined at all, every comparison is 0 == 0, the guard
       stays silent and both variants below are compiled, as before.) */
#if (BYTE_ORDER != BIG_ENDIAN) && (BYTE_ORDER != LITTLE_ENDIAN)
#error "s2p_bytepack: BYTE_ORDER must equal BIG_ENDIAN or LITTLE_ENDIAN"
#endif

    /* Stage 1. */
#if (BYTE_ORDER == BIG_ENDIAN)
    s2p_step(s[0], s[1], mask_2, 1, bit00224466_0, bit11335577_0);
    s2p_step(s[2], s[3], mask_2, 1, bit00224466_1, bit11335577_1);
    s2p_step(s[4], s[5], mask_2, 1, bit00224466_2, bit11335577_2);
    s2p_step(s[6], s[7], mask_2, 1, bit00224466_3, bit11335577_3);
#endif
#if (BYTE_ORDER == LITTLE_ENDIAN)
    s2p_step(s[7], s[6], mask_2, 1, bit00224466_0, bit11335577_0);
    s2p_step(s[5], s[4], mask_2, 1, bit00224466_1, bit11335577_1);
    s2p_step(s[3], s[2], mask_2, 1, bit00224466_2, bit11335577_2);
    s2p_step(s[1], s[0], mask_2, 1, bit00224466_3, bit11335577_3);
#endif

    /* Stage 2. */
    s2p_step(bit00224466_0, bit00224466_1, mask_4, 2, bit00004444_0, bit22226666_0);
    s2p_step(bit00224466_2, bit00224466_3, mask_4, 2, bit00004444_1, bit22226666_1);
    s2p_step(bit11335577_0, bit11335577_1, mask_4, 2, bit11115555_0, bit33337777_0);
    s2p_step(bit11335577_2, bit11335577_3, mask_4, 2, bit11115555_1, bit33337777_1);

    /* Stage 3: each call fills one output pair p[i], p[i+4]. */
    s2p_step(bit00004444_0, bit00004444_1, mask_8, 4, p[0], p[4]);
    s2p_step(bit11115555_0, bit11115555_1, mask_8, 4, p[1], p[5]);
    s2p_step(bit22226666_0, bit22226666_1, mask_8, 4, p[2], p[6]);
    s2p_step(bit33337777_0, bit33337777_1, mask_8, 4, p[3], p[7]);
}
/*  Constructor: obtains the x8basis buffer from simd_new, passing
    BUFFER_SIZE/PACKSIZE as the size argument.  The buffer is handed
    back to simd_delete in the destructor.
    NOTE(review): presumably this is sized to hold the output of
    transposing one full input buffer - confirm against the
    definitions of BUFFER_SIZE, PACKSIZE and BitBlockBasis. */
Bitplex::Bitplex () {
	x8basis = (BitBlockBasis *) simd_new(BUFFER_SIZE/PACKSIZE);
#ifdef CODE_CLOCKING
/* Clocker registration is currently disabled (commented out):
	transpose_clocker =	register_Code_Clocker("s2p", "transposition to parallel bit streams\n");
*/
#endif
}

/*  Destructor: hands the x8basis buffer (obtained from simd_new in
    the constructor) back to simd_delete.
    NOTE(review): no copy constructor or copy assignment is visible in
    this file; if Bitplex objects can be copied, two objects would
    release the same buffer - confirm against the class declaration. */
Bitplex::~Bitplex() {
	simd_delete((SIMD_type *) x8basis);
}





/*  TransposeToBitStreams - transpose `blocks` input blocks into x8basis.

    pseudo_ASCII_stream - input byte packs; block k occupies the eight
                          packs starting at index k*8.
    blocks              - number of blocks to transpose; nothing is
                          done when it is zero or negative.

    For each block k, s2p_bytepack writes its eight results into
    x8basis[k].bit.
*/
void Bitplex::TransposeToBitStreams(BytePack * pseudo_ASCII_stream, int blocks) {
#ifdef CODE_CLOCKING
/*
	start_Interval(transpose_clocker);
*/
#endif

	int block_index = 0;
	while (block_index < blocks) {
		/* Eight byte packs make up one block of input. */
		BytePack * block_input = pseudo_ASCII_stream + block_index * 8;
		s2p_bytepack(block_input, x8basis[block_index].bit);
		++block_index;
	}

#ifdef CODE_CLOCKING
/*
	end_Interval(transpose_clocker, BUFFER_SIZE);
*/
#endif
}