1
/* bitplex - Parallel bit stream module.
2
Copyright (c) 2007, 2008, Robert D. Cameron.
3
Licensed to the public under the Open Software License 3.0.
4
Licensed to International Characters, Inc., under the Academic
7
Given a character stream of 8-bit code units, this module
8
produces a parallel bit stream representation.
14
#include "../lib/lib_simd.h"
18
#include "../../code_clocker/clocker/code_clocker_session.h"
19
// File-scope timing handle for the transposition step. In the code visible
// here it is assigned from register_Code_Clocker("s2p", ...) and passed to
// start_Interval()/end_Interval() around the transposition loop.
Code_Clocker * transpose_clocker;
22
#ifdef TEMPLATED_SIMD_LIB
23
// s2p_step, templated SIMD library variant: one stage of the transposition.
//   t0 = simd<16>::pack<h,h>(s0, s1)
//   t1 = simd<16>::pack<l,l>(s0, s1)
//   p0 = simd_if(hi_mask, t0, t1 shifted right by `shift` in 16-bit fields)
//   p1 = simd_if(hi_mask, t0 shifted left by `shift` in 16-bit fields, t1)
// Presumably pack<h,h>/pack<l,l> gather the high/low halves of each 16-bit
// field and simd_if selects its second argument where hi_mask is set —
// confirm against lib_simd.h.
// NOTE(review): t0 and t1 are not declared on the lines visible here; confirm
// the full macro text declares them.
// Comments are kept above the #define so the backslash line continuations of
// the macro body are not disturbed.
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
26
t0 = simd<16>::pack<h,h>(s0, s1);\
27
t1 = simd<16>::pack<l,l>(s0, s1);\
28
p0 = simd_if(hi_mask, t0, simd<16>::srli<shift>(t1));\
29
p1 = simd_if(hi_mask, simd<16>::slli<shift>(t0), t1);\
32
#ifndef TEMPLATED_SIMD_LIB
33
// s2p_step, non-templated SIMD library variant: same data flow as the
// templated form, spelled with the function/macro-style SIMD operations.
//   t0 = simd_pack_16_hh(s0, s1)
//   t1 = simd_pack_16_ll(s0, s1)
//   p0 = simd_if(hi_mask, t0, simd_srli_16(t1, shift))
//   p1 = simd_if(hi_mask, simd_slli_16(t0, shift), t1)
// NOTE(review): t0 and t1 are not declared on the lines visible here; confirm
// the full macro text declares them.
// Comments are kept above the #define so the backslash line continuations of
// the macro body are not disturbed.
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
36
t0 = simd_pack_16_hh(s0, s1);\
37
t1 = simd_pack_16_ll(s0, s1);\
38
p0 = simd_if(hi_mask, t0, simd_srli_16(t1, shift));\
39
p1 = simd_if(hi_mask, simd_slli_16(t0, shift), t1);\
43
// Transposes one group of eight byte packs into eight bit blocks.
//   s: input; elements s[0] through s[7] are read.
//   p: output; elements p[0] through p[7] are written.
// The work is done by twelve s2p_step invocations arranged in three stages
// that use (mask_2, shift 1), (mask_4, shift 2) and (mask_8, shift 4).
static inline void s2p_bytepack(BytePack s[], BitBlock p[]) {
44
#ifdef TEMPLATED_SIMD_LIB
45
// High-half masks for 2-, 4- and 8-bit fields (templated library spelling).
BitBlock mask_2 = simd<2>::himask();
46
BitBlock mask_4 = simd<4>::himask();
47
BitBlock mask_8 = simd<8>::himask();
49
#ifndef TEMPLATED_SIMD_LIB
50
// Same three masks, taken from the non-templated library constants.
BitBlock mask_2 = simd_himask_2;
51
BitBlock mask_4 = simd_himask_4;
52
BitBlock mask_8 = simd_himask_8;
54
// Intermediate results: the first two lines hold stage 1 outputs, the next
// two hold stage 2 outputs.
BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3;
55
BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3;
56
BitBlock bit00004444_0, bit22226666_0, bit00004444_1, bit22226666_1;
57
BitBlock bit11115555_0, bit33337777_0, bit11115555_1, bit33337777_1;
58
// Stage 1 (mask_2, shift 1). Big-endian builds consume the input packs in
// ascending pairs: (s[0], s[1]) ... (s[6], s[7]).
#if (BYTE_ORDER == BIG_ENDIAN)
59
s2p_step(s[0], s[1], mask_2, 1, bit00224466_0, bit11335577_0);
60
s2p_step(s[2], s[3], mask_2, 1, bit00224466_1, bit11335577_1);
61
s2p_step(s[4], s[5], mask_2, 1, bit00224466_2, bit11335577_2);
62
s2p_step(s[6], s[7], mask_2, 1, bit00224466_3, bit11335577_3);
64
// Stage 1 for little-endian builds: the same outputs are produced from the
// input packs taken in descending pairs: (s[7], s[6]) ... (s[1], s[0]).
#if (BYTE_ORDER == LITTLE_ENDIAN)
65
s2p_step(s[7], s[6], mask_2, 1, bit00224466_0, bit11335577_0);
66
s2p_step(s[5], s[4], mask_2, 1, bit00224466_1, bit11335577_1);
67
s2p_step(s[3], s[2], mask_2, 1, bit00224466_2, bit11335577_2);
68
s2p_step(s[1], s[0], mask_2, 1, bit00224466_3, bit11335577_3);
70
// Stage 2 (mask_4, shift 2): combine pairs of stage 1 results.
s2p_step(bit00224466_0, bit00224466_1, mask_4, 2, bit00004444_0, bit22226666_0);
71
s2p_step(bit00224466_2, bit00224466_3, mask_4, 2, bit00004444_1, bit22226666_1);
72
s2p_step(bit11335577_0, bit11335577_1, mask_4, 2, bit11115555_0, bit33337777_0);
73
s2p_step(bit11335577_2, bit11335577_3, mask_4, 2, bit11115555_1, bit33337777_1);
74
// Stage 3 (mask_8, shift 4): combine pairs of stage 2 results; each step
// writes the output pair p[k] and p[k+4] for k = 0..3.
s2p_step(bit00004444_0, bit00004444_1, mask_8, 4, p[0], p[4]);
75
s2p_step(bit11115555_0, bit11115555_1, mask_8, 4, p[1], p[5]);
76
s2p_step(bit22226666_0, bit22226666_1, mask_8, 4, p[2], p[6]);
77
s2p_step(bit33337777_0, bit33337777_1, mask_8, 4, p[3], p[7]);
80
// Obtain the x8basis storage from simd_new, passing BUFFER_SIZE/PACKSIZE as
// the size argument.
// NOTE(review): the enclosing function header and the units expected by
// simd_new are not visible here — confirm the size matches the number of
// x8basis entries indexed by the transposition loop.
x8basis = (BitBlockBasis *) simd_new(BUFFER_SIZE/PACKSIZE);
83
// Register the "s2p" timer that is started and ended around transposition.
transpose_clocker = register_Code_Clocker("s2p", "transposition to parallel bit streams\n");
89
// Hand the x8basis storage back through simd_delete.
// NOTE(review): presumably the counterpart of the simd_new allocation; the
// enclosing function header is not visible here — confirm.
simd_delete((SIMD_type *) x8basis);
96
// Transposes `blocks` groups of byte packs into x8basis.
//   pseudo_ASCII_stream: input packs; group blk starts at index blk*8.
//   blocks: number of groups to process; group blk is written to
//           x8basis[blk].bit.
// The loop is bracketed by start_Interval/end_Interval on transpose_clocker.
void Bitplex::TransposeToBitStreams(BytePack * pseudo_ASCII_stream, int blocks) {
99
start_Interval(transpose_clocker);
103
for (int blk = 0; blk < blocks; blk++) {
104
// Eight consecutive packs are consumed per block.
s2p_bytepack(&pseudo_ASCII_stream[blk*8], x8basis[blk].bit);
109
// NOTE(review): the interval is closed with BUFFER_SIZE regardless of the
// value of `blocks` — confirm this is the intended work measure.
end_Interval(transpose_clocker, BUFFER_SIZE);