/xmlbench/trunk

To get this branch, use:
bzr branch http://darksoft.org/webbzr/xmlbench/trunk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/*  lib_simd_h:  SIMD Library including idealized SIMD operations
    Copyright (C) 2008, Robert D. Cameron
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters Inc. 
       under the Academic Free License version 3.0.

    This file contains generic architecture-independent definitions,
    importing architecture-specific implementations from appropriate
    files.
*/

/*------------------------------------------------------------*/
#ifndef LIB_SIMD_H
#define LIB_SIMD_H
#include <sys/types.h>
#include <limits.h>

#if (defined(__i386) || defined(__x86_64))
#ifdef TEMPLATED_SIMD_LIB
#include "sse_simd_t.h"
#endif
#ifndef TEMPLATED_SIMD_LIB
#include "sse_simd.h"
#endif
#endif
#ifdef _ARCH_PPC
#include "altivec_simd.h"
#endif

/* Useful definitions from Linux kernel*/
#ifdef __GNUC__
/*
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
*/
/* Branch hint: passes x through unchanged while telling the compiler,
   via __builtin_expect, that x is expected to equal 1. */
static inline long likely(long x) {
	long hinted = __builtin_expect(x, 1);
	return hinted;
}
/* Branch hint: passes x through unchanged while telling the compiler,
   via __builtin_expect, that x is expected to equal 0. */
static inline long unlikely(long x) {
	long hinted = __builtin_expect(x, 0);
	return hinted;
}

#endif
#ifdef _MSC_VER
/* Under _MSC_VER the hints are plain pass-through macros: the argument is
   evaluated once and no prediction information is attached.  Unlike the
   __GNUC__ inline functions, these do not convert the argument to long. */
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/* Shift forward and back operations, based on endianness */
#if BYTE_ORDER == BIG_ENDIAN
#define sisd_sfl(blk, n) sisd_srl(blk, n)
#define sisd_sbl(blk, n) sisd_sll(blk, n)
#define sisd_sfli(blk, n) sisd_srli(blk, n)
#define sisd_sbli(blk, n) sisd_slli(blk, n)
#define sb_op(x, n) ((x)<<(n))
#define sf_op(x, n) ((x)>>(n))
#define cfzl __builtin_clzl
#endif
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef TEMPLATED_SIMD_LIB
/* Little-endian, templated library: a forward shift of the whole register
   is simd<128>::sll with the count supplied in a SIMD register. */
static inline SIMD_type sisd_sfl(SIMD_type blk, SIMD_type n) {
	SIMD_type shifted = simd<128>::sll(blk, n);
	return shifted;
}

/* Little-endian, templated library: a back shift of the whole register
   is simd<128>::srl with the count supplied in a SIMD register. */
static inline SIMD_type sisd_sbl(SIMD_type blk, SIMD_type n) {
	SIMD_type shifted = simd<128>::srl(blk, n);
	return shifted;
}
/* Immediate-count forms; n is passed as a template argument. */
#define sisd_sfli(blk, n) simd<128>::slli<n>(blk)
#define sisd_sbli(blk, n) simd<128>::srli<n>(blk)
#endif
#ifndef TEMPLATED_SIMD_LIB
/* Little-endian, non-templated library: a forward shift of the whole
   register is sisd_sll with the count supplied in a SIMD register. */
static inline SIMD_type sisd_sfl(SIMD_type blk, SIMD_type n) {
	SIMD_type shifted = sisd_sll(blk, n);
	return shifted;
}
/* Little-endian, non-templated library: a back shift of the whole
   register is sisd_srl with the count supplied in a SIMD register. */
static inline SIMD_type sisd_sbl(SIMD_type blk, SIMD_type n) {
	SIMD_type shifted = sisd_srl(blk, n);
	return shifted;
}
/* Immediate-count forms of the two shifts above. */
#define sisd_sfli(blk, n) sisd_slli(blk, n)
#define sisd_sbli(blk, n) sisd_srli(blk, n)
#endif
#define sb_op(x, n) ((x)>>(n))
#define sf_op(x, n) ((x)<<(n))
#ifdef __GNUC__
#define cfzl __builtin_ctzl
#endif
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
//  Count forward zeroes of x using the _BitScanForward intrinsic.
//  precondition: x != 0 -- the intrinsic's return value is not checked, so
//  the index written for a zero argument is whatever the intrinsic leaves.
static inline unsigned long cfzl(unsigned long x) {
	unsigned long first_set_index;
	_BitScanForward(&first_set_index, x);
	return first_set_index;
}
#endif
#endif


/* Count the zero bits that precede the first 1 bit of a SIMD value.

   The value is viewed as an array of unsigned longs and examined from
   element 0 upward; the first nonzero element is resolved with cfzl.
   Returns the bit width of SIMD_type when every element is zero.

   The element count and the bits-per-long are derived from sizeof and
   CHAR_BIT rather than from LONG_BIT, which is not an ISO C macro: if
   it were undefined, "#if LONG_BIT < 64" would silently evaluate as
   0 < 64 and compile in accesses to elems[2] and elems[3]. */
static inline int count_forward_zeroes(SIMD_type bits) {
  union {SIMD_type vec; unsigned long elems[sizeof(SIMD_type)/sizeof(long)];} v;
  const int bits_per_long = (int) (CHAR_BIT * sizeof(unsigned long));
  const int n_longs = (int) (sizeof(v.elems) / sizeof(v.elems[0]));
  int i;
  v.vec = bits;
  for (i = 0; i < n_longs; i++) {
    /* cfzl requires a nonzero argument, hence the test first. */
    if (v.elems[i] != 0) return i * bits_per_long + cfzl(v.elems[i]);
  }
  return (int) (CHAR_BIT * sizeof(SIMD_type));
}

/* Fetch one unsigned long worth of bitstream starting at bit_posn.
   The word is loaded from the byte address stream + bit_posn/8 (which is
   not necessarily long-aligned) and then shifted back with sb_op by the
   remaining bit_posn % 8 bits. */
static inline unsigned long bitstream_segment_from(SIMD_type * stream, int bit_posn) {
  const intptr_t byte_addr = ((intptr_t) stream) + bit_posn/8;
  const unsigned long word = *(unsigned long *) byte_addr;
  return sb_op(word, bit_posn % 8);
}

/* Returns the position of the first 1 bit at or after bit_posn.
   The scan is unbounded: it keeps reading successive unsigned longs until
   a nonzero one is found, so the caller must place a sentinel 1 bit to
   fence it.  Works for either endianness. */
static inline int bitstream_scan(SIMD_type * stream, int bit_posn) {
  unsigned long * word_ptr = (unsigned long *) (((intptr_t) stream) + bit_posn/8);
  /* First word: discard the bits that lie before bit_posn within its byte. */
  unsigned long word = sb_op(*word_ptr, bit_posn % 8);
  if (word != 0) return bit_posn + cfzl(word);
  /* Nothing in the first word; step through whole words until one is nonzero. */
  for (;;) {
    word = *++word_ptr;
    if (word != 0) break;
  }
  /* Bit position of the word's first byte, measured from the stream start. */
  int base_bits = 8*((intptr_t) word_ptr - (intptr_t) stream);
  return base_bits + cfzl(word);
}

/* Returns the position of the first 1 bit in the stream, scanning from
   bit 0 one unsigned long at a time.  The loop has no bound: it stops
   only at the first nonzero word, so the caller must ensure one exists. */
static inline int bitstream_scan0(SIMD_type * stream) {
  unsigned long * cursor = (unsigned long *) stream;
  unsigned long word;
  for (word = *cursor; word == 0; word = *cursor) {
    cursor++;
  }
  /* Bit position of the nonzero word, measured from the stream start. */
  int base_bits = 8*((intptr_t) cursor - (intptr_t) stream);
  return base_bits + cfzl(word);
}


/* Allocator for arrays of aligned SIMD data values.
   Ideally the new operator could be used to allocate arrays
   of vector data aligned on the required boundaries
   (16-byte for SSE or Altivec).  But since this alignment
   is not guaranteed except on Mac OS X, the following routine
   is used.

   SIMD_packs is the number of SIMD_type elements requested.
   Under __APPLE__ the array comes from new[].  Elsewhere it comes from
   _aligned_malloc (_MSC_VER) or posix_memalign, aligned to
   sizeof(SIMD_type); if that allocation fails, or the requested byte
   count would not fit in size_t, a message is printed and the process
   ends with exit(-1).  Release the result with simd_delete. */

static inline SIMD_type * simd_new(size_t SIMD_packs) {
#ifdef __APPLE__
	return new SIMD_type [SIMD_packs];
#else
	SIMD_type * v = 0;
	int ok = 0;
	/* Skip the allocation when sizeof(SIMD_type) * SIMD_packs would wrap
	   around size_t and silently yield an undersized array. */
	if (SIMD_packs <= ((size_t) -1) / sizeof(SIMD_type)) {
#ifdef _MSC_VER
		v = (SIMD_type*)_aligned_malloc(sizeof(SIMD_type) * SIMD_packs, sizeof(SIMD_type));
		ok = (v != 0);
#else
		/* posix_memalign reports success through its return value. */
		ok = (posix_memalign((void **) &v,
				     sizeof(SIMD_type),
				     sizeof(SIMD_type) * SIMD_packs) == 0);
#endif
	}
	if (ok) return v;
	/* size_t is not int, so "%i" was a format/argument mismatch; convert
	   explicitly and print with a matching specifier. */
	printf("Failed to allocated new array of %lu SIMD packs.\n", (unsigned long) SIMD_packs);
	exit(-1);
#endif
}

/* Release an array obtained from simd_new.
   Each platform uses the deallocator that pairs with simd_new's allocator:
   delete[] for new[] under __APPLE__, _aligned_free for _aligned_malloc
   under _MSC_VER (such memory must not be handed to free()), and free()
   for posix_memalign everywhere else. */
static inline void simd_delete(SIMD_type * blk_ptr) {
#if defined(__APPLE__)
  delete [] blk_ptr;
#elif defined(_MSC_VER)
  _aligned_free((void *) blk_ptr);
#else
  free((void *) blk_ptr);
#endif
}

#endif