2
Copyright 2010-2011, D. E. Shaw Research.
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are
9
* Redistributions of source code must retain the above copyright
10
notice, this list of conditions, and the following disclaimer.
12
* Redistributions in binary form must reproduce the above copyright
13
notice, this list of conditions, and the following disclaimer in the
14
documentation and/or other materials provided with the distribution.
16
* Neither the name of D. E. Shaw Research nor the names of its
17
contributors may be used to endorse or promote products derived from
18
this software without specific prior written permission.
20
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
#ifndef _r123array_dot_h__
33
#define _r123array_dot_h__
34
#include "features/compilerfeatures.h"
35
#include "features/sse.h"
38
/* Empty expansions of the two helper macros used by _r123array_tpl: with
   these definitions CXXMETHODS and CXXOVERLOADS expand to nothing, so the
   generated struct gets no member functions, typedefs or stream operators.
   NOTE(review): presumably this is the plain-C branch of a __cplusplus
   conditional (an "#endif / * __cplusplus * /" appears further down) -- the
   guarding #if is not visible here; confirm. */
#define CXXMETHODS(_N, W, T)
39
#define CXXOVERLOADS(_N, W, T)
49
/** @defgroup arrayNxW The r123arrayNxW classes
51
Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers.
52
It is functionally equivalent to the C++0x std::array<uintW_t, N>,
53
but does not require C++0x features or libraries.
55
In addition to meeting most of the requirements of a Container,
56
it also has a member function, incr(), which increments the zero-th
57
element and carries overflows into higher-indexed elements. Thus,
58
by using incr(), sequences of up to 2^(N*W) distinct values can be produced.
61
If SSE is supported by the compiler, then the class
62
r123array1xm128i is also defined, in which the data member is an
63
array of one r123m128i object.
65
@cond HIDDEN_FROM_DOXYGEN
68
template <typename value_type>
69
inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
71
for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
72
v |= ((value_type)(*p32++)) << (32*i);
76
// Work-alike methods and typedefs modeled on std::array:
77
#define CXXMETHODS(_N, W, T) \
78
typedef T value_type; \
79
typedef T* iterator; \
80
typedef const T* const_iterator; \
81
typedef value_type& reference; \
82
typedef const value_type& const_reference; \
83
typedef size_t size_type; \
84
typedef ptrdiff_t difference_type; \
86
typedef const T* const_pointer; \
87
typedef std::reverse_iterator<iterator> reverse_iterator; \
88
typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
89
/* Boost.array has static_size. C++11 specializes tuple_size */ \
90
enum {static_size = _N}; \
91
R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
92
R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
93
R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
94
R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
95
R123_CUDA_DEVICE size_type size() const { return _N; } \
96
R123_CUDA_DEVICE size_type max_size() const { return _N; } \
97
R123_CUDA_DEVICE bool empty() const { return _N==0; }; \
98
R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
99
R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
100
R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
101
R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
102
R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
103
R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
104
R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
105
R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
106
R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
107
R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
108
R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
109
R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
110
R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
111
R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
112
R123_CUDA_DEVICE reference front(){ return v[0]; } \
113
R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
114
R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
115
R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
116
R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
117
/* CUDA3 does not have std::equal */ \
118
for (size_t i = 0; i < _N; ++i) \
119
if (v[i] != rhs.v[i]) return false; \
122
R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
123
/* CUDA3 does not have std::fill_n */ \
124
R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
125
R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
126
/* CUDA3 does not have std::swap_ranges */ \
127
for (size_t i = 0; i < _N; ++i) { \
133
R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
134
/* This test is tricky because we're trying to avoid spurious \
135
complaints about illegal shifts, yet still be compile-time \
137
if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
138
return incr_carefully(n); \
141
if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
144
if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
146
/* We expect that the N==?? tests will be \
147
constant-folded/optimized away by the compiler, so only the \
148
overflow tests (!!v[i]) remain to be done at runtime. For \
149
small values of N, it would be better to do this as an \
150
unconditional sequence of adc. An experiment/optimization \
152
N.B. The weird subscripting: v[_N>3?3:0] is to silence \
153
a spurious error from icpc \
156
if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
158
if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
160
for(size_t i=4; i<_N; ++i){ \
161
if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
166
/* seed(SeedSeq) would be a constructor if having a constructor */ \
167
/* didn't cause headaches with defaults */ \
168
template <typename SeedSeq> \
169
R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
170
r123array##_N##x##W ret; \
171
const size_t Ngen = _N*((3+sizeof(value_type))/4); \
172
uint32_t u32[Ngen]; \
173
uint32_t *p32 = &u32[0]; \
174
ss.generate(&u32[0], &u32[Ngen]); \
175
for(size_t i=0; i<_N; ++i){ \
176
ret.v[i] = assemble_from_u32<value_type>(p32); \
177
p32 += (3+sizeof(value_type))/4; \
182
R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
183
/* n may be greater than the maximum value of a single value_type */ \
187
const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
188
for(size_t i=1; i<_N; ++i){ \
204
// There are several tricky considerations for the stream insertion and extraction of r123arrays:
206
// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
208
// - we would like to be able to print r123array1xm128i.
209
// - we do not want an int conversion operator in r123m128i because it causes
210
// lots of ambiguity problems with automatic promotions.
211
// Solution: r123arrayinsertable and r123arrayextractable
214
struct r123arrayinsertable{
216
r123arrayinsertable(const T& t_) : v(t_) {}
217
friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){
223
struct r123arrayinsertable<uint8_t>{
225
r123arrayinsertable(const uint8_t& t_) : v(t_) {}
226
friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){
227
return os << (int)t.v;
232
struct r123arrayextractable{
234
r123arrayextractable(T& t_) : v(t_) {}
235
friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
241
struct r123arrayextractable<uint8_t>{
243
r123arrayextractable(uint8_t& t_) : v(t_) {}
244
friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
252
#define CXXOVERLOADS(_N, W, T) \
254
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
255
os << r123arrayinsertable<T>(a.v[0]); \
256
for(size_t i=1; i<_N; ++i) \
257
os << " " << r123arrayinsertable<T>(a.v[i]); \
261
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
262
for(size_t i=0; i<_N; ++i){ \
263
r123arrayextractable<T> x(a.v[i]); \
270
typedef r123array##_N##x##W Array##_N##x##W; \
273
#endif /* __cplusplus */
275
/* _r123array_tpl expands to a declaration of struct r123arrayNxW.
277
In C, it's nothing more than a struct containing an array of N objects of type T.
280
In C++ it's the same, but endowed with an assortment of member
281
functions, typedefs and friends. In C++, r123arrayNxW looks a lot
282
like std::array<T,N>, has most of the capabilities of a container,
283
and satisfies the requirements outlined in compat/Engine.hpp for
284
counter and key types. ArrayNxW, in the r123 namespace, is
285
a typedef equivalent to r123arrayNxW.
288
#define _r123array_tpl(_N, W, T) \
289
/** @ingroup arrayNxW */ \
290
/** @see arrayNxW */ \
291
struct r123array##_N##x##W{ \
293
CXXMETHODS(_N, W, T) \
296
CXXOVERLOADS(_N, W, T)
300
/* Instantiate the array types.  Each _r123array_tpl(N, W, T) line declares
   struct r123array<N>x<W>; the struct name is built by token pasting inside
   the macro, so W is a name fragment as well as a width (see the m128i case
   below).  T is the third macro argument, handed on to CXXMETHODS and
   CXXOVERLOADS. */
_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
301
_r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
302
_r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
303
_r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
305
_r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
306
_r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
307
_r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
309
_r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
312
_r123array_tpl(1, m128i, r123m128i) /* r123array1xm128i for ARSni, AESni */
315
/* In C++, it's natural to use sizeof(a::value_type), but in C it's
316
pretty convoluted to figure out the width of the value_type of an r123arrayNxW:
319
/* R123_W(a): eight times the size in bytes of one element of array type `a`,
   i.e. the element width in bits on 8-bit-byte platforms.  The element is
   reached through the member v of a null `a *`; that pointer is only an
   operand of sizeof, so it is never evaluated or dereferenced. */
#define R123_W(a) (sizeof(((a *)0)->v[0]) * 8)
322
Most of the Random123 C++ API is contained in the r123 namespace.