/*
Copyright 2010-2011, D. E. Shaw Research.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions, and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions, and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* Neither the name of D. E. Shaw Research nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
32
#ifndef __Random123_ars_dot_hpp__
33
#define __Random123_ars_dot_hpp__
35
#include "features/compilerfeatures.h"
40
#ifndef ARS1xm128i_DEFAULT_ROUNDS
41
#define ARS1xm128i_DEFAULT_ROUNDS 7
45
/* Default round count used by the ars1xm128i() convenience macro;
   taken from ARS1xm128i_DEFAULT_ROUNDS so it can be overridden at build time. */
enum r123_enum_ars1xm128i {
    ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS
};
47
/* ARS1xm128i with Weyl keys. Fast, and Crush-resistant, but NOT CRYPTO. */
49
/* Counter block consumed and produced by ars1xm128i_R. */
typedef struct r123array1xm128i ars1xm128i_ctr_t;
51
/* Key block passed to ars1xm128i_R; same representation as the counter. */
typedef struct r123array1xm128i ars1xm128i_key_t;
53
/* User-supplied key; ars1xm128ikeyinit turns it into an ars1xm128i_key_t. */
typedef struct r123array1xm128i ars1xm128i_ukey_t;
55
/* Convert a user key into the key consumed by ars1xm128i_R.
   Both types alias struct r123array1xm128i, so the value is passed
   through unchanged. */
R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk)
{
    ars1xm128i_key_t key = uk;
    return key;
}
57
/*
 * ARS block function on a single 128-bit counter.
 *
 * The key is XORed into the counter, then every round first advances the
 * key by adding the Weyl constant to each 64-bit half and then applies one
 * AES round with that key.  Rounds 1 .. Nrounds-1 use _mm_aesenc_si128
 * (at most nine of them); the final round always uses
 * _mm_aesenclast_si128, so Nrounds values of 0 and 1 both perform exactly
 * one (final) round.
 *
 * Nrounds  number of rounds requested; must be <= 10 (R123_ASSERT).
 * in       counter block to transform.
 * k        key block.
 * returns  the transformed counter block.
 */
R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
    __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */
                                   R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */
    /* N.B. the aesenc instructions do the xor *after* the round
       transformation, so to follow the AES pattern the initial xor
       with the key has to be done explicitly. */
    __m128i kk = k.v[0].m;
    __m128i v = _mm_xor_si128(in.v[0].m, kk);
    unsigned int r;

    R123_ASSERT(Nrounds<=10);

    /* Non-final rounds.  The explicit cap keeps the count at nine even
       when R123_ASSERT is compiled out and Nrounds is out of range. */
    for (r = 1; r < Nrounds && r < 10; ++r) {
        kk = _mm_add_epi64(kk, kweyl);
        v = _mm_aesenc_si128(v, kk);
    }

    /* Final round is unconditional. */
    kk = _mm_add_epi64(kk, kweyl);
    v = _mm_aesenclast_si128(v, kk);

    /* Reuse the by-value input as the result object. */
    in.v[0].m = v;
    return in;
}
111
/** The ars1xm128i macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars1xm128i_rounds **/
112
/* Calls ars1xm128i_R(ars1xm128i_rounds, c, k); each argument is expanded exactly once. */
#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
114
/** @ingroup AESNI */
115
/* Counter for ars4x32_R; its elements v[0]..v[3] are packed into one 128-bit block. */
typedef struct r123array4x32 ars4x32_ctr_t;
116
/** @ingroup AESNI */
117
/* Key for ars4x32_R; same representation as the counter. */
typedef struct r123array4x32 ars4x32_key_t;
118
/** @ingroup AESNI */
119
/* User-supplied key; ars4x32keyinit turns it into an ars4x32_key_t. */
typedef struct r123array4x32 ars4x32_ukey_t;
120
/** @ingroup AESNI */
121
/* Default round count used by the ars4x32() convenience macro.
   It shares ARS1xm128i_DEFAULT_ROUNDS with the 1xm128i variant. */
enum r123_enum_ars4x32 {
    ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS
};
122
/** @ingroup AESNI */
123
/* Convert a user key into the key consumed by ars4x32_R.
   Both types alias struct r123array4x32, so the value is passed
   through unchanged. */
R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk)
{
    ars4x32_key_t key = uk;
    return key;
}
124
/** @ingroup AESNI */
125
/*
 * ARS block function on a counter and key held as four array elements each.
 *
 * The elements of c and k are packed into 128-bit vectors (element 0 in
 * the lowest 32-bit lane), run through ars1xm128i_R, and the 128-bit
 * result is written back over c, which is then returned.
 *
 * Nrounds  forwarded unchanged to ars1xm128i_R.
 * c        counter (passed by value; reused as the result object).
 * k        key.
 * returns  the transformed counter.
 */
R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
    ars1xm128i_ctr_t c128;
    ars1xm128i_key_t k128;

    /* _mm_set_epi32 takes its arguments from the highest lane to the lowest. */
    c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
    k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);

    c128 = ars1xm128i_R(Nrounds, c128, k128);

    /* Unaligned store: no 16-byte alignment is assumed for c.v. */
    _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
    return c;
}
137
/** The ars4x32 macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars4x32_rounds **/
138
/* Calls ars4x32_R(ars4x32_rounds, c, k); each argument is expanded exactly once. */
#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
145
ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
147
ARS1xm128i uses the cryptographic AES round function, but a @b non-cryptographic key schedule
148
to save time and space.
150
ARS1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
151
should occur only when the compiler is configured to generate AES-NI instructions (or
152
when defaults are overridden by compile-time, compiler-command-line options).
154
The template argument, ROUNDS, is the number of times the ARS round
155
functions will be applied.
157
As of September 2011, the authors know of no statistical flaws with
163
template<unsigned int ROUNDS>
165
typedef ars1xm128i_ctr_t ctr_type;
166
typedef ars1xm128i_key_t key_type;
167
typedef ars1xm128i_key_t ukey_type;
168
static const unsigned int rounds=ROUNDS;
169
R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
170
return ars1xm128i_R(ROUNDS, ctr, key);
178
template<unsigned int ROUNDS>
180
typedef ars4x32_ctr_t ctr_type;
181
typedef ars4x32_key_t key_type;
182
typedef ars4x32_key_t ukey_type;
183
static const unsigned int rounds=ROUNDS;
184
R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
185
return ars4x32_R(ROUNDS, ctr, key);
192
ARS1xm128i is equivalent to ARS1xm128i_R<7>. With 7 rounds,
193
the ARS1xm128i CBRNG has a considerable safety margin over the minimum number
194
of rounds with no known statistical flaws, but still has excellent
196
/** ARS1xm128i_R instantiated with the default round count, ars1xm128i_rounds. */
typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
197
/** ARS4x32_R instantiated with the default round count, ars4x32_rounds. */
typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
200
#endif /* __cplusplus */
202
#endif /* R123_USE_AES_NI */