2
Copyright 2010-2011, D. E. Shaw Research.
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are
9
* Redistributions of source code must retain the above copyright
10
notice, this list of conditions, and the following disclaimer.
12
* Redistributions in binary form must reproduce the above copyright
13
notice, this list of conditions, and the following disclaimer in the
14
documentation and/or other materials provided with the distribution.
16
* Neither the name of D. E. Shaw Research nor the names of its
17
contributors may be used to endorse or promote products derived from
18
this software without specific prior written permission.
20
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
#ifndef __Random123_aes_dot_hpp__
33
#define __Random123_aes_dot_hpp__
35
#include "features/compilerfeatures.h"
38
/* Implement a bona fide AES block cipher.  It is minimally
   checked against the test vector in FIPS-197 in ut_aes.cpp. */
43
/** @ingroup AESNI
 *  Counter (input/output block) type of the aesni1xm128i generator. */
typedef struct r123array1xm128i aesni1xm128i_ctr_t;
/** @ingroup AESNI
 *  User-key type of the aesni1xm128i generator. */
typedef struct r123array1xm128i aesni1xm128i_ukey_t;
/** @ingroup AESNI
 *  User-key type of the aesni4x32 generator (same key, seen as four 32-bit words). */
typedef struct r123array4x32 aesni4x32_ukey_t;
/** @ingroup AESNI
 *  Number of rounds performed by aesni1xm128i. */
enum r123_enum_aesni1xm128i { aesni1xm128i_rounds = 10 };
51
/** \cond HIDDEN_FROM_DOXYGEN */
52
/* One step of the AES-128 key schedule.
 *
 * temp1: the previous round key.
 * temp2: the value _mm_aeskeygenassist_si128 produced from that round key.
 * Returns the next round key.
 */
R123_STATIC_INLINE __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) {
    __m128i temp3;
    /* Replicate the most-significant 32-bit lane of temp2 into all four lanes. */
    temp2 = _mm_shuffle_epi32 (temp2 ,0xff);
    /* XOR temp1 with copies of itself shifted left by 4, 8 and 12 bytes. */
    temp3 = _mm_slli_si128 (temp1, 0x4);
    temp1 = _mm_xor_si128 (temp1, temp3);
    temp3 = _mm_slli_si128 (temp3, 0x4);
    temp1 = _mm_xor_si128 (temp1, temp3);
    temp3 = _mm_slli_si128 (temp3, 0x4);
    temp1 = _mm_xor_si128 (temp1, temp3);
    /* Fold in the keygen-assist word. */
    temp1 = _mm_xor_si128 (temp1, temp2);
    return temp1;
}
65
/* Expand a 128-bit user key into the 11 round keys used by aesni1xm128i.
 *
 * uk:  user key; only uk.v[0].m is read.
 * ret: output array of 11 round keys.  ret[0] is the user key itself and
 *      each ret[i] is derived from ret[i-1] by AES_128_ASSIST, using the
 *      round constants 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
 *      0x1b, 0x36 in that order.
 *
 * The steps are written out one by one because the round constant is passed
 * to _mm_aeskeygenassist_si128 as a literal.
 */
R123_STATIC_INLINE void aesni1xm128iexpand(aesni1xm128i_ukey_t uk, __m128i ret[11])
{
    __m128i rkey = uk.v[0].m;
    __m128i tmp2;

    ret[0] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[1] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[2] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[3] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[4] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[5] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[6] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[7] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[8] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[9] = rkey;
    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36);
    rkey = AES_128_ASSIST(rkey, tmp2);

    ret[10] = rkey;
}
/** \endcond */
114
/** @ingroup AESNI */
115
struct aesni1xm128i_key_t{
117
aesni1xm128i_key_t(){
118
aesni1xm128i_ukey_t uk;
119
uk.v[0].m = _mm_setzero_si128();
120
aesni1xm128iexpand(uk, k);
122
aesni1xm128i_key_t(const aesni1xm128i_ukey_t& uk){
123
aesni1xm128iexpand(uk, k);
125
aesni1xm128i_key_t(const aesni4x32_ukey_t& uk){
126
aesni1xm128i_ukey_t uk128;
127
uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
128
aesni1xm128iexpand(uk128, k);
130
aesni1xm128i_key_t& operator=(const aesni1xm128i_ukey_t& uk){
131
aesni1xm128iexpand(uk, k);
134
aesni1xm128i_key_t& operator=(const aesni4x32_ukey_t& uk){
135
aesni1xm128i_ukey_t uk128;
136
uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
137
aesni1xm128iexpand(uk128, k);
146
/** @ingroup AESNI */
147
R123_STATIC_INLINE aesni1xm128i_key_t aesni1xm128ikeyinit(aesni1xm128i_ukey_t uk){
148
aesni1xm128i_key_t ret;
149
aesni1xm128iexpand(uk, ret.k);
154
/** @ingroup AESNI */
155
R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i(aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) {
156
__m128i x = _mm_xor_si128(k.k[0], in.v[0].m);
157
x = _mm_aesenc_si128(x, k.k[1]);
158
x = _mm_aesenc_si128(x, k.k[2]);
159
x = _mm_aesenc_si128(x, k.k[3]);
160
x = _mm_aesenc_si128(x, k.k[4]);
161
x = _mm_aesenc_si128(x, k.k[5]);
162
x = _mm_aesenc_si128(x, k.k[6]);
163
x = _mm_aesenc_si128(x, k.k[7]);
164
x = _mm_aesenc_si128(x, k.k[8]);
165
x = _mm_aesenc_si128(x, k.k[9]);
166
x = _mm_aesenclast_si128(x, k.k[10]);
168
aesni1xm128i_ctr_t ret;
174
/** @ingroup AESNI */
175
R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i_R(unsigned R, aesni1xm128i_ctr_t in, aesni1xm128i_key_t k){
177
return aesni1xm128i(in, k);
181
/** @ingroup AESNI */
182
typedef struct r123array4x32 aesni4x32_ctr_t;
183
/** @ingroup AESNI */
184
typedef aesni1xm128i_key_t aesni4x32_key_t;
185
/** @ingroup AESNI */
186
enum r123_enum_aesni4x32 { aesni4x32_rounds = 10 };
187
/** @ingroup AESNI */
188
R123_STATIC_INLINE aesni4x32_key_t aesni4x32keyinit(aesni4x32_ukey_t uk){
189
aesni1xm128i_ukey_t uk128;
191
uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
192
aesni1xm128iexpand(uk128, ret.k);
196
/** @ingroup AESNI */
197
/** The aesni4x32_R function provides a C API to the @ref AESNI "AESNI" CBRNG, allowing the number of rounds to be specified explicitly **/
198
R123_STATIC_INLINE aesni4x32_ctr_t aesni4x32_R(unsigned int Nrounds, aesni4x32_ctr_t c, aesni4x32_key_t k){
199
aesni1xm128i_ctr_t c128;
200
c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
201
c128 = aesni1xm128i_R(Nrounds, c128, k);
202
_mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
206
/** @ingroup AESNI
 *  aesni4x32_rounds as a preprocessor symbol; it expands to aesni1xm128i_rounds. */
#define aesni4x32_rounds aesni1xm128i_rounds

/** @ingroup AESNI
 *  The aesni4x32 macro provides a C API to the @ref AESNI "AESNI" CBRNG, using
 *  the default number of rounds, i.e. \c aesni4x32_rounds. */
#define aesni4x32(c,k) aesni4x32_R(aesni4x32_rounds, c, k)
215
@defgroup AESNI ARS and AESNI Classes and Typedefs
217
The ARS4x32, ARS1xm128i, AESNI4x32 and AESNI1xm128i classes export the member functions, typedefs and
218
operator overloads required by a @ref CBRNG "CBRNG" class.
220
ARS1xm128i and AESNI1xm128i are based on the AES block cipher and rely on the AES-NI hardware instructions
221
available on some some new (2011) CPUs.
223
The ARS1xm128i CBRNG and the use of AES for random number generation are described in
224
<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>.
225
Although it uses some cryptographic primitives, ARS1xm128i uses a cryptographically weak key schedule and is \b not suitable for cryptographic use.
229
AESNI exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
231
AESNI1xm128i uses the crypotgraphic AES round function, including the cryptographic key schedule.
233
In contrast to the other CBRNGs in the Random123 library, the AESNI1xm128i_R::key_type is opaque
234
and is \b not identical to the AESNI1xm128i_R::ukey_type. Creating a key_type, using either the constructor
235
or assignment operator, is significantly more time-consuming than running the bijection (hundreds
236
of clock cycles vs. tens of clock cycles).
238
AESNI1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
239
should occur only when the compiler is configured to generate AES-NI instructions (or
240
when defaults are overridden by compile-time, compiler-command-line options).
242
As of September 2011, the authors know of no statistical flaws with AESNI1xm128i. It
243
would be an event of major cryptographic note if any such flaws were ever found.
246
typedef aesni1xm128i_ctr_t ctr_type;
247
typedef aesni1xm128i_ukey_t ukey_type;
248
typedef aesni1xm128i_key_t key_type;
249
static const unsigned int rounds=10;
250
ctr_type operator()(ctr_type ctr, key_type key) const{
251
return aesni1xm128i(ctr, key);
255
/* @class AESNI4x32 */
257
typedef aesni4x32_ctr_t ctr_type;
258
typedef aesni4x32_ukey_t ukey_type;
259
typedef aesni4x32_key_t key_type;
260
static const unsigned int rounds=10;
261
ctr_type operator()(ctr_type ctr, key_type key) const{
262
return aesni4x32(ctr, key);
267
@class AESNI1xm128i_R
269
AESNI1xm128i_R is provided for completeness, but is only instantiable with ROUNDS=10, in
270
which case it is identical to AESNI1xm128i */
271
template <unsigned ROUNDS=10>
272
struct AESNI1xm128i_R : public AESNI1xm128i{
273
R123_STATIC_ASSERT(ROUNDS==10, "AESNI1xm128i_R<R> is only valid with R=10");
276
/** @class AESNI4x32_R **/
277
template <unsigned ROUNDS=10>
278
struct AESNI4x32_R : public AESNI4x32{
279
R123_STATIC_ASSERT(ROUNDS==10, "AESNI4x32_R<R> is only valid with R=10");
282
#endif /* __cplusplus */
284
#endif /* R123_USE_AES_NI */
286
#if R123_USE_AES_OPENSSL
287
#include <openssl/aes.h>
288
/* Counter (input/output block) type of the OpenSSL-backed AES generator. */
typedef struct r123array16x8 aesopenssl16x8_ctr_t;
/* User-key type of the OpenSSL-backed AES generator; same layout as the counter. */
typedef struct r123array16x8 aesopenssl16x8_ukey_t;
291
struct aesopenssl16x8_key_t{
293
aesopenssl16x8_key_t(){
294
aesopenssl16x8_ukey_t ukey={{}};
295
AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
297
aesopenssl16x8_key_t(const aesopenssl16x8_ukey_t& ukey){
298
AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
300
aesopenssl16x8_key_t& operator=(const aesopenssl16x8_ukey_t& ukey){
301
AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
306
typedef struct aesopenssl16x8_key_t{
308
}aesopenssl16x8_key_t;
309
/* Build an expanded OpenSSL AES key from a 16-byte user key.
 *
 * uk: user key; the 16 bytes starting at uk.v[0] are used as a 128-bit key.
 * Returns the key whose schedule ret.k was filled by AES_set_encrypt_key.
 * The return value of AES_set_encrypt_key is not checked.
 */
R123_STATIC_INLINE struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk){
    aesopenssl16x8_key_t ret;
    AES_set_encrypt_key((const unsigned char *)&uk.v[0], 128, &ret.k);
    return ret;
}
316
R123_STATIC_INLINE R123_FORCE_INLINE(aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key));
318
aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key){
319
aesopenssl16x8_ctr_t ret;
320
AES_encrypt((const unsigned char*)&ctr.v[0], (unsigned char *)&ret.v[0], &key.k);
324
/* Number of rounds of the OpenSSL-backed generator.  Spelled as a literal so
   that it does not depend on the AES-NI section being compiled; it matches
   the rounds=10 constant of the AESOpenSSL16x8 class. */
#define aesopenssl16x8_rounds 10
/* Encrypt counter c under expanded key k.  aesopenssl16x8_R takes exactly
   (ctr, key), so both arguments are forwarded and no round count is passed. */
#define aesopenssl16x8(c,k) aesopenssl16x8_R((c), (k))
329
struct AESOpenSSL16x8{
330
typedef aesopenssl16x8_ctr_t ctr_type;
331
typedef aesopenssl16x8_key_t key_type;
332
typedef aesopenssl16x8_ukey_t ukey_type;
333
static const unsigned int rounds=10;
334
ctr_type operator()(const ctr_type& in, const key_type& k){
336
AES_encrypt((const unsigned char *)&in[0], (unsigned char *)&out[0], &k.k);
341
#endif /* __cplusplus */
342
#endif /* R123_USE_AES_OPENSSL */