32#ifndef __Random123_aes_dot_hpp__
33#define __Random123_aes_dot_hpp__
/* Counter type for the aesni1xm128i generator: an r123array1xm128i. */
43typedef struct r123array1xm128i aesni1xm128i_ctr_t;
/* User-supplied (unexpanded) key type; same underlying array type as the counter. */
45typedef struct r123array1xm128i aesni1xm128i_ukey_t;
/* Alternative user key type built on r123array4x32 (indexed v[0]..v[3] elsewhere in this file). */
47typedef struct r123array4x32 aesni4x32_ukey_t;
/* Round count associated with aesni1xm128i; fixed at 10. */
49enum r123_enum_aesni1xm128i { aesni1xm128i_rounds = 10 };
/* NOTE(review): the enclosing function header is not visible in this excerpt;
   the call sites below (AES_128_ASSIST(rkey, tmp2)) suggest this is its body,
   operating on temp1 and temp2 -- confirm. */
/* Replicate the highest 32-bit lane of temp2 into all four lanes. */
54 temp2 = _mm_shuffle_epi32(temp2, 0xff);
/* Fold temp1 with copies of its original value shifted left by 4, 8 and 12 bytes. */
55 temp3 = _mm_slli_si128(temp1, 0x4);
56 temp1 = _mm_xor_si128(temp1, temp3);
57 temp3 = _mm_slli_si128(temp3, 0x4);
58 temp1 = _mm_xor_si128(temp1, temp3);
59 temp3 = _mm_slli_si128(temp3, 0x4);
60 temp1 = _mm_xor_si128(temp1, temp3);
/* Finally mix in the broadcast word from temp2. */
61 temp1 = _mm_xor_si128(temp1, temp2);
/* NOTE(review): the enclosing function header and any statements that store
   rkey are not visible in this excerpt; callers invoke this code as
   aesni1xm128iexpand(uk, <round-key array>) -- confirm. */
/* Start from the 128-bit user key held in uk.v[0].m. */
66 __m128i rkey = uk.v[0].m;
/* Each pair of statements below derives the next value of rkey from the
   current one: _mm_aeskeygenassist_si128 is called with a per-step constant
   and AES_128_ASSIST combines its result with rkey. The ten constants are
   0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, which are the
   AES-128 round constants from FIPS-197. */
70 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1);
71 rkey = AES_128_ASSIST(rkey, tmp2);
74 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2);
75 rkey = AES_128_ASSIST(rkey, tmp2);
78 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4);
79 rkey = AES_128_ASSIST(rkey, tmp2);
82 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8);
83 rkey = AES_128_ASSIST(rkey, tmp2);
86 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10);
87 rkey = AES_128_ASSIST(rkey, tmp2);
90 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20);
91 rkey = AES_128_ASSIST(rkey, tmp2);
94 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40);
95 rkey = AES_128_ASSIST(rkey, tmp2);
98 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80);
99 rkey = AES_128_ASSIST(rkey, tmp2);
102 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b);
103 rkey = AES_128_ASSIST(rkey, tmp2);
106 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36);
107 rkey = AES_128_ASSIST(rkey, tmp2);
/* Expanded key for the aesni1xm128i generator. The member k is filled by
   aesni1xm128iexpand and is indexed k[0]..k[10] by the encryption routine.
   NOTE(review): several lines of this struct (member declaration, closing
   braces, return statements, loop bodies) are not visible in this excerpt. */
114struct aesni1xm128i_key_t {
/* Default construction: expand an all-zero user key. */
116 aesni1xm128i_key_t() {
117 aesni1xm128i_ukey_t uk;
118 uk.v[0].m = _mm_setzero_si128();
119 aesni1xm128iexpand(uk, k);
/* Construct from a 128-bit user key. */
121 aesni1xm128i_key_t(aesni1xm128i_ukey_t
const& uk) { aesni1xm128iexpand(uk, k); }
/* Construct from a 4x32 user key: pack the four words into one 128-bit
   value (uk.v[0] in the lowest lane), then expand. */
122 aesni1xm128i_key_t(aesni4x32_ukey_t
const& uk) {
123 aesni1xm128i_ukey_t uk128;
124 uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
125 aesni1xm128iexpand(uk128, k);
/* Assign from a 128-bit user key by re-running the expansion into k. */
127 aesni1xm128i_key_t& operator=(aesni1xm128i_ukey_t
const& uk) {
128 aesni1xm128iexpand(uk, k);
/* Assign from a 4x32 user key; same packing as the matching constructor. */
131 aesni1xm128i_key_t& operator=(aesni4x32_ukey_t
const& uk) {
132 aesni1xm128i_ukey_t uk128;
133 uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
134 aesni1xm128iexpand(uk128, k);
/* Equality: loops over 11 entries (the comparison itself is not visible here). */
137 bool operator==(aesni1xm128i_key_t
const& rhs)
const {
138 for(
int i = 0; i < 11; ++i) {
/* Inequality is defined as the negation of operator==. */
149 bool operator!=(aesni1xm128i_key_t
const& rhs)
const {
return !(*
this == rhs); }
/* Stream output. NOTE(review): this loop covers 10 entries while operator==
   and operator>> loop over 11; the rest of the body is not visible, so confirm
   the eleventh entry is written after the loop. */
150 friend std::ostream& operator<<(std::ostream& os, aesni1xm128i_key_t
const& v) {
152 for(
int i = 0; i < 10; ++i) {
/* Stream input: loops over 11 entries (the loop body is not visible here). */
159 friend std::istream& operator>>(std::istream& is, aesni1xm128i_key_t& v) {
161 for(
int i = 0; i < 11; ++i) {
/* NOTE(review): the enclosing function header is not visible in this excerpt. */
/* Expand the user key uk into the round-key member of a fresh key object. */
175 aesni1xm128i_key_t ret;
176 aesni1xm128iexpand(uk, ret.k);
/* Encrypt the 128-bit counter block in.v[0].m using the eleven entries
   k.k[0]..k.k[10] of the expanded key.
   NOTE(review): the statements that fill and return `ret` are not visible
   in this excerpt. */
182R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i(aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) {
/* Initial step: XOR the input block with k.k[0]. */
183 __m128i
x = _mm_xor_si128(k.k[0], in.v[0].m);
/* Nine _mm_aesenc_si128 rounds with k.k[1]..k.k[9]. */
184 x = _mm_aesenc_si128(x, k.k[1]);
185 x = _mm_aesenc_si128(x, k.k[2]);
186 x = _mm_aesenc_si128(x, k.k[3]);
187 x = _mm_aesenc_si128(x, k.k[4]);
188 x = _mm_aesenc_si128(x, k.k[5]);
189 x = _mm_aesenc_si128(x, k.k[6]);
190 x = _mm_aesenc_si128(x, k.k[7]);
191 x = _mm_aesenc_si128(x, k.k[8]);
192 x = _mm_aesenc_si128(x, k.k[9]);
/* Tenth and last round uses _mm_aesenclast_si128 with k.k[10]. */
193 x = _mm_aesenclast_si128(x, k.k[10]);
195 aesni1xm128i_ctr_t ret;
/* Round-count-taking form of aesni1xm128i. The visible return statement
   forwards (in, k) to aesni1xm128i and does not use R.
   NOTE(review): a check on R may sit on a line not visible in this excerpt. */
202R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i_R(
unsigned R, aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) {
204 return aesni1xm128i(in, k);
/* Counter type for the aesni4x32 generator: an r123array4x32. */
208typedef struct r123array4x32 aesni4x32_ctr_t;
/* aesni4x32 shares the expanded-key type of aesni1xm128i. */
210typedef aesni1xm128i_key_t aesni4x32_key_t;
/* Round count associated with aesni4x32; fixed at 10. */
212enum r123_enum_aesni4x32 { aesni4x32_rounds = 10 };
/* NOTE(review): the enclosing function header and the declaration of `ret`
   are not visible in this excerpt. */
215 aesni1xm128i_ukey_t uk128;
/* Pack the four words of uk into one 128-bit value, uk.v[0] in the lowest lane,
   then expand it into ret.k. */
217 uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
218 aesni1xm128iexpand(uk128, ret.k);
/* 4x32 front end to aesni1xm128i_R: converts the counter to a single 128-bit
   value, runs the 128-bit routine, and writes the result back into c.
   NOTE(review): the return statement is not visible in this excerpt. */
225R123_STATIC_INLINE aesni4x32_ctr_t aesni4x32_R(
unsigned int Nrounds, aesni4x32_ctr_t c, aesni4x32_key_t k) {
226 aesni1xm128i_ctr_t c128;
/* Pack c.v[0..3] with c.v[0] in the lowest 32-bit lane. */
227 c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
228 c128 = aesni1xm128i_R(Nrounds, c128, k);
/* Unaligned 128-bit store of the result over the storage starting at c.v[0]. */
229 _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
/* aesni4x32 uses the same round count as aesni1xm128i. */
233#define aesni4x32_rounds aesni1xm128i_rounds
/* Convenience form: aesni4x32_R called with the default round count. */
237#define aesni4x32(c, k) aesni4x32_R(aesni4x32_rounds, c, k)
/* NOTE(review): the enclosing struct header is not visible in this excerpt;
   these look like the members of the functor wrapping aesni1xm128i. */
/* Counter, user-key and expanded-key types exposed by the functor. */
274 typedef aesni1xm128i_ctr_t ctr_type;
275 typedef aesni1xm128i_ukey_t ukey_type;
276 typedef aesni1xm128i_key_t key_type;
277 static unsigned int const rounds = 10;
/* Call operator: forwards (ctr, key) to aesni1xm128i and returns its result. */
278 ctr_type operator()(ctr_type ctr, key_type key)
const {
return aesni1xm128i(ctr, key); }
/* NOTE(review): the enclosing struct header is not visible in this excerpt;
   these look like the members of the functor wrapping aesni4x32. */
/* Counter, user-key and expanded-key types exposed by the functor. */
283 typedef aesni4x32_ctr_t ctr_type;
284 typedef aesni4x32_ukey_t ukey_type;
285 typedef aesni4x32_key_t key_type;
286 static unsigned int const rounds = 10;
/* Call operator: forwards (ctr, key) through the aesni4x32 macro, which
   expands to aesni4x32_R(aesni4x32_rounds, ctr, key). */
287 ctr_type operator()(ctr_type ctr, key_type key)
const {
return aesni4x32(ctr, key); }
295template<
unsigned ROUNDS = 10>
struct AESNI1xm128i_R :
public AESNI1xm128i {
R123_STATIC_ASSERT(ROUNDS == 10,
"AESNI1xm128i_R<R> is only valid with R=10"); };
298template<
unsigned ROUNDS = 10>
struct AESNI4x32_R :
public AESNI4x32 {
R123_STATIC_ASSERT(ROUNDS == 10,
"AESNI4x32_R<R> is only valid with R=10"); };
304#if R123_USE_AES_OPENSSL
306#include <openssl/aes.h>
/* Counter type for the OpenSSL-backed generator: an r123array16x8. */
307typedef struct r123array16x8 aesopenssl16x8_ctr_t;
/* User-supplied key type; same underlying array type as the counter. */
308typedef struct r123array16x8 aesopenssl16x8_ukey_t;
/* Expanded key for the OpenSSL-backed generator. The member k is passed to
   AES_set_encrypt_key / AES_encrypt and exposes `rounds` and `rd_key`.
   NOTE(review): several lines of this struct (member declaration, closing
   braces, return statements, parts of the stream operators) are not visible
   in this excerpt. */
310struct aesopenssl16x8_key_t {
/* Default construction: schedule a zero-initialized user key as a 128-bit key.
   The return value of AES_set_encrypt_key is not checked. */
312 aesopenssl16x8_key_t() {
313 aesopenssl16x8_ukey_t ukey = {{}};
314 AES_set_encrypt_key((
unsigned char const*)&ukey.v[0], 128, &k);
/* Construct from a user key, scheduled as a 128-bit key. */
316 aesopenssl16x8_key_t(aesopenssl16x8_ukey_t
const& ukey) { AES_set_encrypt_key((
unsigned char const*)&ukey.v[0], 128, &k); }
/* Assign from a user key by re-running the key schedule into k. */
317 aesopenssl16x8_key_t& operator=(aesopenssl16x8_ukey_t
const& ukey) {
318 AES_set_encrypt_key((
unsigned char const*)&ukey.v[0], 128, &k);
/* Equality: same round count and byte-identical rd_key contents over
   (rounds + 1) * 4 words of sizeof(uint32_t) bytes each. */
321 bool operator==(aesopenssl16x8_key_t
const& rhs)
const {
return (k.rounds == rhs.k.rounds) && 0 == ::memcmp(&k.rd_key[0], &rhs.k.rd_key[0], (k.rounds + 1) * 4 *
sizeof(
uint32_t)); }
/* Inequality is defined as the negation of operator==. */
322 bool operator!=(aesopenssl16x8_key_t
const& rhs)
const {
return !(*
this == rhs); }
/* Stream output: each of the (rounds + 1) iterations writes four
   space-prefixed words through p.
   NOTE(review): the statement that advances p between iterations is not
   visible in this excerpt -- confirm it exists. */
323 friend std::ostream& operator<<(std::ostream& os, aesopenssl16x8_key_t
const& v) {
325 unsigned int const* p = &v.k.rd_key[0];
326 for(
int i = 0; i < (v.k.rounds + 1); ++i) {
327 os <<
" " << p[0] <<
" " << p[1] <<
" " << p[2] <<
" " << p[3];
/* Stream input: each of the (rounds + 1) iterations reads four words through p.
   NOTE(review): the loop bound depends on v.k.rounds; if that value is itself
   read from the stream on a line not shown here, confirm it is validated
   against the size of rd_key before this loop writes through p. */
332 friend std::istream& operator>>(std::istream& is, aesopenssl16x8_key_t& v) {
334 unsigned int* p = &v.k.rd_key[0];
335 for(
int i = 0; i < (v.k.rounds + 1); ++i) {
336 is >> p[0] >> p[1] >> p[2] >> p[3];
/* Plain-struct form of the expanded key.
   NOTE(review): the member line is not visible in this excerpt; the functions
   below access a member named k (ret.k, key.k). */
343typedef struct aesopenssl16x8_key_t {
345} aesopenssl16x8_key_t;
/* Build an expanded key from the user key uk by scheduling it as a 128-bit
   key into ret.k. The return value of AES_set_encrypt_key is not checked.
   NOTE(review): the return statement is not visible in this excerpt. */
346R123_STATIC_INLINE struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk) {
347 aesopenssl16x8_key_t ret;
348 AES_set_encrypt_key((
unsigned char const*)&uk.v[0], 128, &ret.k);
/* Encrypt the counter block ctr under key with AES_encrypt, writing the
   output block into ret. Takes (ctr, key) only; there is no round-count
   parameter despite the _R suffix.
   NOTE(review): the return statement is not visible in this excerpt. */
355aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key) {
356 aesopenssl16x8_ctr_t ret;
357 AES_encrypt((
unsigned char const*)&ctr.v[0], (
unsigned char*)&ret.v[0], &key.k);
/* The OpenSSL-backed generator uses the same round count as aesni4x32. */
#define aesopenssl16x8_rounds aesni4x32_rounds
/*
 * Convenience form: encrypt counter c under key k.
 *
 * aesopenssl16x8_R takes exactly (ctr, key) and has no round-count
 * parameter, so both arguments are forwarded unchanged. The previous
 * expansion discarded c and k and passed only the round count, which
 * could not compile at any call site.
 */
#define aesopenssl16x8(c, k) aesopenssl16x8_R((c), (k))
/* Functor wrapping the OpenSSL-backed AES generator.
   NOTE(review): the declaration of `out`, the return statement and the
   closing braces are not visible in this excerpt. */
366struct AESOpenSSL16x8 {
/* Counter, expanded-key and user-key types exposed by the functor. */
367 typedef aesopenssl16x8_ctr_t ctr_type;
368 typedef aesopenssl16x8_key_t key_type;
369 typedef aesopenssl16x8_ukey_t ukey_type;
370 static unsigned int const rounds = 10;
/* Call operator: encrypts the block starting at in[0] into the block starting
   at out[0] using k.k.
   NOTE(review): unlike the AESNI functors' call operators in this file, this
   one is not const-qualified -- confirm whether that is intentional. */
371 ctr_type operator()(ctr_type
const& in, key_type
const& k) {
373 AES_encrypt((
unsigned char const*)&in[0], (
unsigned char*)&out[0], &k.k);
#define R123_STATIC_ASSERT(expr, msg)
#define R123_STATIC_INLINE
#define R123_FORCE_INLINE(decl)
mask_vector operator==(contiguous_tags_base< tag_type > a, tag_type b)
mask_vector operator!=(contiguous_tags_base< tag_type > a, tag_type b)