32#ifndef _Random123_sse_dot_h__
33#define _Random123_sse_dot_h__
37#if R123_USE_X86INTRIN_H
40#if R123_USE_IA32INTRIN_H
41#include <ia32intrin.h>
43#if R123_USE_XMMINTRIN_H
46#if R123_USE_EMMINTRIN_H
49#if R123_USE_SMMINTRIN_H
52#if R123_USE_WMMINTRIN_H
68 unsigned int eax, ebx, ecx, edx;
69 __asm__ __volatile__(
"cpuid" :
"=a"(eax),
"=b"(ebx),
"=c"(ecx),
"=d"(edx) :
"a"(1));
70 return (ecx >> 25) & 1;
72#elif R123_USE_CPUID_MSVC
76 return (CPUInfo[2] >> 25) & 1;
79#warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false"
90#if(defined(__ICC) && __ICC < 1210) || (defined(_MSC_VER) && !defined(_WIN64) && _MSC_VER < 1900)
102 return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
115#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
121 _mm_store_si128(&u.m, si);
124#elif defined(__llvm__) || defined(__ICC)
132#if defined(__GNUC__) && __GNUC__ < 4
141#if R123_USE_CXX11_UNRESTRICTED_UNIONS
147 r123m128i() =
default;
148 r123m128i(__m128i _m) :
m(_m) { }
150 r123m128i& operator=(__m128i
const& rhs) {
155 m = _mm_set_epi64x(0, n);
158#if R123_USE_CXX11_EXPLICIT_CONVERSIONS
162 explicit operator bool()
const {
return _bool(); }
166 operator void const*()
const {
return _bool() ? this : 0; }
168 operator __m128i()
const {
return m; }
172 bool _bool()
const {
return !_mm_testz_si128(m, m); }
174 bool _bool()
const {
return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); }
180 __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
181 c = _mm_add_epi64(c, zeroone);
184 __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
186 __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
187 c = _mm_add_epi64(c, onezero);
190 unsigned mask = _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
194 __m128i onezero = _mm_set_epi64x(1, 0);
195 c = _mm_add_epi64(c, onezero);
203 __m128i incr128 = _mm_set_epi64x(0, n);
204 c = _mm_add_epi64(c, incr128);
207 int64_t lo64 = _mm_extract_lo64(c);
209 c = _mm_add_epi64(c, _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0)));
220R123_STATIC_INLINE bool operator<(r123m128i
const&, r123m128i
const&) { std::abort(); }
221R123_STATIC_INLINE bool operator<=(r123m128i
const&, r123m128i
const&) { std::abort(); }
222R123_STATIC_INLINE bool operator>(r123m128i
const&, r123m128i
const&) { std::abort(); }
223R123_STATIC_INLINE bool operator>=(r123m128i
const&, r123m128i
const&) { std::abort(); }
225R123_STATIC_INLINE bool operator==(r123m128i
const& lhs, r123m128i
const& rhs) {
return 0xf == _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
229 LHS.m = _mm_set_epi64x(0, lhs);
238 _mm_storeu_si128(&u.m,
m.m);
239 return os << u.u64[0] <<
" " << u.u64[1];
244 is >> u64[0] >> u64[1];
245 m.m = _mm_set_epi64x(u64[1], u64[0]);
// Primary template, declaration only: produces a T from the 32-bit words that
// p32 points to. No generic definition is given here; each supported T supplies
// its own explicit specialization.
// The number of words read through p32 is decided by the specialization.
template <typename T>
inline T assemble_from_u32(uint32_t* p32);
251template<>
inline r123m128i assemble_from_u32<r123m128i>(
uint32_t* p32) {
253 ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
#define R123_STATIC_INLINE
#define R123_BUILTIN_EXPECT(expr, likely)
mask_vector operator==(contiguous_tags_base< tag_type > a, tag_type b)
mask_vector operator!=(contiguous_tags_base< tag_type > a, tag_type b)
R123_STATIC_INLINE int haveAESNI()