Project Alice
Loading...
Searching...
No Matches
array.h
Go to the documentation of this file.
1/*
2Copyright 2010-2011, D. E. Shaw Research.
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
8
9* Redistributions of source code must retain the above copyright
10 notice, this list of conditions, and the following disclaimer.
11
12* Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions, and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16* Neither the name of D. E. Shaw Research nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32#ifndef _r123array_dot_h__
33#define _r123array_dot_h__
35#include "features/sse.h"
36
/* When this header is compiled as C (or for Metal), the three macros that
   decorate the array structs with C++ members, stream operators and
   reverse iterators are defined with empty expansions.  The C++
   definitions follow in the #else branch. */
37#if !defined(__cplusplus) || defined(__METAL_MACOS__)
38#define CXXMETHODS(_N, W, T)
39#define CXXOVERLOADS(_N, W, T)
40#define CXXMETHODS_REQUIRING_STL
41#else
42
43#include <stddef.h>
44#include <algorithm>
45#include <stdexcept>
46#include <iterator>
47#include <limits>
48#include <iostream>
49
74template<typename value_type> inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t* p32) {
75 value_type v = 0;
76 for(size_t i = 0; i < (3 + sizeof(value_type)) / 4; ++i)
77 v |= ((value_type)(*p32++)) << (32 * i);
78 return v;
79}
80
/* CXXMETHODS_REQUIRING_STL: reverse-iteration members for the array structs.

   Outside CUDA device compilation it expands to the reverse_iterator /
   const_reverse_iterator typedefs plus rbegin/rend/crbegin/crend, each built
   with std::reverse_iterator on top of the enclosing class's iterator,
   const_iterator, begin(), end(), cbegin() and cend().  When __CUDA_ARCH__
   is defined it expands to nothing. */
83#ifdef __CUDA_ARCH__
84/* CUDA can't handle std::reverse_iterator. We *could* implement it
85 ourselves, but let's not bother until somebody really feels a need
86 to reverse-iterate through an r123array */
87#define CXXMETHODS_REQUIRING_STL
88#else
89#define CXXMETHODS_REQUIRING_STL \
90public: \
91 typedef std::reverse_iterator<iterator> reverse_iterator; \
92 typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
93 R123_CUDA_DEVICE reverse_iterator rbegin() { return reverse_iterator(end()); } \
94 R123_CUDA_DEVICE const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } \
95 R123_CUDA_DEVICE reverse_iterator rend() { return reverse_iterator(begin()); } \
96 R123_CUDA_DEVICE const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } \
97 R123_CUDA_DEVICE const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } \
98 R123_CUDA_DEVICE const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
99#endif
100
101// Work-alike methods and typedefs modeled on std::array:
/* CXXMETHODS(_N, W, T): member typedefs and functions pasted into
   struct r123array<_N>x<W>, whose only data member is `T v[_N]`.

   _N  number of elements (also exposed as the enum static_size)
   W   width tag used only to form the struct name r123array##_N##x##W
   T   element type (becomes value_type)

   Provided members:
     - container-style typedefs, operator[], at() (calls R123_THROW with
       std::out_of_range when i >= _N), size/max_size/empty, begin/end,
       cbegin/cend, data, front/back;
     - operator== / operator!=, fill() and swap(), written as explicit loops;
     - incr(n): adds n to the array treated as one multi-word counter with
       v[0] as the least-significant word, carrying into higher elements;
     - static seed(ss): builds an array from 32-bit words obtained through
       ss.generate(first, last);
     - protected incr_carefully(n): the path incr() takes when n has bits
       above the width of T. */
102#define CXXMETHODS(_N, W, T) \
103 typedef T value_type; \
104 typedef T* iterator; \
105 typedef const T* const_iterator; \
106 typedef value_type& reference; \
107 typedef value_type const& const_reference; \
108 typedef size_t size_type; \
109 typedef ptrdiff_t difference_type; \
110 typedef T* pointer; \
111 typedef const T* const_pointer; \
112 /* Boost.array has static_size. C++11 specializes tuple_size */ \
113 enum { static_size = _N }; \
114 R123_CUDA_DEVICE reference operator[](size_type i) { return v[i]; } \
115 R123_CUDA_DEVICE const_reference operator[](size_type i) const { return v[i]; } \
116 R123_CUDA_DEVICE reference at(size_type i) { \
117 if(i >= _N) \
118 R123_THROW(std::out_of_range("array index out of range")); \
119 return (*this)[i]; \
120 } \
121 R123_CUDA_DEVICE const_reference at(size_type i) const { \
122 if(i >= _N) \
123 R123_THROW(std::out_of_range("array index out of range")); \
124 return (*this)[i]; \
125 } \
126 R123_CUDA_DEVICE size_type size() const { return _N; } \
127 R123_CUDA_DEVICE size_type max_size() const { return _N; } \
128 R123_CUDA_DEVICE bool empty() const { return _N == 0; }; \
129 R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
130 R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
131 R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
132 R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
133 R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
134 R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
135 R123_CUDA_DEVICE pointer data() { return &v[0]; } \
136 R123_CUDA_DEVICE const_pointer data() const { return &v[0]; } \
137 R123_CUDA_DEVICE reference front() { return v[0]; } \
138 R123_CUDA_DEVICE const_reference front() const { return v[0]; } \
139 R123_CUDA_DEVICE reference back() { return v[_N - 1]; } \
140 R123_CUDA_DEVICE const_reference back() const { return v[_N - 1]; } \
141 R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const { \
142 /* CUDA3 does not have std::equal */ \
143 for(size_t i = 0; i < _N; ++i) \
144 if(v[i] != rhs.v[i]) \
145 return false; \
146 return true; \
147 } \
148 R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const { return !(*this == rhs); } \
149 /* CUDA3 does not have std::fill_n */ \
150 R123_CUDA_DEVICE void fill(value_type const& val) { \
151 for(size_t i = 0; i < _N; ++i) \
152 v[i] = val; \
153 } \
154 R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs) { \
155 /* CUDA3 does not have std::swap_ranges */ \
156 for(size_t i = 0; i < _N; ++i) { \
157 T tmp = v[i]; \
158 v[i] = rhs.v[i]; \
159 rhs.v[i] = tmp; \
160 } \
161 } \
 /* incr(n): add n (default 1) to the counter and return *this.  A carry  \
    out of v[k] is detected by v[k] having wrapped (to zero after ++, or  \
    to a value below n after += n) and is propagated into v[k+1]. */ \
162 R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n = 1) { \
163 /* This test is tricky because we're trying to avoid spurious \
164 complaints about illegal shifts, yet still be compile-time \
165 evaluated. */ \
166 if(sizeof(T) < sizeof(n) && n >> ((sizeof(T) < sizeof(n)) ? 8 * sizeof(T) : 0)) \
167 return incr_carefully(n); \
168 if(n == 1) { \
169 ++v[0]; \
170 if(_N == 1 || R123_BUILTIN_EXPECT(!!v[0], 1)) \
171 return *this; \
172 } else { \
173 v[0] += n; \
174 if(_N == 1 || R123_BUILTIN_EXPECT(n <= v[0], 1)) \
175 return *this; \
176 } \
177 /* We expect that the N==?? tests will be \
178 constant-folded/optimized away by the compiler, so only the \
179 overflow tests (!!v[i]) remain to be done at runtime. For \
180 small values of N, it would be better to do this as an \
181 unconditional sequence of adc. An experiment/optimization \
182 for another day... \
183 N.B. The weird subscripting: v[_N>3?3:0] is to silence \
184 a spurious error from icpc \
185 */ \
186 ++v[_N > 1 ? 1 : 0]; \
187 if(_N == 2 || R123_BUILTIN_EXPECT(!!v[_N > 1 ? 1 : 0], 1)) \
188 return *this; \
189 ++v[_N > 2 ? 2 : 0]; \
190 if(_N == 3 || R123_BUILTIN_EXPECT(!!v[_N > 2 ? 2 : 0], 1)) \
191 return *this; \
192 ++v[_N > 3 ? 3 : 0]; \
193 for(size_t i = 4; i < _N; ++i) { \
194 if(R123_BUILTIN_EXPECT(!!v[i - 1], 1)) \
195 return *this; \
196 ++v[i]; \
197 } \
198 return *this; \
199 } \
200 /* seed(SeedSeq) would be a constructor if having a constructor */ \
201 /* didn't cause headaches with defaults */ \
 /* seed(ss): request (3 + sizeof(value_type)) / 4 32-bit words per      \
    element from ss.generate(), then assemble each element from its      \
    group of words with assemble_from_u32. */ \
202 template<typename SeedSeq> R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq& ss) { \
203 r123array##_N##x##W ret; \
204 const size_t Ngen = _N * ((3 + sizeof(value_type)) / 4); \
205 uint32_t u32[Ngen]; \
206 uint32_t* p32 = &u32[0]; \
207 ss.generate(&u32[0], &u32[Ngen]); \
208 for(size_t i = 0; i < _N; ++i) { \
209 ret.v[i] = assemble_from_u32<value_type>(p32); \
210 p32 += (3 + sizeof(value_type)) / 4; \
211 } \
212 return ret; \
213 } \
214 \
215protected: \
 /* incr_carefully(n): add the low part of n to v[0], then repeatedly    \
    shift n right by the width of value_type (or clear it when the       \
    widths match), add one for a carry out of the previous element, and  \
    add the result to the next element.  Stops early once nothing is     \
    left to add. */ \
216 R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n) { \
217 /* n may be greater than the maximum value of a single value_type */ \
218 value_type vtn; \
219 vtn = n; \
220 v[0] += n; \
221 unsigned const rshift = 8 * ((sizeof(n) > sizeof(value_type)) ? sizeof(value_type) : 0); \
222 for(size_t i = 1; i < _N; ++i) { \
223 if(rshift) { \
224 n >>= rshift; \
225 } else { \
226 n = 0; \
227 } \
228 if(v[i - 1] < vtn) \
229 ++n; \
230 if(n == 0) \
231 break; \
232 vtn = n; \
233 v[i] += n; \
234 } \
235 return *this; \
236 }
237
240// There are several tricky considerations for the insertion and extraction
241// operators:
242// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
243// not as 16 bytes.
244// - we would like to be able to print r123array1xm128i.
245// - we do not want an int conversion operator in r123m128i because it causes
246// lots of ambiguity problems with automatic promotions.
247// Solution: r123arrayinsertable and r123arrayextractable
248
/* Output adaptor for one array element: holds a const reference to the
   element and streams it with the element type's own operator<<. */
template<typename T> struct r123arrayinsertable {
    const T& v;
    r123arrayinsertable(const T& elem) : v(elem) { }
    friend std::ostream& operator<<(std::ostream& out, r123arrayinsertable<T> const& wrapped) {
        return out << wrapped.v;
    }
};

/* uint8_t elements are converted to int before insertion, so they are
   written as numbers rather than as single characters. */
template<> struct r123arrayinsertable<uint8_t> {
    uint8_t const& v;
    r123arrayinsertable(uint8_t const& elem) : v(elem) { }
    friend std::ostream& operator<<(std::ostream& out, r123arrayinsertable<uint8_t> const& wrapped) {
        const int as_number = wrapped.v;
        return out << as_number;
    }
};
260
/* Input adaptor for one array element: holds a mutable reference to the
   element and fills it with the element type's own operator>>. */
template<typename T> struct r123arrayextractable {
    T& v;
    r123arrayextractable(T& t_) : v(t_) { }
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t) { return is >> t.v; }
};

/* uint8_t elements are read as a number through an int temporary and then
   narrowed into the element, instead of being read as a single character.

   The temporary is zero-initialized: when the stream is already in a failed
   state the extraction stores nothing at all, and before C++11 a failed
   parse also leaves the target untouched.  Without the initializer the
   assignment below would read an indeterminate value in those cases.  With
   it, the element is set to 0, matching what a C++11 failed parse stores. */
template<> struct r123arrayextractable<uint8_t> {
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) { }
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t) {
        int i = 0;
        is >> i;
        t.v = i;
        return is;
    }
};
/* CXXOVERLOADS(_N, W, T): free functions and a namespace alias that
   accompany struct r123array<_N>x<W>.

   - operator<< writes the _N elements separated by single spaces, each
     wrapped in r123arrayinsertable<T>.
   - operator>> reads _N elements in order, each through an
     r123arrayextractable<T> bound to a.v[i].  The loop does not test the
     stream state between elements.
   - r123::Array<_N>x<W> is declared as a typedef of r123array<_N>x<W>. */
279#define CXXOVERLOADS(_N, W, T) \
280 \
281 inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a) { \
282 os << r123arrayinsertable<T>(a.v[0]); \
283 for(size_t i = 1; i < _N; ++i) \
284 os << " " << r123arrayinsertable<T>(a.v[i]); \
285 return os; \
286 } \
287 \
288 inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a) { \
289 for(size_t i = 0; i < _N; ++i) { \
290 r123arrayextractable<T> x(a.v[i]); \
291 is >> x; \
292 } \
293 return is; \
294 } \
295 \
296 namespace r123 { \
297 typedef r123array##_N##x##W Array##_N##x##W; \
298 }
299
300#endif /* __cplusplus */
301
302/* _r123array_tpl expands to a declaration of struct r123arrayNxW.
303
304 In C, it's nothing more than a struct containing an array of N
305 objects of type T.
306
307 In C++ it's the same, but endowed with an assortment of member
308 functions, typedefs and friends. In C++, r123arrayNxW looks a lot
309 like std::array<T,N>, has most of the capabilities of a container,
310 and satisfies the requirements outlined in compat/Engine.hpp for
311 counter and key types. ArrayNxW, in the r123 namespace, is
312 a typedef equivalent to r123arrayNxW.
313*/
314
/* Parameters: _N is the element count, W is the width tag pasted into the
   struct name, and T is the element type.  The struct body is `T v[_N]`
   followed by the expansions of CXXMETHODS and CXXMETHODS_REQUIRING_STL.
   CXXOVERLOADS is expanded after the struct. */
315#define _r123array_tpl(_N, W, T) \
316 \
317 \
318 struct r123array##_N##x##W { \
319 T v[_N]; \
320 CXXMETHODS(_N, W, T) \
321 CXXMETHODS_REQUIRING_STL \
322 }; \
323 \
324 CXXOVERLOADS(_N, W, T)
325
/* Concrete array types, one _r123array_tpl expansion each.  The 32-bit
   and 64-bit expansions are bracketed by CUDA pragmas that suppress and
   then restore the code_is_unreachable diagnostic.  The 64-bit types
   exist only when R123_USE_64BIT is set, and the m128i type only when
   R123_USE_SSE is set. */
326#if defined(__CUDACC__)
327/* Disable complaints from CUDA8 and C++ */
328#pragma diag_suppress = code_is_unreachable
329#endif
330_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
331 _r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
332 _r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
333 _r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
334
335#if R123_USE_64BIT
336 _r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
337 _r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
338 _r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
339#endif
340#if defined(__CUDACC__)
341#pragma diag_default = code_is_unreachable
342#endif
343
344 _r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
345
346#if R123_USE_SSE
347 _r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */
348#endif
349
350/* In C++, it's natural to use sizeof(a::value_type), but in C it's
351 pretty convoluted to figure out the width of the value_type of an
352 r123arrayNxW:
353*/
/* R123_W(a): `a` must be an array *type* name (it is used in a pointer
   cast).  The result is 8 * sizeof of one element of the type's v[]
   member, i.e. the element width in bits assuming 8-bit bytes.  The
   null-pointer member access is only an operand of sizeof, so it is
   never evaluated. */
354#define R123_W(a) (8 * sizeof(((a*)0)->v[0]))
355
360#endif
#define _r123array_tpl(_N, W, T)
Definition: array.h:315
#define R123_CUDA_DEVICE
uint uint32_t
ulong uint64_t
uchar uint8_t