Project Alice
Loading...
Searching...
No Matches
nvccfeatures.h
Go to the documentation of this file.
1/*
2Copyright 2010-2011, D. E. Shaw Research.
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
8
9* Redistributions of source code must retain the above copyright
10 notice, this list of conditions, and the following disclaimer.
11
12* Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions, and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16* Neither the name of D. E. Shaw Research nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32#ifndef __r123_nvcc_features_dot_h__
33#define __r123_nvcc_features_dot_h__
34
35#if !defined(CUDART_VERSION)
36#error "why are we in nvccfeatures.h if CUDART_VERSION is not defined"
37#endif
38
39#if CUDART_VERSION < 4010
40#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces. Random123 isunsupported. See comments in nvccfeatures.h"
41// This test was added in Random123-1.08 (August, 2013) because we
42// discovered that Ftype(maxTvalue<T>()) with Ftype=double and
43// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and
44// earlier. We can't be sure this bug doesn't also affect invocations
45// of other templated functions, e.g., essentially all of Random123.
46// Thus, we no longer trust CUDA versions earlier than 4.1 even though
47// we had previously tested and timed Random123 with CUDA 3.x and 4.0.
48// If you feel lucky or desperate, you can change #error to #warning, but
49// please take extra care to be sure that you are getting correct
50// results.
51#endif
52
// nvcc falls through to a host compiler (gcc, msvc, or xlc). So first
// define a couple of things and then include gccfeatures.h,
// msvcfeatures.h, or xlcfeatures.h as appropriate.
56
// The #ifdef __CUDA_ARCH__ below allows Philox32 and Philox64 to be
// compiled for both device and host functions in CUDA by setting
// compiler flags for the device function.
60#ifdef __CUDA_ARCH__
// Expands to the CUDA __device__ qualifier unless R123_CUDA_DEVICE
// was already defined.
#ifndef R123_CUDA_DEVICE
#define R123_CUDA_DEVICE __device__
#endif
64
// Default R123_USE_MULHILO64_CUDA_INTRIN to 1 (on) in the
// __CUDA_ARCH__ section unless it was already defined.
#ifndef R123_USE_MULHILO64_CUDA_INTRIN
#define R123_USE_MULHILO64_CUDA_INTRIN 1
#endif
68
// R123_THROW(x) ignores its argument and expands to R123_ASSERT(0),
// i.e. an assertion that always fails, unless R123_THROW was already
// defined.
#ifndef R123_THROW
// No exceptions in CUDA, at least up to 4.0
#define R123_THROW(x) R123_ASSERT(0)
#endif
73
// R123_ASSERT(x): does nothing when x is true and executes the inline
// assembly "trap;" when x is false. The caller supplies the trailing
// semicolon. Because the macro's own `if` already carries an `else`,
// an `else` written after R123_ASSERT(...) in caller code cannot bind
// to it.
#ifndef R123_ASSERT
#define R123_ASSERT(x) \
    if ((x))           \
        ;              \
    else               \
        asm("trap;")
#endif
81
// R123_BUILTIN_EXPECT(expr, likely) evaluates to expr; the `likely`
// argument is discarded. The expansion is parenthesized so that the
// macro can be used safely as an operand of a larger expression.
#ifndef R123_BUILTIN_EXPECT
#define R123_BUILTIN_EXPECT(expr, likely) (expr)
#endif
85
// Default R123_USE_AES_NI to 0 (off) in the __CUDA_ARCH__ section
// unless it was already defined.
#ifndef R123_USE_AES_NI
#define R123_USE_AES_NI 0
#endif
89
// Default R123_USE_SSE4_2 to 0 (off) in the __CUDA_ARCH__ section
// unless it was already defined.
#ifndef R123_USE_SSE4_2
#define R123_USE_SSE4_2 0
#endif
93
// Default R123_USE_SSE4_1 to 0 (off) in the __CUDA_ARCH__ section
// unless it was already defined.
#ifndef R123_USE_SSE4_1
#define R123_USE_SSE4_1 0
#endif
97
// Default R123_USE_SSE to 0 (off) in the __CUDA_ARCH__ section
// unless it was already defined.
#ifndef R123_USE_SSE
#define R123_USE_SSE 0
#endif
101
// Default R123_USE_GNU_UINT128 to 0 (off) in the __CUDA_ARCH__ section
// unless it was already defined.
#ifndef R123_USE_GNU_UINT128
#define R123_USE_GNU_UINT128 0
#endif
105
// R123_ULONG_LONG names the type `unsigned long long` unless it was
// already defined.
#ifndef R123_ULONG_LONG
// uint64_t, which is what we'd get without this, is
// not the same as unsigned long long
#define R123_ULONG_LONG unsigned long long
#endif
111
112#else // ! __CUDA_ARCH__
113
// If we're using nvcc, but not compiling for the CUDA architecture,
// then we must be compiling for the host. But host-compilation might
// use gcc, msvc, or xlc. This #else/#endif used to be higher up,
// mistakenly turning off all kinds of things on the host that are really
// problematic only in device code. It's not clear that we need to do
// anything special for host-code that we wouldn't otherwise do in
// xlcfeatures, gccfeatures or msvcfeatures. But if we do, this is
// the place to do it.
122
123#endif // __CUDA_ARCH__
124
125#if defined(__xlC__) || defined(__ibmxl__)
126#include "xlcfeatures.h"
127#elif defined(__GNUC__)
128#include "gccfeatures.h"
129#elif defined(_MSC_FULL_VER)
130#include "msvcfeatures.h"
131#endif
132
133#endif