Project Alice
Loading...
Searching...
No Matches
boxmuller.hpp
Go to the documentation of this file.
1/*
2Copyright 2010-2011, D. E. Shaw Research.
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
8
9* Redistributions of source code must retain the above copyright
10 notice, this list of conditions, and the following disclaimer.
11
12* Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions, and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16* Neither the name of D. E. Shaw Research nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33// This file implements the Box-Muller method for generating gaussian
34// random variables (GRVs). Box-Muller has the advantage of
35// deterministically requiring exactly two uniform random variables as
36// input and producing exactly two GRVs as output, which makes it
37// especially well-suited to the counter-based generators in
38// Random123. Other methods (e.g., Ziggurat, polar) require an
39// indeterminate number of inputs for each output and so require a
40// 'MicroURNG' to be used with Random123. The down side of Box-Muller
41// is that it calls sincos, log and sqrt, which may be slow. However,
42// on GPUs, these functions are remarkably fast, which makes
43// Box-Muller the fastest GRV generator we know of on GPUs.
44//
45// This file exports two structs and one overloaded function,
46// all in the r123 namespace:
47// struct r123::float2{ float x,y; }
48// struct r123::double2{ double x,y; }
49//
50// r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1);
51// r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1);
52//
53// float2 and double2 are identical to their synonymous global-
54// namespace structures in CUDA.
55//
56// This file may not be as portable, and has not been tested as
57// rigorously as other files in the library, e.g., the generators.
58// Nevertheless, we hope it is useful and we encourage developers to
59// copy it and modify it for their own use. We invite comments and
60// improvements.
61
62#ifndef _r123_BOXMULLER_HPP__
63#define _r123_BOXMULLER_HPP__
64
65#include <Random123/features/compilerfeatures.h>
66#include <Random123/uniform.hpp>
67#include <math.h>
68
69namespace r123 {
70
71#if !defined(__CUDACC__)
72typedef struct {
73 float x, y;
74} float2;
75typedef struct {
76 double x, y;
77} double2;
78#else
79typedef ::float2 float2;
80typedef ::double2 double2;
81#endif
82
83#if !defined(R123_NO_SINCOS) && defined(__APPLE__)
84/* MacOS X 10.10.5 (2015) doesn't have sincosf */
85#define R123_NO_SINCOS 1
86#endif
87
88#if R123_NO_SINCOS /* enable this if sincos and sincosf are not in the math library */
89R123_CUDA_DEVICE R123_STATIC_INLINE void sincosf(float x, float* s, float* c) {
90 *s = sinf(x);
91 *c = cosf(x);
92}
93
94R123_CUDA_DEVICE R123_STATIC_INLINE void sincos(double x, double* s, double* c) {
95 *s = sin(x);
96 *c = cos(x);
97}
98#endif /* sincos is not in the math library */
99
100#if !defined(CUDART_VERSION) || CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */
101
102R123_CUDA_DEVICE R123_STATIC_INLINE void sincospif(float x, float* s, float* c) {
103 float const PIf = 3.1415926535897932f;
104 sincosf(PIf * x, s, c);
105}
106
107R123_CUDA_DEVICE R123_STATIC_INLINE void sincospi(double x, double* s, double* c) {
108 double const PI = 3.1415926535897932;
109 sincos(PI * x, s, c);
110}
111#endif /* sincospi is not in math lib */
112
113/*
114 * take two 32bit unsigned random values and return a float2 with
115 * two random floats in a normal distribution via a Box-Muller transform
116 */
118 float r;
119 float2 f;
120 sincospif(uneg11<float>(u0), &f.x, &f.y);
121 r = sqrtf(-2.f * logf(u01<float>(u1))); // u01 is guaranteed to avoid 0.
122 f.x *= r;
123 f.y *= r;
124 return f;
125}
126
127/*
128 * take two 64bit unsigned random values and return a double2 with
129 * two random doubles in a normal distribution via a Box-Muller transform
130 */
132 double r;
133 double2 f;
134
135 sincospi(uneg11<double>(u0), &f.x, &f.y);
136 r = sqrt(-2. * log(u01<double>(u1))); // u01 is guaranteed to avoid 0.
137 f.x *= r;
138 f.y *= r;
139 return f;
140}
141} // namespace r123
142
143#endif /* BOXMULLER_H__ */
#define R123_CUDA_DEVICE
#define R123_STATIC_INLINE
Definition: array.h:344
R123_CUDA_DEVICE R123_STATIC_INLINE void sincospif(float x, float *s, float *c)
Definition: boxmuller.hpp:102
R123_CUDA_DEVICE R123_STATIC_INLINE void sincospi(double x, double *s, double *c)
Definition: boxmuller.hpp:107
R123_CUDA_DEVICE R123_STATIC_INLINE float2 boxmuller(uint32_t u0, uint32_t u1)
Definition: boxmuller.hpp:117
uint uint32_t
ulong uint64_t