Metall v0.30
A persistent memory allocator for data-centric analytics
 
Loading...
Searching...
No Matches
random.hpp
Go to the documentation of this file.
1// Copyright 2020 Lawrence Livermore National Security, LLC and other Metall
2// Project Developers. See the top-level COPYRIGHT file for details.
3//
4// SPDX-License-Identifier: (Apache-2.0 OR MIT)
5
6#ifndef METALL_UTILITY_RANDOM_HPP
7#define METALL_UTILITY_RANDOM_HPP
8
9#include <cstdint>
10#include <cstring>
11#include <cassert>
12
13namespace metall::utility {
14
15#if !defined(DOXYGEN_SKIP)
16namespace detail {
17
18// -----------------------------------------------------------------------------
19// This also contains public domain code from <http://prng.di.unimi.it/>.
20// From splitmix64.c:
21// -----------------------------------------------------------------------------
22// Written in 2015 by Sebastiano Vigna (vigna@acm.org)
23//
24// To the extent possible under law, the author has dedicated all copyright
25// and related and neighboring rights to this software to the public domain
26// worldwide. This software is distributed without any warranty.
27//
28// See <http://creativecommons.org/publicdomain/zero/1.0/>.
29// -----------------------------------------------------------------------------
30class SplitMix64 {
31 public:
32 explicit SplitMix64(const uint64_t seed) : m_x(seed) {}
33
34 // This is a fixed-increment version of Java 8's SplittableRandom generator
35 // See http://dx.doi.org/10.1145/2714064.2660195 and
36 // http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
37 //
38 // It is a very fast generator passing BigCrush, and it can be useful if
39 // for some reason you absolutely want 64 bits of state.
40 uint64_t next() {
41 uint64_t z = (m_x += 0x9e3779b97f4a7c15);
42 z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
43 z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
44 return z ^ (z >> 31);
45 }
46
47 private:
48 uint64_t m_x; /* The state can be seeded with any value. */
49};
50
51// -----------------------------------------------------------------------------
52// This also contains public domain code from <http://prng.di.unimi.it/>.
53// From xoshiro512plusplus.c:
54// -----------------------------------------------------------------------------
55// Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
56//
57// To the extent possible under law, the author has dedicated all copyright
58// and related and neighboring rights to this software to the public domain
59// worldwide. This software is distributed without any warranty.
60//
61// See <http://creativecommons.org/publicdomain/zero/1.0/>.
62//
63// This is xoshiro512++ 1.0, one of our all-purpose, rock-solid
64// generators. It has excellent (about 1ns) speed, a state (512 bits) that
65// is large enough for any parallel application, and it passes all tests
66// we are aware of.
67//
68// For generating just floating-point numbers, xoshiro512+ is even faster.
69//
70// The state must be seeded so that it is not everywhere zero. If you have
71// a 64-bit seed, we suggest to seed a splitmix64 generator and use its
72// output to fill s.
73// -----------------------------------------------------------------------------
74class xoshiro512pp {
75 public:
76 using result_type = uint64_t;
77
78 explicit xoshiro512pp(const uint64_t seed) {
79 SplitMix64 seed_gen(seed);
80 for (int i = 0; i < 8; ++i) {
81 m_s[i] = seed_gen.next();
82 }
83 }
84
85 bool equal(const xoshiro512pp &other) const {
86 for (int i = 0; i < 8; ++i) {
87 if (m_s[i] != other.m_s[i]) return false;
88 }
89 return true;
90 }
91
92 result_type next() {
93 const uint64_t result = rotl(m_s[0] + m_s[2], 17) + m_s[2];
94
95 const uint64_t t = m_s[1] << 11;
96
97 m_s[2] ^= m_s[0];
98 m_s[5] ^= m_s[1];
99 m_s[1] ^= m_s[2];
100 m_s[7] ^= m_s[3];
101 m_s[3] ^= m_s[4];
102 m_s[4] ^= m_s[5];
103 m_s[0] ^= m_s[6];
104 m_s[6] ^= m_s[7];
105
106 m_s[6] ^= t;
107
108 m_s[7] = rotl(m_s[7], 21);
109
110 return result;
111 }
112
113 // This is the jump function for the generator. It is equivalent
114 // to 2^256 calls to next(); it can be used to generate 2^256
115 // non-overlapping subsequences for parallel computations.
116 void jump() {
117 static const uint64_t JUMP[] = {0x33ed89b6e7a353f9, 0x760083d7955323be,
118 0x2837f2fbb5f22fae, 0x4b8c5674d309511c,
119 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c,
120 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db};
121
122 uint64_t t[sizeof m_s / sizeof *m_s];
123 memset(t, 0, sizeof t);
124 for (int i = 0; i < (int)(sizeof JUMP / sizeof *JUMP); i++)
125 for (int b = 0; b < 64; b++) {
126 if (JUMP[i] & UINT64_C(1) << b)
127 for (int w = 0; w < (int)(sizeof m_s / sizeof *m_s); w++)
128 t[w] ^= m_s[w];
129 next();
130 }
131
132 memcpy(m_s, t, sizeof m_s);
133 }
134
135 // This is the long-jump function for the generator. It is equivalent to
136 // 2^384 calls to next(); it can be used to generate 2^128 starting points,
137 // from each of which jump() will generate 2^128 non-overlapping
138 // subsequences for parallel distributed computations.
139 void long_jump() {
140 static const uint64_t LONG_JUMP[] = {
141 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a,
142 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17,
143 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5};
144
145 uint64_t t[sizeof m_s / sizeof *m_s];
146 memset(t, 0, sizeof t);
147 for (int i = 0; i < (int)(sizeof LONG_JUMP / sizeof *LONG_JUMP); i++)
148 for (int b = 0; b < 64; b++) {
149 if (LONG_JUMP[i] & UINT64_C(1) << b)
150 for (int w = 0; w < (int)(sizeof m_s / sizeof *m_s); w++)
151 t[w] ^= m_s[w];
152 next();
153 }
154
155 memcpy(m_s, t, sizeof m_s);
156 }
157
158 private:
159 static constexpr uint64_t rotl(const uint64_t x, int k) {
160 return (x << k) | (x >> (64 - k));
161 }
162
163 uint64_t m_s[8];
164};
165
166// -----------------------------------------------------------------------------
167// This also contains public domain code from <http://prng.di.unimi.it/>.
168// From xoshiro1024plusplus.c:
169// -----------------------------------------------------------------------------
170// Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
171//
172// To the extent possible under law, the author has dedicated all copyright
173// and related and neighboring rights to this software to the public domain
174// worldwide. This software is distributed without any warranty.
175//
176// See <http://creativecommons.org/publicdomain/zero/1.0/>.
177//
178// This is xoroshiro1024++ 1.0, one of our all-purpose, rock-solid,
179// large-state generators. It is extremely fast and it passes all
180// tests we are aware of. Its state however is too large--in general,
181// the xoshiro256 family should be preferred.
182//
183// For generating just floating-point numbers, xoroshiro1024* is even
184// faster (but it has a very mild bias, see notes in the comments).
185//
186// The state must be seeded so that it is not everywhere zero. If you have
187// a 64-bit seed, we suggest to seed a splitmix64 generator and use its
188// output to fill s.
189// -----------------------------------------------------------------------------
190class xoshiro1024pp {
191 public:
192 using result_type = uint64_t;
193
194 explicit xoshiro1024pp(const uint64_t seed) {
195 SplitMix64 seed_gen(seed);
196 for (int i = 0; i < 16; ++i) {
197 m_s[i] = seed_gen.next();
198 }
199 }
200
201 bool equal(const xoshiro1024pp &other) const {
202 for (int i = 0; i < 16; ++i) {
203 if (m_s[i] != other.m_s[i]) return false;
204 }
205 return true;
206 }
207
208 uint64_t next() {
209 const int q = m_p;
210 const uint64_t s0 = m_s[m_p = (m_p + 1) & 15];
211 uint64_t s15 = m_s[q];
212 const uint64_t result = rotl(s0 + s15, 23) + s15;
213
214 s15 ^= s0;
215 m_s[q] = rotl(s0, 25) ^ s15 ^ (s15 << 27);
216 m_s[m_p] = rotl(s15, 36);
217
218 return result;
219 }
220
221 // This is the jump function for the generator. It is equivalent
222 // to 2^512 calls to next(); it can be used to generate 2^512
223 // non-overlapping subsequences for parallel computations.
224 void jump() {
225 static const uint64_t JUMP[] = {
226 0x931197d8e3177f17, 0xb59422e0b9138c5f, 0xf06a6afb49d668bb,
227 0xacb8a6412c8a1401, 0x12304ec85f0b3468, 0xb7dfe7079209891e,
228 0x405b7eec77d9eb14, 0x34ead68280c44e4a, 0xe0e4ba3e0ac9e366,
229 0x8f46eda8348905b7, 0x328bf4dbad90d6ff, 0xc8fd6fb31c9effc3,
230 0xe899d452d4b67652, 0x45f387286ade3205, 0x03864f454a8920bd,
231 0xa68fa28725b1b384};
232
233 uint64_t t[sizeof m_s / sizeof *m_s];
234 memset(t, 0, sizeof t);
235 for (int i = 0; i < (int)(sizeof JUMP / sizeof *JUMP); i++)
236 for (int b = 0; b < 64; b++) {
237 if (JUMP[i] & UINT64_C(1) << b)
238 for (int j = 0; j < int(sizeof m_s / sizeof *m_s); j++)
239 t[j] ^= m_s[(j + m_p) & (sizeof m_s / sizeof *m_s - 1)];
240 next();
241 }
242
243 for (int i = 0; i < (int)(sizeof m_s / sizeof *m_s); i++) {
244 m_s[(i + m_p) & (sizeof m_s / sizeof *m_s - 1)] = t[i];
245 }
246 }
247
248 // This is the long-jump function for the generator. It is equivalent to
249 // 2^768 calls to next(); it can be used to generate 2^256 starting points,
250 // from each of which jump() will generate 2^256 non-overlapping
251 // subsequences for parallel distributed computations.
252 void long_jump() {
253 static const uint64_t LONG_JUMP[] = {
254 0x7374156360bbf00f, 0x4630c2efa3b3c1f6, 0x6654183a892786b1,
255 0x94f7bfcbfb0f1661, 0x27d8243d3d13eb2d, 0x9701730f3dfb300f,
256 0x2f293baae6f604ad, 0xa661831cb60cd8b6, 0x68280c77d9fe008c,
257 0x50554160f5ba9459, 0x2fc20b17ec7b2a9a, 0x49189bbdc8ec9f8f,
258 0x92a65bca41852cc1, 0xf46820dd0509c12a, 0x52b00c35fbf92185,
259 0x1e5b3b7f589e03c1};
260
261 uint64_t t[sizeof m_s / sizeof *m_s];
262 memset(t, 0, sizeof t);
263 for (int i = 0; i < (int)(sizeof LONG_JUMP / sizeof *LONG_JUMP); i++)
264 for (int b = 0; b < 64; b++) {
265 if (LONG_JUMP[i] & UINT64_C(1) << b)
266 for (int j = 0; j < (int)(sizeof m_s / sizeof *m_s); j++)
267 t[j] ^= m_s[(j + m_p) & (sizeof m_s / sizeof *m_s - 1)];
268 next();
269 }
270
271 for (int i = 0; i < (int)(sizeof m_s / sizeof *m_s); i++) {
272 m_s[(i + m_p) & (sizeof m_s / sizeof *m_s - 1)] = t[i];
273 }
274 }
275
276 private:
277 static constexpr uint64_t rotl(const uint64_t x, const int k) {
278 return (x << k) | (x >> (64 - k));
279 }
280
281 int m_p{0};
282 uint64_t m_s[16];
283};
284
285template <typename xoshiro_type>
286class base_rand_xoshiro {
287 public:
288 using result_type = typename xoshiro_type::result_type;
289
290 explicit base_rand_xoshiro(const uint64_t seed = 123)
291 : m_rand_generator(seed) {}
292
295 auto operator()() {
296 const auto n = m_rand_generator.next();
297 assert(min() <= n && n <= max());
298 return n;
299 }
300
304 bool equal(const base_rand_xoshiro &other) {
305 return m_rand_generator.equal(other.m_rand_generator);
306 }
307
310 static constexpr result_type min() {
311 return std::numeric_limits<result_type>::min();
312 }
313
316 static constexpr result_type max() {
317 return std::numeric_limits<result_type>::max();
318 }
319
320 // TODO: implement
321 // operator<<() {}
322 // operator>>() {}
323
324 private:
325 xoshiro_type m_rand_generator;
326};
327
328template <typename xoshiro_type>
329inline bool operator==(const base_rand_xoshiro<xoshiro_type> &lhs,
330 const base_rand_xoshiro<xoshiro_type> &rhs) {
331 return lhs.equal(rhs);
332}
333
334template <typename xoshiro_type>
335inline bool operator!=(const base_rand_xoshiro<xoshiro_type> &lhs,
336 const base_rand_xoshiro<xoshiro_type> &rhs) {
337 return !(lhs == rhs);
338}
339} // namespace detail
340
341#endif // DOXYGEN_SKIP
342
346using rand_512 = detail::base_rand_xoshiro<detail::xoshiro512pp>;
347
351using rand_1024 = detail::base_rand_xoshiro<detail::xoshiro1024pp>;
352} // namespace metall::utility
353
354#endif // METALL_UTILITY_RANDOM_HPP
Namespace for utility items.
detail::base_rand_xoshiro< detail::xoshiro1024pp > rand_1024
pseudo-random number generator that has a similar interface as the ones in STL The actual algorithm i...
Definition random.hpp:351
detail::base_rand_xoshiro< detail::xoshiro512pp > rand_512
pseudo-random number generator that has a similar interface as the ones in STL The actual algorithm i...
Definition random.hpp:346
bool operator!=(const stl_allocator< T, kernel > &rhd, const stl_allocator< T, kernel > &lhd)
Definition stl_allocator.hpp:238
bool operator==(const stl_allocator< T, kernel > &rhd, const stl_allocator< T, kernel > &lhd)
Definition stl_allocator.hpp:230