Metall  v0.28
A persistent memory allocator for data-centric analytics
random.hpp
Go to the documentation of this file.
1 // Copyright 2020 Lawrence Livermore National Security, LLC and other Metall
2 // Project Developers. See the top-level COPYRIGHT file for details.
3 //
4 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
5 
6 #ifndef METALL_UTILITY_RANDOM_HPP
7 #define METALL_UTILITY_RANDOM_HPP
8 
9 #include <cstdint>
10 #include <cstring>
11 #include <cassert>
12 
13 namespace metall::utility {
14 
15 #if !defined(DOXYGEN_SKIP)
16 namespace detail {
17 
18 // -----------------------------------------------------------------------------
19 // This also contains public domain code from <http://prng.di.unimi.it/>.
20 // From splitmix64.c:
21 // -----------------------------------------------------------------------------
22 // Written in 2015 by Sebastiano Vigna (vigna@acm.org)
23 //
24 // To the extent possible under law, the author has dedicated all copyright
25 // and related and neighboring rights to this software to the public domain
26 // worldwide. This software is distributed without any warranty.
27 //
28 // See <http://creativecommons.org/publicdomain/zero/1.0/>.
29 // -----------------------------------------------------------------------------
30 class SplitMix64 {
31  public:
32  explicit SplitMix64(const uint64_t seed) : m_x(seed) {}
33 
34  // This is a fixed-increment version of Java 8's SplittableRandom generator
35  // See http://dx.doi.org/10.1145/2714064.2660195 and
36  // http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
37  //
38  // It is a very fast generator passing BigCrush, and it can be useful if
39  // for some reason you absolutely want 64 bits of state.
40  uint64_t next() {
41  uint64_t z = (m_x += 0x9e3779b97f4a7c15);
42  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
43  z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
44  return z ^ (z >> 31);
45  }
46 
47  private:
48  uint64_t m_x; /* The state can be seeded with any value. */
49 };
50 
51 // -----------------------------------------------------------------------------
52 // This also contains public domain code from <http://prng.di.unimi.it/>.
53 // From xoshiro512plusplus.c:
54 // -----------------------------------------------------------------------------
55 // Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
56 //
57 // To the extent possible under law, the author has dedicated all copyright
58 // and related and neighboring rights to this software to the public domain
59 // worldwide. This software is distributed without any warranty.
60 //
61 // See <http://creativecommons.org/publicdomain/zero/1.0/>.
62 //
63 // This is xoshiro512++ 1.0, one of our all-purpose, rock-solid
64 // generators. It has excellent (about 1ns) speed, a state (512 bits) that
65 // is large enough for any parallel application, and it passes all tests
66 // we are aware of.
67 //
68 // For generating just floating-point numbers, xoshiro512+ is even faster.
69 //
70 // The state must be seeded so that it is not everywhere zero. If you have
71 // a 64-bit seed, we suggest to seed a splitmix64 generator and use its
72 // output to fill s.
73 // -----------------------------------------------------------------------------
74 class xoshiro512pp {
75  public:
76  using result_type = uint64_t;
77 
78  explicit xoshiro512pp(const uint64_t seed) {
79  SplitMix64 seed_gen(seed);
80  for (int i = 0; i < 8; ++i) {
81  m_s[i] = seed_gen.next();
82  }
83  }
84 
85  bool equal(const xoshiro512pp &other) const {
86  for (int i = 0; i < 8; ++i) {
87  if (m_s[i] != other.m_s[i]) return false;
88  }
89  return true;
90  }
91 
92  result_type next() {
93  const uint64_t result = rotl(m_s[0] + m_s[2], 17) + m_s[2];
94 
95  const uint64_t t = m_s[1] << 11;
96 
97  m_s[2] ^= m_s[0];
98  m_s[5] ^= m_s[1];
99  m_s[1] ^= m_s[2];
100  m_s[7] ^= m_s[3];
101  m_s[3] ^= m_s[4];
102  m_s[4] ^= m_s[5];
103  m_s[0] ^= m_s[6];
104  m_s[6] ^= m_s[7];
105 
106  m_s[6] ^= t;
107 
108  m_s[7] = rotl(m_s[7], 21);
109 
110  return result;
111  }
112 
113  // This is the jump function for the generator. It is equivalent
114  // to 2^256 calls to next(); it can be used to generate 2^256
115  // non-overlapping subsequences for parallel computations.
116  void jump() {
117  static const uint64_t JUMP[] = {0x33ed89b6e7a353f9, 0x760083d7955323be,
118  0x2837f2fbb5f22fae, 0x4b8c5674d309511c,
119  0xb11ac47a7ba28c25, 0xf1be7667092bcc1c,
120  0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db};
121 
122  uint64_t t[sizeof m_s / sizeof *m_s];
123  memset(t, 0, sizeof t);
124  for (int i = 0; i < (int)(sizeof JUMP / sizeof *JUMP); i++)
125  for (int b = 0; b < 64; b++) {
126  if (JUMP[i] & UINT64_C(1) << b)
127  for (int w = 0; w < (int)(sizeof m_s / sizeof *m_s); w++)
128  t[w] ^= m_s[w];
129  next();
130  }
131 
132  memcpy(m_s, t, sizeof m_s);
133  }
134 
135  // This is the long-jump function for the generator. It is equivalent to
136  // 2^384 calls to next(); it can be used to generate 2^128 starting points,
137  // from each of which jump() will generate 2^128 non-overlapping
138  // subsequences for parallel distributed computations.
139  void long_jump() {
140  static const uint64_t LONG_JUMP[] = {
141  0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a,
142  0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17,
143  0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5};
144 
145  uint64_t t[sizeof m_s / sizeof *m_s];
146  memset(t, 0, sizeof t);
147  for (int i = 0; i < (int)(sizeof LONG_JUMP / sizeof *LONG_JUMP); i++)
148  for (int b = 0; b < 64; b++) {
149  if (LONG_JUMP[i] & UINT64_C(1) << b)
150  for (int w = 0; w < (int)(sizeof m_s / sizeof *m_s); w++)
151  t[w] ^= m_s[w];
152  next();
153  }
154 
155  memcpy(m_s, t, sizeof m_s);
156  }
157 
158  private:
159  static constexpr uint64_t rotl(const uint64_t x, int k) {
160  return (x << k) | (x >> (64 - k));
161  }
162 
163  uint64_t m_s[8];
164 };
165 
166 // -----------------------------------------------------------------------------
167 // This also contains public domain code from <http://prng.di.unimi.it/>.
168 // From xoshiro1024plusplus.c:
169 // -----------------------------------------------------------------------------
170 // Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
171 //
172 // To the extent possible under law, the author has dedicated all copyright
173 // and related and neighboring rights to this software to the public domain
174 // worldwide. This software is distributed without any warranty.
175 //
176 // See <http://creativecommons.org/publicdomain/zero/1.0/>.
177 //
178 // This is xoroshiro1024++ 1.0, one of our all-purpose, rock-solid,
179 // large-state generators. It is extremely fast and it passes all
180 // tests we are aware of. Its state however is too large--in general,
181 // the xoshiro256 family should be preferred.
182 //
183 // For generating just floating-point numbers, xoroshiro1024* is even
184 // faster (but it has a very mild bias, see notes in the comments).
185 //
186 // The state must be seeded so that it is not everywhere zero. If you have
187 // a 64-bit seed, we suggest to seed a splitmix64 generator and use its
188 // output to fill s.
189 // -----------------------------------------------------------------------------
190 class xoshiro1024pp {
191  public:
192  using result_type = uint64_t;
193 
194  explicit xoshiro1024pp(const uint64_t seed) {
195  SplitMix64 seed_gen(seed);
196  for (int i = 0; i < 16; ++i) {
197  m_s[i] = seed_gen.next();
198  }
199  }
200 
201  bool equal(const xoshiro1024pp &other) const {
202  for (int i = 0; i < 16; ++i) {
203  if (m_s[i] != other.m_s[i]) return false;
204  }
205  return true;
206  }
207 
208  uint64_t next() {
209  const int q = m_p;
210  const uint64_t s0 = m_s[m_p = (m_p + 1) & 15];
211  uint64_t s15 = m_s[q];
212  const uint64_t result = rotl(s0 + s15, 23) + s15;
213 
214  s15 ^= s0;
215  m_s[q] = rotl(s0, 25) ^ s15 ^ (s15 << 27);
216  m_s[m_p] = rotl(s15, 36);
217 
218  return result;
219  }
220 
221  // This is the jump function for the generator. It is equivalent
222  // to 2^512 calls to next(); it can be used to generate 2^512
223  // non-overlapping subsequences for parallel computations.
224  void jump() {
225  static const uint64_t JUMP[] = {
226  0x931197d8e3177f17, 0xb59422e0b9138c5f, 0xf06a6afb49d668bb,
227  0xacb8a6412c8a1401, 0x12304ec85f0b3468, 0xb7dfe7079209891e,
228  0x405b7eec77d9eb14, 0x34ead68280c44e4a, 0xe0e4ba3e0ac9e366,
229  0x8f46eda8348905b7, 0x328bf4dbad90d6ff, 0xc8fd6fb31c9effc3,
230  0xe899d452d4b67652, 0x45f387286ade3205, 0x03864f454a8920bd,
231  0xa68fa28725b1b384};
232 
233  uint64_t t[sizeof m_s / sizeof *m_s];
234  memset(t, 0, sizeof t);
235  for (int i = 0; i < (int)(sizeof JUMP / sizeof *JUMP); i++)
236  for (int b = 0; b < 64; b++) {
237  if (JUMP[i] & UINT64_C(1) << b)
238  for (int j = 0; j < int(sizeof m_s / sizeof *m_s); j++)
239  t[j] ^= m_s[(j + m_p) & (sizeof m_s / sizeof *m_s - 1)];
240  next();
241  }
242 
243  for (int i = 0; i < (int)(sizeof m_s / sizeof *m_s); i++) {
244  m_s[(i + m_p) & (sizeof m_s / sizeof *m_s - 1)] = t[i];
245  }
246  }
247 
248  // This is the long-jump function for the generator. It is equivalent to
249  // 2^768 calls to next(); it can be used to generate 2^256 starting points,
250  // from each of which jump() will generate 2^256 non-overlapping
251  // subsequences for parallel distributed computations.
252  void long_jump() {
253  static const uint64_t LONG_JUMP[] = {
254  0x7374156360bbf00f, 0x4630c2efa3b3c1f6, 0x6654183a892786b1,
255  0x94f7bfcbfb0f1661, 0x27d8243d3d13eb2d, 0x9701730f3dfb300f,
256  0x2f293baae6f604ad, 0xa661831cb60cd8b6, 0x68280c77d9fe008c,
257  0x50554160f5ba9459, 0x2fc20b17ec7b2a9a, 0x49189bbdc8ec9f8f,
258  0x92a65bca41852cc1, 0xf46820dd0509c12a, 0x52b00c35fbf92185,
259  0x1e5b3b7f589e03c1};
260 
261  uint64_t t[sizeof m_s / sizeof *m_s];
262  memset(t, 0, sizeof t);
263  for (int i = 0; i < (int)(sizeof LONG_JUMP / sizeof *LONG_JUMP); i++)
264  for (int b = 0; b < 64; b++) {
265  if (LONG_JUMP[i] & UINT64_C(1) << b)
266  for (int j = 0; j < (int)(sizeof m_s / sizeof *m_s); j++)
267  t[j] ^= m_s[(j + m_p) & (sizeof m_s / sizeof *m_s - 1)];
268  next();
269  }
270 
271  for (int i = 0; i < (int)(sizeof m_s / sizeof *m_s); i++) {
272  m_s[(i + m_p) & (sizeof m_s / sizeof *m_s - 1)] = t[i];
273  }
274  }
275 
276  private:
277  static constexpr uint64_t rotl(const uint64_t x, const int k) {
278  return (x << k) | (x >> (64 - k));
279  }
280 
281  int m_p{0};
282  uint64_t m_s[16];
283 };
284 
285 template <typename xoshiro_type>
286 class base_rand_xoshiro {
287  public:
288  using result_type = typename xoshiro_type::result_type;
289 
290  explicit base_rand_xoshiro(const uint64_t seed = 123)
291  : m_rand_generator(seed) {}
292 
295  auto operator()() {
296  const auto n = m_rand_generator.next();
297  assert(min() <= n && n <= max());
298  return n;
299  }
300 
304  bool equal(const base_rand_xoshiro &other) {
305  return m_rand_generator.equal(other.m_rand_generator);
306  }
307 
310  static constexpr result_type min() {
311  return std::numeric_limits<result_type>::min();
312  }
313 
316  static constexpr result_type max() {
317  return std::numeric_limits<result_type>::max();
318  }
319 
320  // TODO: implement
321  // operator<<() {}
322  // operator>>() {}
323 
324  private:
325  xoshiro_type m_rand_generator;
326 };
327 
328 template <typename xoshiro_type>
329 inline bool operator==(const base_rand_xoshiro<xoshiro_type> &lhs,
330  const base_rand_xoshiro<xoshiro_type> &rhs) {
331  return lhs.equal(rhs);
332 }
333 
334 template <typename xoshiro_type>
335 inline bool operator!=(const base_rand_xoshiro<xoshiro_type> &lhs,
336  const base_rand_xoshiro<xoshiro_type> &rhs) {
337  return !(lhs == rhs);
338 }
339 } // namespace detail
340 
341 #endif // DOXYGEN_SKIP
342 
346 using rand_512 = detail::base_rand_xoshiro<detail::xoshiro512pp>;
347 
351 using rand_1024 = detail::base_rand_xoshiro<detail::xoshiro1024pp>;
352 } // namespace metall::utility
353 
354 #endif // METALL_UTILITY_RANDOM_HPP
Namespace for utility items.
bool operator==(const container_of_containers_iterator_adaptor< outer_iterator_type, inner_iterator_type > &lhs, const container_of_containers_iterator_adaptor< outer_iterator_type, inner_iterator_type > &rhs)
Definition: container_of_containers_iterator_adaptor.hpp:121
bool operator!=(const container_of_containers_iterator_adaptor< outer_iterator_type, inner_iterator_type > &lhs, const container_of_containers_iterator_adaptor< outer_iterator_type, inner_iterator_type > &rhs)
Definition: container_of_containers_iterator_adaptor.hpp:130
detail::base_rand_xoshiro< detail::xoshiro1024pp > rand_1024
pseudo-random number generator that has a similar interface as the ones in STL The actual algorithm i...
Definition: random.hpp:351
detail::base_rand_xoshiro< detail::xoshiro512pp > rand_512
pseudo-random number generator that has a similar interface as the ones in STL The actual algorithm i...
Definition: random.hpp:346