Metall  v0.28
A persistent memory allocator for data-centric analytics
string_key_store.hpp
Go to the documentation of this file.
1 // Copyright 2022 Lawrence Livermore National Security, LLC and other Metall
2 // Project Developers. See the top-level COPYRIGHT file for details.
3 //
4 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
5 
6 #ifndef METALL_CONTAINER_CONCURRENT_STRING_KEY_STORE_HPP
7 #define METALL_CONTAINER_CONCURRENT_STRING_KEY_STORE_HPP
8 
9 #include <memory>
10 #include <string_view>
11 #include <utility>
12 #include <tuple>
17 #include <metall/metall.hpp>
18 
19 namespace metall::container {
20 
21 namespace {
22 namespace mc = metall::container;
23 }
24 
30 template <typename _value_type,
31  typename allocator_type = metall::manager::allocator_type<std::byte>>
33  private:
34  template <typename T>
35  using other_allocator =
36  typename std::allocator_traits<allocator_type>::template rebind_alloc<T>;
37 
38  template <typename T>
39  using other_scoped_allocator =
41 
42  using internal_id_type = uint64_t;
43  using internal_string_type =
44  mc::basic_string<char, std::char_traits<char>, other_allocator<char>>;
45  using internal_value_type = std::tuple<internal_string_type, _value_type>;
46 
47  using map_type = mc::unordered_multimap<
48  internal_id_type, internal_value_type, std::hash<internal_id_type>,
49  std::equal_to<internal_id_type>,
50  other_scoped_allocator<
51  std::pair<const internal_id_type, internal_value_type>>>;
52 
54  static constexpr internal_id_type k_max_internal_id =
55  std::numeric_limits<internal_id_type>::max();
56 
57  public:
58  using key_type = std::string_view;
59  using value_type = _value_type;
60  using locator_type =
62 
65  explicit string_key_store(const allocator_type &allocator = allocator_type())
66  : m_map(allocator) {}
67 
72  string_key_store(const bool unique, const uint64_t hash_seed,
73  const allocator_type &allocator = allocator_type())
74  : m_unique(unique), m_hash_seed(hash_seed), m_map(allocator) {}
75 
77  string_key_store(const string_key_store &) = default;
78 
80  string_key_store(const string_key_store &other, const allocator_type &alloc)
81  : m_unique(other.m_unique),
82  m_hash_seed(other.m_hash_seed),
83  m_map(other.m_map, alloc) {}
84 
86  string_key_store(string_key_store &&) noexcept = default;
87 
90  const allocator_type &alloc) noexcept
91  : m_unique(other.m_unique),
92  m_hash_seed(other.m_hash_seed),
93  m_map(std::move(other.m_map), alloc) {}
94 
97 
99  string_key_store &operator=(string_key_store &&) noexcept = default;
100 
106  bool insert(const key_type &key) {
107  const auto internal_id = priv_find_or_generate_internal_id(key);
108  if (m_unique && m_map.count(internal_id) == 1) {
109  return false;
110  }
111 
112  // We delegate tuple to decide if value_type's constructor needs an
113  // allocator object. By taking this approach, we can avoid the complex logic
114  // of uses-allocator construction. This is why we use tuple for
115  // internal_value_type.
116  auto internal_value =
117  internal_value_type{std::allocator_arg, m_map.get_allocator()};
118  std::get<0>(internal_value) = key;
119 
120  m_map.emplace(internal_id, std::move(internal_value));
121 
122  return true;
123  }
124 
130  bool insert(const key_type &key, const value_type &value) {
131  const auto internal_id = priv_find_or_generate_internal_id(key);
132 
133  assert(!m_unique || m_map.count(internal_id) <= 1);
134  if (m_unique && m_map.count(internal_id) == 1) {
135  assert(m_map.count(internal_id) == 1);
136  std::get<1>(m_map.find(internal_id)->second) = value;
137  } else {
138  m_map.emplace(internal_id, internal_value_type{std::allocator_arg,
139  m_map.get_allocator(),
140  key.data(), value});
141  }
142  return true;
143  }
144 
149  bool insert(const key_type &key, value_type &&value) {
150  const auto internal_id = priv_find_or_generate_internal_id(key);
151 
152  assert(!m_unique || m_map.count(internal_id) <= 1);
153  if (m_unique && m_map.count(internal_id) == 1) {
154  std::get<1>(m_map.find(internal_id)->second) = std::move(value);
155  } else {
156  m_map.emplace(internal_id, internal_value_type{
157  std::allocator_arg, m_map.get_allocator(),
158  key.data(), std::move(value)});
159  }
160  return true;
161  }
162 
164  void clear() {
165  m_map.clear();
166  m_max_id_probe_distance = 0;
167  }
168 
172  std::size_t count(const key_type &key) const {
173  const auto internal_id = priv_find_internal_id(key);
174  return m_map.count(internal_id);
175  }
176 
179  std::size_t size() const { return m_map.size(); }
180 
184  const key_type key(const locator_type &position) const {
185  const auto &key = std::get<0>(position.m_iterator->second);
186  return key_type(key.c_str());
187  }
188 
192  value_type &value(const locator_type &position) {
193  // Convert to a non-const iterator
194  auto non_const_iterator =
195  m_map.erase(position.m_iterator, position.m_iterator);
196  return std::get<1>(non_const_iterator->second);
197  }
198 
202  const value_type &value(const locator_type &position) const {
203  return std::get<1>(position.m_iterator->second);
204  }
205 
209  locator_type find(const key_type &key) const {
210  const auto internal_id = priv_find_internal_id(key);
211  return locator_type(m_map.find(internal_id));
212  }
213 
219  std::pair<locator_type, locator_type> equal_range(const key_type &key) const {
220  const auto internal_id = priv_find_internal_id(key);
221  const auto range = m_map.equal_range(internal_id);
222  return std::make_pair(locator_type(range.first),
223  locator_type(range.second));
224  }
225 
228  locator_type begin() const { return locator_type(m_map.begin()); }
229 
232  locator_type end() const { return locator_type(m_map.end()); }
233 
237  std::size_t erase(const key_type &key) {
238  const auto internal_id = priv_find_internal_id(key);
239  return m_map.erase(internal_id);
240  }
241 
245  locator_type erase(const locator_type &position) {
246  if (position == end()) return end();
247  return locator_type(m_map.erase(position.m_iterator));
248  }
249 
253  std::size_t max_id_probe_distance() const { return m_max_id_probe_distance; }
254 
256  void rehash() {
257  auto old_map = map_type(get_allocator());
258  std::swap(old_map, m_map);
259  m_map.clear();
260  assert(m_map.empty());
261  for (auto &elem : old_map) {
262  insert(std::get<0>(elem.second), std::move(std::get<1>(elem.second)));
263  }
264  }
265 
268  allocator_type get_allocator() { return m_map.get_allocator(); }
269 
273  bool unique() const { return m_unique; }
274 
277  bool hash_seed() const { return m_hash_seed; }
278 
279  private:
281  internal_id_type priv_generate_internal_id(const key_type &key) {
282  auto internal_id = priv_hash_key(key, m_hash_seed);
283 
284  std::size_t distance = 0;
285  while (m_map.count(internal_id) > 0) {
286  internal_id = priv_increment_internal_id(internal_id);
287  ++distance;
288  }
289  m_max_id_probe_distance = std::max(distance, m_max_id_probe_distance);
290 
291  return internal_id;
292  }
293 
297  internal_id_type priv_find_internal_id(const key_type &key) const {
298  auto internal_id = priv_hash_key(key, m_hash_seed);
299 
300  for (std::size_t d = 0; d <= m_max_id_probe_distance; ++d) {
301  const auto itr = m_map.find(internal_id);
302  if (itr == m_map.end()) {
303  break;
304  }
305 
306  if (std::get<0>(itr->second) == key) {
307  return internal_id;
308  }
309  internal_id = priv_increment_internal_id(internal_id);
310  }
311 
312  return k_max_internal_id; // Couldn't find
313  }
314 
318  internal_id_type priv_find_or_generate_internal_id(const key_type &key) {
319  auto internal_id = priv_find_internal_id(key);
320  if (internal_id == k_max_internal_id) { // Couldn't find
321  // Generate a new one.
322  internal_id = priv_generate_internal_id(key);
323  }
324  return internal_id;
325  }
326 
327  static internal_id_type priv_hash_key(const key_type &key,
328  [[maybe_unused]] const uint64_t seed) {
329 #ifdef METALL_CONTAINER_STRING_KEY_STORE_USE_SIMPLE_HASH
330  internal_id_type hash = key.empty() ? 0 : (uint8_t)key[0] % 2;
331 #else
332  auto hash = (internal_id_type)metall::mtlldetail::murmur_hash_64a(
333  key.data(), (int)key.length(), seed);
334 #endif
335  if (hash == k_max_internal_id) {
336  hash = priv_increment_internal_id(hash);
337  }
338  assert(hash != k_max_internal_id);
339  return hash;
340  }
341 
342  static internal_id_type priv_increment_internal_id(
343  const internal_id_type id) {
344  const auto new_id = (id + 1) % k_max_internal_id;
345  assert(new_id != k_max_internal_id);
346  return new_id;
347  }
348 
349  bool m_unique{false};
350  uint64_t m_hash_seed{123};
351  map_type m_map{allocator_type{}};
352  std::size_t m_max_id_probe_distance{0};
353 };
354 
355 } // namespace metall::container
356 
357 #endif // METALL_CONTAINER_CONCURRENT_STRING_KEY_STORE_HPP
Definition: string_key_store_locator.hpp:12
A ke-value store that uses string for its key.
Definition: string_key_store.hpp:32
const value_type & value(const locator_type &position) const
Returns the value of the element at 'position'.
Definition: string_key_store.hpp:202
void rehash()
Rehash elements.
Definition: string_key_store.hpp:256
std::size_t erase(const key_type &key)
Removes all elements with the key equivalent to key.
Definition: string_key_store.hpp:237
string_key_store(const string_key_store &other, const allocator_type &alloc)
Allocator-extended copy constructor.
Definition: string_key_store.hpp:80
locator_type find(const key_type &key) const
Finds an element with key equivalent to 'key'.
Definition: string_key_store.hpp:209
locator_type erase(const locator_type &position)
Removes the element at 'position'.
Definition: string_key_store.hpp:245
locator_type end() const
Returns an iterator to the element following the last element.
Definition: string_key_store.hpp:232
bool hash_seed() const
Returns the hash seed.
Definition: string_key_store.hpp:277
void clear()
Clear all contents. This call does not reduce the memory usage.
Definition: string_key_store.hpp:164
string_key_store(const allocator_type &allocator=allocator_type())
Constructor.
Definition: string_key_store.hpp:65
std::size_t max_id_probe_distance() const
Returns the maximum ID probe distance. In other words, the maximum number of key pairs that have the ...
Definition: string_key_store.hpp:253
std::pair< locator_type, locator_type > equal_range(const key_type &key) const
Returns a range containing all elements with key key in the container.
Definition: string_key_store.hpp:219
string_key_store & operator=(string_key_store &&) noexcept=default
Move assignment operator.
string_key_store & operator=(const string_key_store &)=default
Copy assignment operator.
const key_type key(const locator_type &position) const
Returns the key of the element at 'position'.
Definition: string_key_store.hpp:184
_value_type value_type
Definition: string_key_store.hpp:59
bool insert(const key_type &key)
Inserts a key with the default value. If the unique parameter in the constructor was set to true and ...
Definition: string_key_store.hpp:106
std::size_t size() const
Returns the number of elements in this container.
Definition: string_key_store.hpp:179
bool insert(const key_type &key, value_type &&value)
Insert() with move operator version.
Definition: string_key_store.hpp:149
bool unique() const
Returns if this container inserts keys uniquely.
Definition: string_key_store.hpp:273
string_key_store(string_key_store &&) noexcept=default
Move constructor.
string_key_store(const string_key_store &)=default
Copy constructor.
locator_type begin() const
Return an iterator that points the first element in the container.
Definition: string_key_store.hpp:228
value_type & value(const locator_type &position)
Returns the value of the element at 'position'.
Definition: string_key_store.hpp:192
allocator_type get_allocator()
Returns an instance of the internal allocator.
Definition: string_key_store.hpp:268
std::size_t count(const key_type &key) const
Counts the number of items associated with the key.
Definition: string_key_store.hpp:172
string_key_store_locator< typename map_type::const_iterator > locator_type
Definition: string_key_store.hpp:61
bool insert(const key_type &key, const value_type &value)
Inserts an item. Suppose the unique parameter was set to true in the constructor and a duplicate key ...
Definition: string_key_store.hpp:130
std::string_view key_type
Definition: string_key_store.hpp:58
string_key_store(const bool unique, const uint64_t hash_seed, const allocator_type &allocator=allocator_type())
Constructor.
Definition: string_key_store.hpp:72
A STL compatible allocator.
Definition: stl_allocator.hpp:34
Namespace for Metall container.
boost::container::basic_string< CharT, Traits, Allocator > basic_string
A string container that uses Metall as its default allocator.
Definition: string.hpp:19
boost::unordered_multimap< Key, T, Hash, KeyEqual, Allocator > unordered_multimap
An unordered_multimap container that uses Metall as its default allocator.
Definition: unordered_map.hpp:29
boost::container::scoped_allocator_adaptor< OuterAlloc, InnerAlloc... > scoped_allocator_adaptor
An allocator which can be used with multilevel containers.
Definition: scoped_allocator.hpp:16
void swap(array< allocator_type > &lhd, array< allocator_type > &rhd) noexcept
Swap value instances.
Definition: array.hpp:193
metall::mtlldetail::hash< seed > hash
Hash a value of type T.
Definition: hash.hpp:18