Metall v0.30
A persistent memory allocator for data-centric analytics
 
Loading...
Searching...
No Matches
string_key_store.hpp
Go to the documentation of this file.
1// Copyright 2022 Lawrence Livermore National Security, LLC and other Metall
2// Project Developers. See the top-level COPYRIGHT file for details.
3//
4// SPDX-License-Identifier: (Apache-2.0 OR MIT)
5
6#ifndef METALL_CONTAINER_CONCURRENT_STRING_KEY_STORE_HPP
7#define METALL_CONTAINER_CONCURRENT_STRING_KEY_STORE_HPP
8
9#include <memory>
10#include <string_view>
11#include <utility>
12#include <tuple>
17#include <metall/metall.hpp>
18
19namespace metall::container {
20
21namespace {
22namespace mc = metall::container;
23}
24
30template <typename _value_type,
31 typename allocator_type = metall::manager::allocator_type<std::byte>>
33 private:
34 template <typename T>
35 using other_allocator =
36 typename std::allocator_traits<allocator_type>::template rebind_alloc<T>;
37
38 template <typename T>
39 using other_scoped_allocator =
41
42 using internal_id_type = uint64_t;
43 using internal_string_type =
45 using internal_value_type = std::tuple<internal_string_type, _value_type>;
46
47 using map_type = mc::unordered_multimap<
48 internal_id_type, internal_value_type, std::hash<internal_id_type>,
49 std::equal_to<internal_id_type>,
50 other_scoped_allocator<
51 std::pair<const internal_id_type, internal_value_type>>>;
52
54 static constexpr internal_id_type k_max_internal_id =
55 std::numeric_limits<internal_id_type>::max();
56
57 public:
58 using key_type = std::string_view;
62
65 explicit string_key_store(const allocator_type &allocator = allocator_type())
66 : m_map(allocator) {}
67
73 const allocator_type &allocator = allocator_type())
74 : m_unique(unique), m_hash_seed(hash_seed), m_map(allocator) {}
75
78
80 string_key_store(const string_key_store &other, const allocator_type &alloc)
81 : m_unique(other.m_unique),
82 m_hash_seed(other.m_hash_seed),
83 m_map(other.m_map, alloc) {}
84
87
90 const allocator_type &alloc) noexcept
91 : m_unique(other.m_unique),
92 m_hash_seed(other.m_hash_seed),
93 m_map(std::move(other.m_map), alloc) {}
94
97
100
107 const auto internal_id = priv_find_or_generate_internal_id(key);
108 if (m_unique && m_map.count(internal_id) == 1) {
109 return false;
110 }
111
112 // We delegate tuple to decide if value_type's constructor needs an
113 // allocator object. By taking this approach, we can avoid the complex logic
114 // of uses-allocator construction. This is why we use tuple for
115 // internal_value_type.
116 auto internal_value =
117 internal_value_type{std::allocator_arg, m_map.get_allocator()};
118 std::get<0>(internal_value) = key;
119
120 m_map.emplace(internal_id, std::move(internal_value));
121
122 return true;
123 }
124
130 bool insert(const key_type &key, const value_type &value) {
131 const auto internal_id = priv_find_or_generate_internal_id(key);
132
133 assert(!m_unique || m_map.count(internal_id) <= 1);
134 if (m_unique && m_map.count(internal_id) == 1) {
135 assert(m_map.count(internal_id) == 1);
136 std::get<1>(m_map.find(internal_id)->second) = value;
137 } else {
138 m_map.emplace(internal_id, internal_value_type{std::allocator_arg,
139 m_map.get_allocator(),
140 key.data(), value});
141 }
142 return true;
143 }
144
150 const auto internal_id = priv_find_or_generate_internal_id(key);
151
152 assert(!m_unique || m_map.count(internal_id) <= 1);
153 if (m_unique && m_map.count(internal_id) == 1) {
154 std::get<1>(m_map.find(internal_id)->second) = std::move(value);
155 } else {
156 m_map.emplace(internal_id, internal_value_type{
157 std::allocator_arg, m_map.get_allocator(),
158 key.data(), std::move(value)});
159 }
160 return true;
161 }
162
164 void clear() {
165 m_map.clear();
166 m_max_id_probe_distance = 0;
167 }
168
172 std::size_t count(const key_type &key) const {
173 const auto internal_id = priv_find_internal_id(key);
174 return m_map.count(internal_id);
175 }
176
179 std::size_t size() const { return m_map.size(); }
180
184 const key_type key(const locator_type &position) const {
185 const auto &key = std::get<0>(position.m_iterator->second);
186 return key_type(key.c_str());
187 }
188
193 // Convert to a non-const iterator
194 auto non_const_iterator =
195 m_map.erase(position.m_iterator, position.m_iterator);
196 return std::get<1>(non_const_iterator->second);
197 }
198
202 const value_type &value(const locator_type &position) const {
203 return std::get<1>(position.m_iterator->second);
204 }
205
210 const auto internal_id = priv_find_internal_id(key);
211 return locator_type(m_map.find(internal_id));
212 }
213
219 std::pair<locator_type, locator_type> equal_range(const key_type &key) const {
220 const auto internal_id = priv_find_internal_id(key);
221 const auto range = m_map.equal_range(internal_id);
222 return std::make_pair(locator_type(range.first),
223 locator_type(range.second));
224 }
225
228 locator_type begin() const { return locator_type(m_map.begin()); }
229
232 locator_type end() const { return locator_type(m_map.end()); }
233
237 std::size_t erase(const key_type &key) {
238 const auto internal_id = priv_find_internal_id(key);
239 return m_map.erase(internal_id);
240 }
241
246 if (position == end()) return end();
247 return locator_type(m_map.erase(position.m_iterator));
248 }
249
253 std::size_t max_id_probe_distance() const { return m_max_id_probe_distance; }
254
256 void rehash() {
257 auto old_map = map_type(get_allocator());
258 std::swap(old_map, m_map);
259 m_map.clear();
260 assert(m_map.empty());
261 for (auto &elem : old_map) {
262 insert(std::get<0>(elem.second), std::move(std::get<1>(elem.second)));
263 }
264 }
265
268 allocator_type get_allocator() { return m_map.get_allocator(); }
269
273 bool unique() const { return m_unique; }
274
277 bool hash_seed() const { return m_hash_seed; }
278
279 private:
281 internal_id_type priv_generate_internal_id(const key_type &key) {
282 auto internal_id = priv_hash_key(key, m_hash_seed);
283
284 std::size_t distance = 0;
285 while (m_map.count(internal_id) > 0) {
286 internal_id = priv_increment_internal_id(internal_id);
287 ++distance;
288 }
289 m_max_id_probe_distance = std::max(distance, m_max_id_probe_distance);
290
291 return internal_id;
292 }
293
297 internal_id_type priv_find_internal_id(const key_type &key) const {
298 auto internal_id = priv_hash_key(key, m_hash_seed);
299
300 for (std::size_t d = 0; d <= m_max_id_probe_distance; ++d) {
301 const auto itr = m_map.find(internal_id);
302 if (itr == m_map.end()) {
303 break;
304 }
305
306 if (std::get<0>(itr->second) == key) {
307 return internal_id;
308 }
309 internal_id = priv_increment_internal_id(internal_id);
310 }
311
312 return k_max_internal_id; // Couldn't find
313 }
314
318 internal_id_type priv_find_or_generate_internal_id(const key_type &key) {
319 auto internal_id = priv_find_internal_id(key);
320 if (internal_id == k_max_internal_id) { // Couldn't find
321 // Generate a new one.
322 internal_id = priv_generate_internal_id(key);
323 }
324 return internal_id;
325 }
326
327 static internal_id_type priv_hash_key(const key_type &key,
328 [[maybe_unused]] const uint64_t seed) {
329#ifdef METALL_CONTAINER_STRING_KEY_STORE_USE_SIMPLE_HASH
330 internal_id_type hash = key.empty() ? 0 : (uint8_t)key[0] % 2;
331#else
332 auto hash = (internal_id_type)metall::mtlldetail::murmur_hash_64a(
333 key.data(), (int)key.length(), seed);
334#endif
335 if (hash == k_max_internal_id) {
336 hash = priv_increment_internal_id(hash);
337 }
338 assert(hash != k_max_internal_id);
339 return hash;
340 }
341
342 static internal_id_type priv_increment_internal_id(
343 const internal_id_type id) {
344 const auto new_id = (id + 1) % k_max_internal_id;
345 assert(new_id != k_max_internal_id);
346 return new_id;
347 }
348
349 bool m_unique{false};
350 uint64_t m_hash_seed{123};
351 map_type m_map{allocator_type{}};
352 std::size_t m_max_id_probe_distance{0};
353};
354
355} // namespace metall::container
356
357#endif // METALL_CONTAINER_CONCURRENT_STRING_KEY_STORE_HPP
A ke-value store that uses string for its key.
Definition string_key_store.hpp:32
std::pair< locator_type, locator_type > equal_range(const key_type &key) const
Returns a range containing all elements with key key in the container.
Definition string_key_store.hpp:219
void rehash()
Rehash elements.
Definition string_key_store.hpp:256
std::size_t erase(const key_type &key)
Removes all elements with the key equivalent to key.
Definition string_key_store.hpp:237
string_key_store(const string_key_store &other, const allocator_type &alloc)
Allocator-extended copy constructor.
Definition string_key_store.hpp:80
locator_type find(const key_type &key) const
Finds an element with key equivalent to 'key'.
Definition string_key_store.hpp:209
locator_type erase(const locator_type &position)
Removes the element at 'position'.
Definition string_key_store.hpp:245
locator_type end() const
Returns an iterator to the element following the last element.
Definition string_key_store.hpp:232
const value_type & value(const locator_type &position) const
Returns the value of the element at 'position'.
Definition string_key_store.hpp:202
bool hash_seed() const
Returns the hash seed.
Definition string_key_store.hpp:277
void clear()
Clear all contents. This call does not reduce the memory usage.
Definition string_key_store.hpp:164
string_key_store & operator=(const string_key_store &)=default
Copy assignment operator.
string_key_store(const allocator_type &allocator=allocator_type())
Constructor.
Definition string_key_store.hpp:65
std::size_t max_id_probe_distance() const
Returns the maximum ID probe distance. In other words, the maximum number of key pairs that have the ...
Definition string_key_store.hpp:253
value_type & value(const locator_type &position)
Returns the value of the element at 'position'.
Definition string_key_store.hpp:192
const key_type key(const locator_type &position) const
Returns the key of the element at 'position'.
Definition string_key_store.hpp:184
_value_type value_type
Definition string_key_store.hpp:59
bool insert(const key_type &key)
Inserts a key with the default value. If the unique parameter in the constructor was set to true and ...
Definition string_key_store.hpp:106
std::size_t size() const
Returns the number of elements in this container.
Definition string_key_store.hpp:179
bool insert(const key_type &key, value_type &&value)
Insert() with move operator version.
Definition string_key_store.hpp:149
bool unique() const
Returns if this container inserts keys uniquely.
Definition string_key_store.hpp:273
string_key_store(string_key_store &&) noexcept=default
Move constructor.
string_key_store(const string_key_store &)=default
Copy constructor.
string_key_store & operator=(string_key_store &&) noexcept=default
Move assignment operator.
locator_type begin() const
Return an iterator that points the first element in the container.
Definition string_key_store.hpp:228
allocator_type get_allocator()
Returns an instance of the internal allocator.
Definition string_key_store.hpp:268
std::size_t count(const key_type &key) const
Counts the number of items associated with the key.
Definition string_key_store.hpp:172
string_key_store_locator< typename map_type::const_iterator > locator_type
Definition string_key_store.hpp:61
bool insert(const key_type &key, const value_type &value)
Inserts an item. Suppose the unique parameter was set to true in the constructor and a duplicate key ...
Definition string_key_store.hpp:130
std::string_view key_type
Definition string_key_store.hpp:58
string_key_store(const bool unique, const uint64_t hash_seed, const allocator_type &allocator=allocator_type())
Constructor.
Definition string_key_store.hpp:72
Namespace for Metall container.
boost::container::vector< T, Allocator > vector
A vector container that uses Metall as its default allocator.
Definition vector.hpp:17
boost::unordered_multimap< Key, T, Hash, KeyEqual, Allocator > unordered_multimap
An unordered_multimap container that uses Metall as its default allocator.
Definition unordered_map.hpp:29
metall::mtlldetail::hash< seed > hash
Hash a value of type T.
Definition hash.hpp:18