Metall v0.30
A persistent memory allocator for data-centric analytics
 
mpi.hpp
// Copyright 2020 Lawrence Livermore National Security, LLC and other Metall
// Project Developers. See the top-level COPYRIGHT file for details.
//
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

#ifndef METALL_METALL_UTILITY_MPI_HPP
#define METALL_METALL_UTILITY_MPI_HPP

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>

#include <algorithm>  // for std::min
#include <utility>    // for std::pair

#include <mpi.h>

#include <metall/logger.hpp>
#include <metall/detail/file.hpp>
#include <metall/detail/mmap.hpp>

/// \namespace metall::utility::mpi
/// \brief Namespace for MPI utilities.
namespace metall::utility::mpi {

inline int comm_rank(const MPI_Comm &comm) {
  int rank;
  if (::MPI_Comm_rank(comm, &rank) != MPI_SUCCESS) {
    logger::out(logger::level::error, __FILE__, __LINE__,
                "Failed MPI_Comm_rank");
    return -1;
  }
  return rank;
}

inline int comm_size(const MPI_Comm &comm) {
  int size;
  if (::MPI_Comm_size(comm, &size) != MPI_SUCCESS) {
    logger::out(logger::level::error, __FILE__, __LINE__,
                "Failed MPI_Comm_size");
    return -1;
  }
  return size;
}

inline bool barrier(const MPI_Comm &comm) {
  if (::MPI_Barrier(comm) != MPI_SUCCESS) {
    logger::out(logger::level::error, __FILE__, __LINE__, "Failed MPI_Barrier");
    return false;
  }
  return true;
}
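
// Example (illustrative sketch, not part of this header): typical use of the
// helpers above, checking the -1 error convention of comm_rank()/comm_size():
//
//   const int rank = metall::utility::mpi::comm_rank(MPI_COMM_WORLD);
//   const int size = metall::utility::mpi::comm_size(MPI_COMM_WORLD);
//   if (rank == -1 || size == -1) { /* handle the error */ }
//   metall::utility::mpi::barrier(MPI_COMM_WORLD);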

/// \brief Performs the logical 'and' operation.
/// \param local_value The local bool value.
/// \param comm MPI communicator.
/// \return A pair of bools: the first is true if the MPI operation succeeded;
/// the second is the result of the logical 'and' operation.
inline std::pair<bool, bool> global_logical_and(const bool local_value,
                                                const MPI_Comm &comm) {
  // Copy the bool into a char so that the send buffer matches MPI_CHAR
  const char local = local_value ? 1 : 0;
  char global_value = 0;
  if (::MPI_Allreduce(&local, &global_value, 1, MPI_CHAR, MPI_LAND, comm) !=
      MPI_SUCCESS) {
    return std::make_pair(false, false);
  }
  return std::make_pair(true, global_value != 0);
}

/// \brief Performs the logical 'or' operation.
/// \param local_value The local bool value.
/// \param comm MPI communicator.
/// \return A pair of bools: the first is true if the MPI operation succeeded;
/// the second is the result of the logical 'or' operation.
inline std::pair<bool, bool> global_logical_or(const bool local_value,
                                               const MPI_Comm &comm) {
  // Copy the bool into a char so that the send buffer matches MPI_CHAR
  const char local = local_value ? 1 : 0;
  char global_value = 0;
  if (::MPI_Allreduce(&local, &global_value, 1, MPI_CHAR, MPI_LOR, comm) !=
      MPI_SUCCESS) {
    return std::make_pair(false, false);
  }
  return std::make_pair(true, global_value != 0);
}

/// \brief Determines a local root rank.
/// \param comm MPI communicator.
/// \return The rank of the node-local root; -1 on error.
inline int determine_local_root(const MPI_Comm &comm) {
  const char *shm_name = "metall_local_root";
  ::shm_unlink(shm_name);
  barrier(comm);

  const int world_rank = comm_rank(comm);
  const int world_size = comm_size(comm);
  if (world_rank == -1 || world_size == -1) {
    return -1;
  }

  // Serialize the ranks: every rank except rank 0 blocks here until it
  // receives a token from the preceding rank.
  if (world_rank > 0) {
    if (::MPI_Recv(nullptr, 0, MPI_BYTE, world_rank - 1, 1, comm,
                   MPI_STATUS_IGNORE) != MPI_SUCCESS) {
      logger::out(logger::level::error, __FILE__, __LINE__, "Failed MPI_Recv");
      return -1;
    }
  }

  const int shm_size = 4096;
  bool this_rank_created = false;

  // The first rank to run on each node creates the shared-memory segment;
  // later ranks on the same node just open it.
  int shm_fd = ::shm_open(shm_name, O_RDWR, 0666);
  if (shm_fd == -1) {
    shm_fd = ::shm_open(shm_name, O_CREAT | O_RDWR, 0666);
    if (shm_fd == -1) {
      logger::out(logger::level::error, __FILE__, __LINE__,
                  "Failed to open & create a shm file");
      return -1;
    }
    this_rank_created = true;

    if (!metall::mtlldetail::extend_file_size(shm_fd, shm_size, false)) {
      logger::out(logger::level::warning, __FILE__, __LINE__,
                  "Failed to extend a shm file; however, continue working");
    }
  }

  // By now, the segment is ready and has the correct size
  void *const ptr =
      metall::mtlldetail::map_file_write_mode(shm_fd, nullptr, shm_size, 0, 0);
  if (!ptr) {
    logger::out(logger::level::error, __FILE__, __LINE__,
                "Failed to map a shm file");
    return -1;
  }

  // The segment holds (minimum rank on this node, #of ranks on this node)
  auto *p_min_rank_size = static_cast<std::pair<int, int> *>(ptr);
  if (this_rank_created) {
    p_min_rank_size->first = world_rank;
    p_min_rank_size->second = 1;
  } else {
    p_min_rank_size->first = std::min(p_min_rank_size->first, world_rank);
    p_min_rank_size->second++;
  }

  // Notify rank + 1 of completion
  if (world_rank < world_size - 1) {
    if (::MPI_Send(nullptr, 0, MPI_BYTE, world_rank + 1, 1, comm) !=
        MPI_SUCCESS) {
      logger::out(logger::level::error, __FILE__, __LINE__, "Failed MPI_Send");
      return -1;
    }
  }

  // All ranks have completed. Each rank reads its node-local data.
  barrier(comm);
  const int local_root_rank = p_min_rank_size->first;

  // Unmap (and close) the shared-memory segment
  if (!metall::mtlldetail::munmap(shm_fd, ptr, shm_size, false)) {
    logger::out(logger::level::warning, __FILE__, __LINE__,
                "Failed to unmap the shm file; however, continue working.");
  }
  barrier(comm);
  if (this_rank_created && ::shm_unlink(shm_name) != 0) {
    logger::perror(logger::level::warning, __FILE__, __LINE__,
                   "Failed to remove the shm file; however, continue working.");
  }

  return local_root_rank;
}
}  // namespace metall::utility::mpi
#endif  // METALL_METALL_UTILITY_MPI_HPP
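
A minimal usage sketch (illustrative only, not part of Metall; the file name demo.cpp and the printed message are made up, and the header is assumed to be installed as <metall/utility/mpi.hpp> per its include guard). determine_local_root() selects one rank per compute node, typically the rank that performs node-local setup, such as creating a node-local Metall datastore that the remaining ranks on that node later open:

// demo.cpp -- illustrative sketch only
#include <cstdio>
#include <cstdlib>

#include <mpi.h>

#include <metall/utility/mpi.hpp>

int main(int argc, char *argv[]) {
  ::MPI_Init(&argc, &argv);
  {
    namespace mpi_util = metall::utility::mpi;

    const int rank = mpi_util::comm_rank(MPI_COMM_WORLD);
    const int local_root = mpi_util::determine_local_root(MPI_COMM_WORLD);
    if (rank == -1 || local_root == -1) {
      ::MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    // Only one rank per node performs node-local setup work
    if (rank == local_root) {
      std::printf("Rank %d is the local root on this node\n", rank);
    }
    mpi_util::barrier(MPI_COMM_WORLD);
  }
  ::MPI_Finalize();
  return 0;
}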