52 #ifndef _HAVOQGT_MPI_HPP_
53 #define _HAVOQGT_MPI_HPP_
68 #define CHK_MPI(a) { if (a != MPI_SUCCESS) {\
69 char* error_string = NULL; \
71 MPI_Error_string(a, error_string, &len); \
72 std::cerr << __FILE__ << ", line " << __LINE__ \
73 <<" MPI ERROR = " << error_string << std::endl; \
78 namespace havoqgt {
namespace mpi {
81 inline MPI_Datatype
mpi_typeof(
signed short) {
return MPI_SHORT;}
82 inline MPI_Datatype
mpi_typeof(
signed int) {
return MPI_INT;}
83 inline MPI_Datatype
mpi_typeof(
signed long) {
return MPI_LONG;}
84 inline MPI_Datatype
mpi_typeof(
unsigned char) {
return MPI_UNSIGNED_CHAR;}
85 inline MPI_Datatype
mpi_typeof(
unsigned short) {
return MPI_UNSIGNED_SHORT;}
86 inline MPI_Datatype
mpi_typeof(
unsigned) {
return MPI_UNSIGNED;}
87 inline MPI_Datatype
mpi_typeof(
unsigned long) {
return MPI_UNSIGNED_LONG;}
88 inline MPI_Datatype
mpi_typeof(
unsigned long long) {
return MPI_UNSIGNED_LONG_LONG; }
89 inline MPI_Datatype
mpi_typeof(
signed long long) {
return MPI_LONG_LONG_INT;}
90 inline MPI_Datatype
mpi_typeof(
double) {
return MPI_DOUBLE;}
91 inline MPI_Datatype
mpi_typeof(
long double) {
return MPI_LONG_DOUBLE;}
92 inline MPI_Datatype
mpi_typeof(std::pair<int,int>) {
return MPI_2INT;}
93 inline MPI_Datatype
mpi_typeof(std::pair<float,int>) {
return MPI_FLOAT_INT;}
94 inline MPI_Datatype
mpi_typeof(std::pair<double,int>) {
return MPI_DOUBLE_INT;}
95 inline MPI_Datatype
mpi_typeof(std::pair<long double,int>) {
return MPI_LONG_DOUBLE_INT;}
96 inline MPI_Datatype
mpi_typeof(std::pair<short,int>) {
return MPI_SHORT_INT;}
101 template <
typename T>
104 template <
typename T>
107 template <
typename T>
110 template <
typename T>
113 template <
typename T>
139 if(ret != MPI_SUCCESS) {
140 char estring[MPI_MAX_ERROR_STRING];
142 MPI_Error_string(ret, estring, &len);
144 std::cerr <<
"MPI ERROR = " << estring << std::endl;
158 CHK_MPI(MPI_Ibarrier(mpi_comm, &request));
163 MPI_Test(&request, &is_done, &status);
175 template <
typename T,
typename Op>
185 template <
typename Vec,
typename Op>
188 MPI_Allreduce(MPI_IN_PLACE, &(vec[0]), vec.size(),
193 template <
typename T,
typename Op>
196 out_vec.resize(in_vec.size());
198 MPI_Allreduce( &(in_vec[0]), &(out_vec[0]), in_vec.size(),
204 template<
typename T,
typename Partitioner>
217 template <
typename T>
219 std::vector<T>& out_vec, std::vector<int>& out_recvcnts,
221 int mpi_size(0), mpi_rank(0);
222 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
223 CHK_MPI( MPI_Comm_rank( mpi_comm, &mpi_rank) );
225 void* send_vec = in_vec.size() > 0 ? (
void*) &in_vec[0] : NULL;
226 int* send_cnts = in_sendcnts.size() > 0 ? &in_sendcnts[0] : NULL;
230 out_recvcnts.resize(mpi_size);
231 CHK_MPI( MPI_Alltoall( (
void*) send_cnts,
sizeof(
int), MPI_BYTE,
232 (
void*) &(out_recvcnts[0]),
sizeof(
int), MPI_BYTE,
237 std::vector<int> sdispls(mpi_size,0), rdispls(mpi_size,0);
238 std::partial_sum(in_sendcnts.begin(), in_sendcnts.end(), sdispls.begin());
239 for(
size_t i=0; i<sdispls.size(); ++i) {
240 sdispls[i] -= in_sendcnts[i];
242 std::partial_sum(out_recvcnts.begin(), out_recvcnts.end(), rdispls.begin());
243 for(
size_t i=0; i<rdispls.size(); ++i) {
244 rdispls[i] -= out_recvcnts[i];
247 out_vec.resize(std::accumulate(out_recvcnts.begin(), out_recvcnts.end(), 0));
249 int* send_displs = sdispls.size() > 0 ? &sdispls[0] : NULL;
250 CHK_MPI( MPI_Alltoallv(send_vec, send_cnts, send_displs,
251 mpi_typeof(T()), &(out_vec[0]), &(out_recvcnts[0]),
257 template <
typename T,
typename Partitioner>
259 Partitioner &owner, MPI_Comm mpi_comm) {
260 int mpi_size(0), mpi_rank(0);
261 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
262 CHK_MPI( MPI_Comm_rank( mpi_comm, &mpi_rank) );
264 std::vector<int> send_counts(mpi_size,0);
265 std::vector<int> send_disps(mpi_size,0);
267 std::vector<int> recv_counts(mpi_size,0);
268 std::vector<int> recv_disps(mpi_size,0);
274 for(
size_t i=0; i<inout_vec.size(); ++i) {
275 send_counts[owner(inout_vec[i])] +=
sizeof(T);
282 std::partial_sum(send_counts.begin(), send_counts.end(), send_disps.begin());
283 for(
size_t i=0; i<send_disps.size(); ++i) {
284 send_disps[i] -= send_counts[i];
287 temp_vec.resize(inout_vec.size());
288 for(
size_t i=0; i<inout_vec.size(); ++i) {
289 std::vector<int> temp_arrange(mpi_size,0);
290 int dest_rank = owner(inout_vec[i]);
291 size_t dest_offset = send_disps[dest_rank] + temp_arrange[dest_rank];
292 temp_arrange[dest_rank] +=
sizeof(T);
293 dest_offset /=
sizeof(T);
294 temp_vec[dest_offset] = inout_vec[i];
299 CHK_MPI( MPI_Alltoall( (
void*) &(send_counts[0]),
sizeof(
int), MPI_BYTE,
300 (
void*) &(recv_counts[0]),
sizeof(
int), MPI_BYTE,
304 int total_recv = std::accumulate(recv_counts.begin(), recv_counts.end(), 0);
305 inout_vec.resize(total_recv/
sizeof(T));
308 std::partial_sum(recv_counts.begin(), recv_counts.end(), recv_disps.begin());
309 for(
size_t i=0; i<recv_disps.size(); ++i) {
310 recv_disps[i] -= recv_counts[i];
314 CHK_MPI( MPI_Alltoallv( (
void*) &(temp_vec[0]), &(send_counts[0]), &(send_disps[0]), MPI_BYTE,
315 (
void*) &(inout_vec[0]),&(recv_counts[0]), &(recv_disps[0]), MPI_BYTE,
319 template <
typename T,
typename Partitioner>
321 Partitioner &owner, MPI_Comm mpi_comm) {
322 int mpi_size(0), mpi_rank(0);
323 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
324 CHK_MPI( MPI_Comm_rank( mpi_comm, &mpi_rank) );
326 std::vector<int> send_counts(mpi_size,0);
327 std::vector<int> send_disps(mpi_size,0);
328 std::vector<int> recv_counts(mpi_size,0);
329 std::vector<int> recv_disps(mpi_size,0);
335 for(
size_t i=0; i<in_vec.size(); ++i) {
336 send_counts[owner(in_vec[i],
true)] +=
sizeof(T);
344 std::partial_sum(send_counts.begin(), send_counts.end(), send_disps.begin());
345 for(
size_t i=0; i<send_disps.size(); ++i) {
346 send_disps[i] -= send_counts[i];
351 std::vector<T> order_vec(in_vec.size());
352 std::vector<int> temp_arrange(mpi_size,0);
353 for(
size_t i=0; i<in_vec.size(); ++i) {
354 int dest_rank = owner(in_vec[i],
false);
355 assert (dest_rank >=0 && dest_rank < mpi_size);
357 size_t dest_offset = send_disps[dest_rank] + temp_arrange[dest_rank];
358 temp_arrange[dest_rank] +=
sizeof(T);
359 dest_offset /=
sizeof(T);
360 order_vec[dest_offset] = in_vec[i];
362 in_vec.swap(order_vec);
367 CHK_MPI( MPI_Alltoall( (
void*) &(send_counts[0]),
sizeof(
int), MPI_BYTE,
368 (
void*) &(recv_counts[0]),
sizeof(
int), MPI_BYTE,
373 int total_recv = std::accumulate(recv_counts.begin(), recv_counts.end(), 0);
374 out_vec.resize(total_recv/
sizeof(T));
377 std::partial_sum(recv_counts.begin(), recv_counts.end(), recv_disps.begin());
378 for(
size_t i=0; i<recv_disps.size(); ++i) {
379 recv_disps[i] -= recv_counts[i];
383 void* send_ptr = in_vec.empty() ? NULL :(
void*) &(in_vec[0]);
384 void* recv_ptr = out_vec.empty() ? NULL :(
void*) &(out_vec[0]);
385 CHK_MPI( MPI_Alltoallv( send_ptr, &(send_counts[0]), &(send_disps[0]), MPI_BYTE,
386 recv_ptr, &(recv_counts[0]), &(recv_disps[0]), MPI_BYTE,
396 template <
typename T>
399 const int HAVOQGT_TAG=42;
400 int mpi_size(0), mpi_rank(0);
401 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
402 CHK_MPI( MPI_Comm_rank( mpi_comm, &mpi_rank) );
403 assert( in_out_vec.size() == count * mpi_size );
405 for(
int i=0; i<mpi_size; ++i) {
407 for(
int j=i; j<mpi_size; ++j) {
410 CHK_MPI( MPI_Sendrecv_replace(&(in_out_vec[j*count]), count,
414 mpi_comm, &status) );
416 }
else if(mpi_rank == j) {
418 CHK_MPI( MPI_Sendrecv_replace(&(in_out_vec[i*count]), count,
422 mpi_comm, &status) );
431 template <
typename T>
434 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
436 out_p_vec.resize(mpi_size);
437 CHK_MPI( MPI_Allgather(&_t,
sizeof(_t), MPI_BYTE,
438 &(out_p_vec[0]),
sizeof(_t), MPI_BYTE, mpi_comm) );
442 template <
typename T>
444 std::vector<T>& out_recv_gather, MPI_Comm mpi_comm) {
446 int mpi_size(0), mpi_rank(0);
447 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
448 CHK_MPI( MPI_Comm_rank( mpi_comm, &mpi_rank) );
450 int my_size = in_send.size();
451 std::vector<int> recv_counts(mpi_size,0);
452 std::vector<int> recv_disps(mpi_size,0);
458 int total_recv = std::accumulate(recv_counts.begin(), recv_counts.end(), 0);
460 out_recv_gather.resize(total_recv);
463 std::partial_sum(recv_counts.begin(), recv_counts.end(), recv_disps.begin());
464 for(
size_t i=0; i<recv_disps.size(); ++i) {
465 recv_disps[i] -= recv_counts[i];
468 void* send_buff = in_send.size() == 0 ? NULL : &(in_send[0]);
470 &(out_recv_gather[0]), &(recv_counts[0]), &(recv_disps[0]),
486 template <
typename T>
488 std::vector< std::vector<T> >& out_p_vec,
490 int mpi_size(0), mpi_rank(0);
491 CHK_MPI( MPI_Comm_size( mpi_comm, &mpi_size) );
492 CHK_MPI( MPI_Comm_rank( mpi_comm, &mpi_rank) );
493 assert( mpi_size == (
int) in_p_vec.size() );
495 std::vector<size_t> per_rank_send_counts(mpi_size);
496 std::vector<size_t> per_rank_recv_counts(mpi_size);
498 for(
int i=0; i<mpi_size; ++i) {
499 per_rank_send_counts[i] = in_p_vec[i].size();
503 &(per_rank_recv_counts[0]), 1,
mpi_typeof(
size_t()),
506 out_p_vec.resize(mpi_size);
507 for(
int i=0; i<mpi_size; ++i) {
508 out_p_vec[i].resize(per_rank_recv_counts[i]);
542 for(
int i=0; i<mpi_size; ++i) {
543 MPI_Request request[2];
544 int send_to_rank = (mpi_rank + i) % mpi_size;
545 int recv_from_rank = (mpi_rank - i + mpi_size) % mpi_size;
546 CHK_MPI( MPI_Isend( &(in_p_vec[send_to_rank][0]),
547 (in_p_vec[send_to_rank].size() *
sizeof(T)),
549 send_to_rank, 0, mpi_comm, &(request[0]) ) );
550 CHK_MPI( MPI_Irecv( &(out_p_vec[recv_from_rank][0]),
551 (out_p_vec[recv_from_rank].size() *
sizeof(T)),
553 recv_from_rank, 0, mpi_comm, &(request[1]) ) );
554 CHK_MPI( MPI_Waitall(2, request, MPI_STATUSES_IGNORE) );
558 template <
typename T>
561 CHK_MPI( MPI_Bcast(&data,
sizeof(data), MPI_BYTE, root, comm));
567 CHK_MPI( MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank) );
576 CHK_MPI( MPI_Barrier( MPI_COMM_WORLD ));
578 CHK_MPI( MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank) );
589 CHK_MPI( MPI_Comm_rank(MPI_COMM_WORLD, &rank) );
595 CHK_MPI( MPI_Comm_size(MPI_COMM_WORLD, &size) );
634 #endif //_HAVOQGT_MPI_HPP_
communicator(MPI_Comm in_comm)
std::ostream & cout_rank0()
mpi_communicator(int argc, char **argv)
owner_sort(Partitioner _owner)
void mpi_all_to_all_better(std::vector< T > &in_vec, std::vector< T > &out_vec, Partitioner &owner, MPI_Comm mpi_comm)
void mpi_all_reduce_inplace(Vec &vec, Op in_op, MPI_Comm mpi_comm)
T mpi_all_reduce(T in_d, Op in_op, MPI_Comm mpi_comm)
std::ostream & get_null_ostream()
void mpi_all_gather(T _t, std::vector< T > &out_p_vec, MPI_Comm mpi_comm)
TODO: Add tests.
std::ostream & cout_rank0_barrier()
MPI_Datatype mpi_typeof(char)
void mpi_yield_barrier(MPI_Comm mpi_comm)
void mpi_all_to_all_in_place(std::vector< T > &in_out_vec, size_t count, MPI_Comm mpi_comm)
void check_mpi(int ret)
Checks MPI return codes.
void mpi_bcast(T &data, int root, MPI_Comm comm)
bool operator()(const T &a, const T &b) const
void mpi_all_to_all(std::vector< T > &in_vec, std::vector< int > &in_sendcnts, std::vector< T > &out_vec, std::vector< int > &out_recvcnts, MPI_Comm mpi_comm)
mpi_communicator(MPI_Comm comm)