HavoqGT
termination_detection.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013, Lawrence Livermore National Security, LLC.
3  * Produced at the Lawrence Livermore National Laboratory.
4  * Written by Roger Pearce <rpearce@llnl.gov>.
5  * LLNL-CODE-644630.
6  * All rights reserved.
7  *
8  * This file is part of HavoqGT, Version 0.1.
9  * For details, see https://computation.llnl.gov/casc/dcca-pub/dcca/Downloads.html
10  *
11  * Please also read this link – Our Notice and GNU Lesser General Public License.
12  * http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
13  *
14  * This program is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License (as published by the Free
16  * Software Foundation) version 2.1 dated February 1999.
17  *
18  * This program is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS FOR A
20  * PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public
21  * License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public License along
24  * with this program; if not, write to the Free Software Foundation, Inc.,
25  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26  *
27  * OUR NOTICE AND TERMS AND CONDITIONS OF THE GNU GENERAL PUBLIC LICENSE
28  *
29  * Our Preamble Notice
30  *
31  * A. This notice is required to be provided under our contract with the
32  * U.S. Department of Energy (DOE). This work was produced at the Lawrence
33  * Livermore National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
34  *
35  * B. Neither the United States Government nor Lawrence Livermore National
36  * Security, LLC nor any of their employees, makes any warranty, express or
37  * implied, or assumes any liability or responsibility for the accuracy,
38  * completeness, or usefulness of any information, apparatus, product, or process
39  * disclosed, or represents that its use would not infringe privately-owned rights.
40  *
41  * C. Also, reference herein to any specific commercial products, process, or
42  * services by trade name, trademark, manufacturer or otherwise does not
43  * necessarily constitute or imply its endorsement, recommendation, or favoring by
44  * the United States Government or Lawrence Livermore National Security, LLC. The
45  * views and opinions of authors expressed herein do not necessarily state or
46  * reflect those of the United States Government or Lawrence Livermore National
47  * Security, LLC, and shall not be used for advertising or product endorsement
48  * purposes.
49  *
50  */
51 
52 #ifndef HAVOQGT_MPI_TERMINATION_DETECTION_HPP_INCLUDED
53 #define HAVOQGT_MPI_TERMINATION_DETECTION_HPP_INCLUDED
54 
55 #include <havoqgt/mpi.hpp>
56 #include <utility>
57 #include <limits>
58 
59 namespace havoqgt { namespace mpi {
60 
61 /*
62 Info about the states of termination detection -- this
63 is almost unreadable!
64 
65 WAITING_FOR_PARENT_IRECV { query_request & terminate_signal
66 if( local_received == local_retired) {
67  if root, send msg to children
68  else, MPI_Testany (query_request, terminate_signal)
69 }
70 -- locally all msgs have to be received and sent
71 -- if root, ready
72 -- else, wait for parent signal
73 }
74 
75 sent msg to children
76 WAITING_FOR_CHILDREN_ISEND -- query_request & terminate_signal
77 
78 WAITING_FOR_CHILDREN_IRECV{ -- query_response
79  sum, (with local) and send to parent
80  if root, eval and compare w/ 'previous' global reduce
81 }
82 
83 WAIT_FOR_TERMINATE_CHILDREN_ISEND -- terminate_signal
84 
85 
86 irecv for parent (query and terminate)
87 irecv for children (query response)
88 isend for children (query and terminate)
89 isend for parent (query response)
90 
91 WAITING_FOR_INIT, WAITING_FOR_ISEND_CHILDREN, WAITING_FOR_RECV_CHILDREN,
92 WAITING_FOR_ISEND_PARENT,
93 
94 */
95 
96 template<typename SizeType>
98  private:
101  public:
102  typedef SizeType size_type;
103  typedef std::pair<size_type, size_type> status_response_type;
104 
// Constructs the termination detector on top of an MPI communicator.
//
// Parameters:
//   in_mpi_comm            communicator; stored in m_mpi_comm and used to look
//                          up this process's rank and the communicator size.
//   in_num_tree_children   stored in m_num_tree_children (default 2).
//   in_query_status_tag    stored in m_query_status_tag (default 2).
//   in_query_response_tag  stored in m_query_response_tag (default 3).
//   in_terminate_tag       stored in m_terminate_tag (default 4).
//
// The local counters m_count_queued and m_count_completed start at zero.
105  termination_detection(MPI_Comm in_mpi_comm,
106  int in_num_tree_children = 2,
107  int in_query_status_tag = 2,
108  int in_query_response_tag = 3,
109  int in_terminate_tag = 4) {
110  m_mpi_comm = in_mpi_comm;
111  m_num_tree_children = in_num_tree_children;
112  m_query_status_tag = in_query_status_tag;
113  m_query_response_tag = in_query_response_tag;
114  m_terminate_tag = in_terminate_tag;
// Cache this process's rank and the communicator size.
115  CHK_MPI( MPI_Comm_rank( m_mpi_comm, &m_mpi_rank) );
116  CHK_MPI( MPI_Comm_size( m_mpi_comm, &m_mpi_size) );
// NOTE(review): listing lines 117-120 are not visible here. The expression
// below builds a status_response_type with both fields at the largest
// size_type value; presumably it is the right-hand side of an assignment whose
// target is on an elided line -- confirm against the full source.
121  status_response_type(std::numeric_limits<size_type>::max(),
122  std::numeric_limits<size_type>::max());
123 
124  m_count_queued = 0;
125  m_count_completed = 0;
126  /*for(int i=0; i<m_mpi_size; ++i) {
127  if(m_mpi_rank == i) {
128  std::cout << m_mpi_rank << " parent_rank() = " << parent_rank() << std::endl;
129  std::cout << m_mpi_rank << " begin_child_rank() = " << begin_child_rank() << std::endl;
130  std::cout << m_mpi_rank << " is_leaf_rank() = " << is_leaf_rank() << std::endl;
131  std::cout << m_mpi_rank << " num_children() = " << num_children() << std::endl;
132  std::cout << std::endl;
133  }
134  MPI_Barrier(in_mpi_comm);
135 
136  }
137  exit(-1);*/
138  }
139 
// Adds _i (default 1) to the local queued counter, m_count_queued.
140  void inc_queued(size_t _i=1) { m_count_queued += _i; }
// Adds _i (default 1) to the local completed counter, m_count_completed.
141  void inc_completed(size_t _i=1) { m_count_completed += _i; }
142 
145  }
146 
147  private:
// Dispatches on m_current_state to the matching handle_waiting_* routine,
// forwarding in_queued and in_completed unchanged, and returns that routine's
// result. Falls through to `return false` when no case is taken.
//
// NOTE(review): only the WAITING_INIT label is visible. Listing lines 153, 155
// and 157 are elided; presumably they hold the case labels for the three
// remaining returns -- confirm against the full source.
148  bool test_for_termination_internal(const size_type& in_queued,
149  const size_type& in_completed) {
150  switch(m_current_state) {
151  case WAITING_INIT:
152  return handle_waiting_init(in_queued, in_completed);
154  return handle_waiting_isend_children(in_queued, in_completed);
156  return handle_waiting_recv_children(in_queued, in_completed);
158  return handle_waiting_isend_parent(in_queued, in_completed);
159  };
160  return false;
161  }
162 
// Handler invoked by test_for_termination_internal for the WAITING_INIT state.
//
// Clears both fields of m_subtree_status_response, then branches on whether
// this process is rank 0. Neither in_queued nor in_completed is referenced in
// the visible lines.
//
// Returns true on the path that receives a message tagged m_terminate_tag from
// parent_rank(); the final statement returns false.
//
// NOTE(review): listing lines 169-170, 172, 174-175 and 182-183 are elided, so
// the rank-0 branch body, the condition guarding the terminate receive, and
// whatever follows the query-status receive are not visible here.
163  bool handle_waiting_init(const size_type& in_queued,
164  const size_type& in_completed) {
165  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
// Start a fresh accumulation of the subtree totals.
166  m_subtree_status_response.first = 0;
167  m_subtree_status_response.second = 0;
168  if(m_mpi_rank == 0) {
171  } else {
// Consume the zero-byte terminate message sent by the parent.
173  CHK_MPI( MPI_Recv(NULL, 0, MPI_BYTE, parent_rank(), m_terminate_tag, m_mpi_comm, MPI_STATUS_IGNORE) );
176  return true;
177  }
// The probe matches any source, but the receive below names parent_rank()
// explicitly.
178  else if(mpi_iprobe(/*parent_rank()*/MPI_ANY_SOURCE, m_query_status_tag)) {
179  //std::cout << m_mpi_rank << "ReceivedInit" << std::endl;
// Consume the zero-byte query-status message.
180  CHK_MPI( MPI_Recv(NULL, 0, MPI_BYTE, parent_rank(), m_query_status_tag,
181  m_mpi_comm, MPI_STATUS_IGNORE) );
184  }
185  }
186  return false;
187  }
188 
// Works through m_vec_req_isend_children from the back: entries are popped
// while the (elided) per-entry condition holds, and the loop stops at the
// first entry for which it does not. The visible code always returns false.
// Neither in_queued nor in_completed is referenced in the visible lines.
//
// NOTE(review): listing line 193 (the condition paired with the `else` below)
// and line 199 (the statement run once the vector is empty) are elided --
// confirm against the full source.
189  bool handle_waiting_isend_children (const size_type& in_queued,
190  const size_type& in_completed) {
191  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
192  while(!m_vec_req_isend_children.empty()) {
194  m_vec_req_isend_children.pop_back();
195  else
// Stop at the first request that does not satisfy the elided condition.
196  break;
197  }
198  if(m_vec_req_isend_children.empty())
200  return false;
201  }
202 
// Accumulates queued/completed totals into m_subtree_status_response.
//
//   in_queued     added to m_subtree_status_response.first.
//   in_completed  added to m_subtree_status_response.second.
//
// A leaf rank (is_leaf_rank()) adds only its own counters. Any other rank
// first receives every pending message tagged m_query_response_tag, from any
// source, and adds each received pair to the running totals; its own counters
// are added once m_num_waiting_recv_children is zero.
//
// Returns true on a path taken by rank 0; the other visible exits return false.
//
// NOTE(review): listing lines 209-210, 214, 222, 226-229, 232, 239 and 242-243
// are elided, so the state transitions, the send to the parent, the update of
// m_num_waiting_recv_children, and the condition deciding rank 0's `return
// true` are not visible. The brace nesting below cannot be fully reconstructed
// from this listing.
203  bool handle_waiting_recv_children (const size_type& in_queued,
204  const size_type& in_completed) {
205  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
206  if(is_leaf_rank()) {
// Leaf: the subtree consists of this rank only.
207  m_subtree_status_response.first += in_queued;
208  m_subtree_status_response.second += in_completed;
211  } else {
212  status_response_type recv_buf;
// Receive every response that is already pending; each one is read as raw bytes
// into recv_buf.
213  while(mpi_iprobe(MPI_ANY_SOURCE, m_query_response_tag)) {
215  CHK_MPI( MPI_Recv( (void*) &recv_buf, sizeof(status_response_type),
216  MPI_BYTE, MPI_ANY_SOURCE, m_query_response_tag,
217  m_mpi_comm, MPI_STATUS_IGNORE) );
218  m_subtree_status_response.first += recv_buf.first;
219  m_subtree_status_response.second += recv_buf.second;
220  }
221  if(m_num_waiting_recv_children == 0) {
// No child responses outstanding: fold in this rank's own counters.
223  m_subtree_status_response.first += in_queued;
224  m_subtree_status_response.second += in_completed;
225  if(m_mpi_rank == 0) {
230  return true;
231  } else {
233  }
234  } else {
235  // This is a place we can debug termination detection
236  //std::cout << "m_subtree_status_response = " << m_subtree_status_response.first
237  // << ", " << m_subtree_status_response.second << std::endl;
238  }
240  return false;
241  } else {
244  }
245  }
246  }
247  return false;
248  }
249 
// One of the handlers test_for_termination_internal can dispatch to.
// The only statement visible in this listing returns false; neither in_queued
// nor in_completed is referenced in the visible lines.
//
// NOTE(review): listing lines 253-254 are elided; they open the block closed
// by the `}` below -- confirm its contents against the full source.
250  bool handle_waiting_isend_parent (const size_type& in_queued,
251  const size_type& in_completed) {
252  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
255  }
256  return false;
257  }
258 
260  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
261  for(int i=0; i<num_children(); ++i) {
262  int child_rank = i + begin_child_rank();
263  CHK_MPI( MPI_Send(NULL, 0, MPI_BYTE, child_rank, m_terminate_tag,
264  m_mpi_comm) );
265  }
266  }
267 
// Polls one outstanding request with MPI_Test.
//   in_req  request to poll; passed to MPI_Test by address.
// Returns true when MPI_Test reports the completion flag as 1, false otherwise.
// The status of the completed operation is discarded (MPI_STATUS_IGNORE).
268  bool mpi_test(MPI_Request& in_req) {
269  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
270  int is_complete = 0;
271  CHK_MPI( MPI_Test( &in_req, &is_complete, MPI_STATUS_IGNORE) );
272  return 1 == is_complete;
273  }
274 
276  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
277  CHK_MPI( MPI_Isend( (void*) &m_subtree_status_response, sizeof(status_response_type),
278  MPI_BYTE, parent_rank(), m_query_response_tag,
280  }
281 
// Checks whether a message matching in_source and in_tag is pending on
// m_mpi_comm, without receiving it.
//   in_source  source rank to match (callers in this file also pass
//              MPI_ANY_SOURCE).
//   in_tag     message tag to match.
// Returns true when MPI_Iprobe reports the flag as 1, false otherwise.
// The probe status is never inspected, so MPI_STATUS_IGNORE is passed instead
// of filling an unused local MPI_Status; this matches mpi_test.
282  bool mpi_iprobe(int in_source, int in_tag) {
283  //std::cout << m_mpi_rank << " " << __FUNCTION__ << " source = " << in_source << " tag = " << in_tag << std::endl;
285  int flag(0);
286  CHK_MPI( MPI_Iprobe(in_source, in_tag, m_mpi_comm, &flag, MPI_STATUS_IGNORE) );
287  return flag == 1;
288  }
289 
291  //std::cout << m_mpi_rank << " " << __FUNCTION__ << std::endl;
292  for(int i=0; i<num_children(); ++i) {
293  int child_rank = i + begin_child_rank();
294  MPI_Request isend_request;
295  //std::cout << m_mpi_rank << "MPI_Isend -- " << child_rank << " " << m_query_status_tag << std::endl;
296  CHK_MPI( MPI_Isend(NULL, 0, MPI_BYTE, child_rank, m_query_status_tag,
297  m_mpi_comm, &isend_request) );
298  m_vec_req_isend_children.push_back(isend_request);
299  }
300  }
301 
// True when begin_child_rank() is not below m_mpi_size, i.e. the first child
// rank computed for this process falls outside the communicator.
304  bool is_leaf_rank() { return !(begin_child_rank() < m_mpi_size); }
// Number of children this rank has: the smaller of m_num_tree_children and
// the count of ranks from begin_child_rank() up to m_mpi_size, never negative.
305  int num_children() {
306  const int child_count = std::min(m_num_tree_children, m_mpi_size - begin_child_rank());
// A negative value means begin_child_rank() lies past the last rank.
307  return child_count < 0 ? 0 : child_count;
309  }
310 
311 
313  MPI_Comm m_mpi_comm;
318 
321 
323 
324  std::vector<MPI_Request> m_vec_req_isend_children;
325 
326  MPI_Request m_req_isend_parent;
327  status_response_type* m_ptr_buf_isend_parent;
329  status_response_type m_subtree_status_response;
331 
332  size_type m_count_queued;
333  size_type m_count_completed;
334 
335 
336  //MPI_Request m_
337  //std::vector<MPI_Request>
338 
339 };
340 
341 }} //namespace havoqgt { namespace mpi {
342 
343 #endif //HAVOQGT_MPI_TERMINATION_DETECTION_HPP_INCLUDED
std::pair< size_type, size_type > status_response_type
bool handle_waiting_recv_children(const size_type &in_queued, const size_type &in_completed)
status_response_type m_previous_subtree_status_response
std::vector< MPI_Request > m_vec_req_isend_children
MPI_Comm m_mpi_comm
Configuration parameters.
bool handle_waiting_isend_children(const size_type &in_queued, const size_type &in_completed)
bool test_for_termination_internal(const size_type &in_queued, const size_type &in_completed)
termination_detection_state_type m_current_state
bool handle_waiting_init(const size_type &in_queued, const size_type &in_completed)
bool handle_waiting_isend_parent(const size_type &in_queued, const size_type &in_completed)
bool mpi_iprobe(int in_source, int in_tag)
#define CHK_MPI(a)
Definition: mpi.hpp:68
termination_detection(MPI_Comm in_mpi_comm, int in_num_tree_children=2, int in_query_status_tag=2, int in_query_response_tag=3, int in_terminate_tag=4)