00001 ////////////////////////////////////////////////////////////////////////////////////////////////// 00002 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. 00003 // Produced at the Lawrence Livermore National Laboratory 00004 // LLNL-CODE-433662 00005 // All rights reserved. 00006 // 00007 // This file is part of Muster. For details, see http://github.com/tgamblin/muster. 00008 // Please also read the LICENSE file for further information. 00009 // 00010 // Redistribution and use in source and binary forms, with or without modification, are 00011 // permitted provided that the following conditions are met: 00012 // 00013 // * Redistributions of source code must retain the above copyright notice, this list of 00014 // conditions and the disclaimer below. 00015 // * Redistributions in binary form must reproduce the above copyright notice, this list of 00016 // conditions and the disclaimer (as noted below) in the documentation and/or other materials 00017 // provided with the distribution. 00018 // * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse 00019 // or promote products derived from this software without specific prior written permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS 00022 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 00023 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00024 // LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE 00025 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00028 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00029 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 ////////////////////////////////////////////////////////////////////////////////////////////////// 00031 00032 /// 00033 /// @file par_partition.h 00034 /// @author Todd Gamblin tgamblin@llnl.gov 00035 /// @brief Distributed representation of a partitioning of a data set. 00036 /// 00037 #ifndef PAR_PARTITION_H 00038 #define PAR_PARTITION_H 00039 00040 #include <mpi.h> 00041 #include <vector> 00042 #include <ostream> 00043 00044 #include "partition.h" 00045 00046 00047 namespace cluster { 00048 00049 /// 00050 /// par_partition represents a partitioning of a distributed data set. 00051 /// It is analogous to partition, but its object_ids are distributed across 00052 /// the ranks of the communicator it is instantiated with. Each process is assumed 00053 /// to "own" some set of objects in the data set this describes, and each process's 00054 /// par_partition object contains medoid_ids only for its own objects. Thus, the cluster_ids 00055 /// array will contain different object_ids on different processes within the "same" 00056 /// par_partition object. 00057 /// 00058 /// While cluster_ids will vary, the "same" par_partition object on the same 00059 /// communicator will have the same medoid_ids. If this is not true, then the medoid_ids 00060 /// won't make any sense between processes. Partitioning algorithms that use a 00061 /// par_partition for output should preserve this property. 00062 /// 00063 /// You can convert a par_partition to a partition on a single process using 00064 /// the gather() method. This is a collective operation. It is not scalable, in that 00065 /// it will aggregate ids from <i>every</i> process in the communicator to <i>one</i> process. 00066 /// However, it's useful for small systems and debugging. 00067 /// 00068 /// @see partition, the non-distributed equivalent of this class. 00069 /// 00070 struct par_partition { 00071 /// Gives the object id for the ith medoid. This object may not be local. 00072 std::vector<object_id> medoid_ids; 00073 00074 /// Global cluster ids for local objects. These are indices into medoid_ids. 00075 /// The object id of the medoid of the ith local object is medoid_ids[cluster_ids[i]]. 00076 std::vector<object_id> cluster_ids; 00077 00078 /// Communicator, the processes of which this partition divides 00079 MPI_Comm comm; 00080 00081 /// Construct a parallel partition for the communicator supplied 00082 /// Partition starts off with everyone in one cluster with medoid 0. 00083 par_partition(MPI_Comm comm = MPI_COMM_WORLD); 00084 00085 /// Virtual destructor for inheritance. 00086 virtual ~par_partition(); 00087 00088 /// Scalably get the sizes of all the clusters in this partition. 00089 /// POST: sizes is valid on all processes 00090 void get_sizes(std::vector<size_t>& sizes); 00091 00092 /// Collective operation. Gathers my_id from all processes into a 00093 /// local partition object. If size of system is large, then this method 00094 /// will not scale. 00095 void gather(partition& local, int root=0); 00096 }; 00097 00098 /// 00099 /// Right now this just uses parition::operator<<() by making a 00100 /// partition with this par_partition's cluster_ids and medoid_ids vectors 00101 /// and outputting it. 00102 /// 00103 /// @todo fix the implementation; it's a little hacky. 00104 /// 00105 std::ostream& operator<<(std::ostream& out, const par_partition& par); 00106 00107 } // namespace cluster 00108 00109 #endif // PAR_PARTITION_H 00110 00111