00001 ////////////////////////////////////////////////////////////////////////////////////////////////// 00002 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. 00003 // Produced at the Lawrence Livermore National Laboratory 00004 // LLNL-CODE-433662 00005 // All rights reserved. 00006 // 00007 // This file is part of Muster. For details, see http://github.com/tgamblin/muster. 00008 // Please also read the LICENSE file for further information. 00009 // 00010 // Redistribution and use in source and binary forms, with or without modification, are 00011 // permitted provided that the following conditions are met: 00012 // 00013 // * Redistributions of source code must retain the above copyright notice, this list of 00014 // conditions and the disclaimer below. 00015 // * Redistributions in binary form must reproduce the above copyright notice, this list of 00016 // conditions and the disclaimer (as noted below) in the documentation and/or other materials 00017 // provided with the distribution. 00018 // * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse 00019 // or promote products derived from this software without specific prior written permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS 00022 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 00023 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00024 // LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE 00025 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00028 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00029 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 ////////////////////////////////////////////////////////////////////////////////////////////////// 00031 00032 /// 00033 /// @file trial.h 00034 /// @author Todd Gamblin tgamblin@llnl.gov 00035 /// @brief Data structure representing a trial run of a partitioned clustering algorithm. 00036 /// 00037 #ifndef TRIAL_H 00038 #define TRIAL_H 00039 00040 #include <cstdlib> 00041 00042 namespace cluster { 00043 00044 /// 00045 /// This struct represents parameters for a single trial run of kmedoids. 00046 /// We generate a bunch of these to farm all the trials out to processes when doing 00047 /// parallel clustering. 00048 /// 00049 /// Each trial has a particular <i>k</i> (number of clusters), sample size, and 00050 /// repetition number (for successive samples with the same k). 00051 /// 00052 struct trial { 00053 size_t k; 00054 size_t rep; 00055 size_t sample_size; 00056 00057 trial() { } 00058 00059 trial(size_t _k, size_t _rep, size_t _sample_size) 00060 : k(_k), rep(_rep), sample_size(_sample_size) { } 00061 00062 trial(const trial& other) 00063 : k(other.k), rep(other.rep), sample_size(other.sample_size) { } 00064 }; 00065 00066 /// 00067 /// Class to generate a set of trials for clustering. This packages up state for the main 00068 /// trial loop and allows work to be farmed out to worker processes. 00069 /// 00070 class trial_generator { 00071 public: 00072 /// 00073 /// Constructor to generate trials from 1 to max_k. 00074 /// 00075 trial_generator(size_t _max_k, size_t _max_reps, size_t _init_size, size_t _num_objects); 00076 00077 /// 00078 /// Constructor to generate trials from min_k to max_k. 00079 /// 00080 trial_generator(size_t min_k, size_t _max_k, 00081 size_t _max_reps, size_t _init_size, size_t _num_objects); 00082 00083 size_t count() const; ///< return iterations so far. 00084 bool has_next() const; ///< whether there are trials remaining. 00085 trial next(); ///< return parameters for next trial 00086 void reset(); ///< return to initial state 00087 size_t num_trials(); ///< Return total number of trials this will generate. 00088 00089 const size_t max_k; ///< maximum k to try 00090 const size_t max_reps; ///< max number of repetitions per k 00091 const size_t init_size; ///< initial size for samples before factoring in k, as per CLARA paper. 00092 const size_t num_objects; ///< number of elements in the data set; determines maximum sample size. 00093 00094 private: 00095 trial cur_trial; ///< current state of the iterator. 00096 size_t number_of_trials; ///< memoized total number of trials in this generator 00097 size_t iterations; ///< number of iterations so far 00098 00099 /// size of the sample to cluster for particular k 00100 size_t get_sample_size(size_t k); 00101 }; 00102 00103 } // namespace cluster 00104 00105 #endif // TRIAL_H