00001 ////////////////////////////////////////////////////////////////////////////////////////////////// 00002 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. 00003 // Produced at the Lawrence Livermore National Laboratory 00004 // LLNL-CODE-433662 00005 // All rights reserved. 00006 // 00007 // This file is part of Muster. For details, see http://github.com/tgamblin/muster. 00008 // Please also read the LICENSE file for further information. 00009 // 00010 // Redistribution and use in source and binary forms, with or without modification, are 00011 // permitted provided that the following conditions are met: 00012 // 00013 // * Redistributions of source code must retain the above copyright notice, this list of 00014 // conditions and the disclaimer below. 00015 // * Redistributions in binary form must reproduce the above copyright notice, this list of 00016 // conditions and the disclaimer (as noted below) in the documentation and/or other materials 00017 // provided with the distribution. 00018 // * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse 00019 // or promote products derived from this software without specific prior written permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS 00022 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 00023 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 00024 // LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE 00025 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00028 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00029 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 ////////////////////////////////////////////////////////////////////////////////////////////////// 00031 00032 /// 00033 /// @file dissimilarity.h 00034 /// @author Todd Gamblin tgamblin@llnl.gov 00035 /// @brief Data types and functions for dealing with dissimilarity matrices. 00036 /// 00037 #ifndef DISSIMILARITY_MATRIX_H 00038 #define DISSIMILARITY_MATRIX_H 00039 00040 #include <vector> 00041 #include <boost/numeric/ublas/symmetric.hpp> 00042 #include <iostream> 00043 00044 namespace cluster { 00045 /// 00046 /// Packed repersentation of symmetric dissimilarity matrix. 00047 /// 00048 typedef boost::numeric::ublas::symmetric_matrix<double> dissimilarity_matrix; 00049 00050 /// 00051 /// Computes a dissimilarity matrix from a vector of objects. 00052 /// 00053 /// @param[in] objects Vector of any type T. 00054 /// @param[in] dissimilarity A dissimilarity measure that gives the distance between two T's. 00055 /// Needs to be callable on (T, T). 00056 /// @param[out] mat Output parameter. Dissimiliarity matrix is stored here. 00057 /// 00058 template <class T, class D> 00059 void build_dissimilarity_matrix(const std::vector<T>& objects, D dissimilarity, 00060 dissimilarity_matrix& mat) { 00061 if (mat.size1() != objects.size() || mat.size2() != objects.size()) { 00062 mat.resize(objects.size(), objects.size()); 00063 } 00064 00065 for (size_t i=0; i < objects.size(); i++) { 00066 for (size_t j=0; j <= i; j++) { 00067 mat(i,j) = dissimilarity(objects[i], objects[j]); 00068 } 00069 } 00070 } 00071 00072 00073 /// 00074 /// Computes a dissimilarity matrix from a subset of a vector of objects. 00075 /// 00076 /// @param objects Vector of any type T. 00077 /// @param subset Indirection vector. Contains indices into objects for 00078 /// elements to be compared. 00079 /// @param dissimilarity A dissimilarity measure that gives the distance between two T's. 00080 /// Needs to be callable(T, T). 00081 /// @param mat Output parameter. Dissimiliarity matrix is stored here. 00082 template <class T, class D> 00083 void build_dissimilarity_matrix(const std::vector<T>& objects, const std::vector<size_t>& subset, 00084 D dissimilarity, dissimilarity_matrix& mat) { 00085 if (mat.size1() != subset.size() || mat.size2() != subset.size()) { 00086 mat.resize(subset.size(), subset.size()); 00087 } 00088 00089 for (size_t i=0; i < subset.size(); i++) { 00090 for (size_t j=0; j <= i; j++) { 00091 mat(i,j) = dissimilarity(objects[subset[i]], objects[subset[j]]); 00092 } 00093 } 00094 } 00095 00096 00097 /// Adaptor for passing a matrix by reference to template functions that take 00098 /// a callable distance function. Avoids copying distance matrix. 00099 struct matrix_distance { 00100 const dissimilarity_matrix& mat; 00101 matrix_distance(const dissimilarity_matrix& m) : mat(m) { } 00102 double operator()(size_t i, size_t j) { return mat(i,j); } 00103 }; 00104 00105 00106 /// Functor for computing distance lazily from an object array and 00107 /// a distance metric. Use this for CLARA, where we don't want to 00108 /// precompute the entire distance matrix. 00109 template <class T, class D> 00110 struct lazy_distance_functor { 00111 const std::vector<T>& objects; 00112 D dissimilarity; 00113 00114 lazy_distance_functor(const std::vector<T>& objs, D d) 00115 : objects(objs), dissimilarity(d) { } 00116 00117 double operator()(size_t i, size_t j) { 00118 return dissimilarity(objects[i], objects[j]); 00119 } 00120 }; 00121 00122 /// Type-inferred syntactic sugar for constructing lazy_distance_functor. 00123 template <class T, class D> 00124 lazy_distance_functor<T,D> lazy_distance(const std::vector<T>& objs, D dist) { 00125 return lazy_distance_functor<T,D>(objs, dist); 00126 } 00127 00128 }; // namespace cluster 00129 00130 #endif // DISSIMILARITY_MATRIX_H