Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #include "partition.h"
00037
00038 #include <iostream>
00039 #include <iterator>
00040 #include <map>
00041 #include <cassert>
00042 #include <algorithm>
00043
00044 #include "counter.h"
00045 #include "stl_utils.h"
00046
00047 using namespace std;
00048
00049 namespace cluster {
00050
00051 partition::partition(size_t num_objects) : cluster_ids(num_objects, 0) {
00052 if (num_objects) medoid_ids.resize(1);
00053 }
00054
00055
00056 partition::~partition() { }
00057
00058
00059 void partition::to_cluster_list(cluster_list& clusters) const {
00060 clusters.clear();
00061 clusters.resize(medoid_ids.size());
00062 for (size_t object=0; object < cluster_ids.size(); object++) {
00063 clusters[cluster_ids[object]].insert(object);
00064 }
00065 }
00066
00067
00068 void partition::swap(partition& other) {
00069 medoid_ids.swap(other.medoid_ids);
00070 cluster_ids.swap(other.cluster_ids);
00071 }
00072
00073
00074 void partition::sort() {
00075
00076 vector<size_t> mapping(medoid_ids.size());
00077 generate(mapping.begin(), mapping.end(), sequence());
00078
00079 std::sort(mapping.begin(), mapping.end(), indexed_lt(medoid_ids));
00080 invert(mapping);
00081 std::sort(medoid_ids.begin(), medoid_ids.end());
00082
00083
00084 for (size_t i=0; i < cluster_ids.size(); i++) {
00085 cluster_ids[i] = mapping[cluster_ids[i]];
00086 }
00087 }
00088
00089
00090 static void write(ostream& out, const cluster_list& clusters, const vector<object_id> *medoid_ids = NULL) {
00091 if (medoid_ids) {
00092 out << "Medoids: ";
00093 copy(medoid_ids->begin(), medoid_ids->end(), ostream_iterator<object_id>(out, " "));
00094 out << endl;
00095 }
00096
00097 for (unsigned i=0; i < clusters.size(); i++) {
00098 out << i << "\t";
00099 const set<object_id>& c = clusters[i];
00100
00101 for (set<object_id>::const_iterator obj=c.begin(); obj != c.end(); obj++) {
00102 if (medoid_ids && (*medoid_ids)[i] == *obj) {
00103 out << "[" << *obj << "] ";
00104 } else {
00105 out << *obj << " ";
00106 }
00107 }
00108 out << endl;
00109 }
00110 }
00111
00112
00113 ostream& operator<<(ostream& out, const cluster_list& clusters) {
00114 write(out, clusters);
00115 return out;
00116 }
00117
00118
00119 size_t partition::size(size_t i) const {
00120 return count(cluster_ids.begin(), cluster_ids.end(), i);
00121 }
00122
00123
00124
00125 double mirkin_distance(const cluster_list& c1, const cluster_list& c2) {
00126 size_t c1_sum2 = 0;
00127 size_t n = 0;
00128 for (size_t i=0; i < c1.size(); i++) {
00129 c1_sum2 += c1[i].size() * c1[i].size();
00130 n += c1[i].size();
00131 }
00132
00133 size_t c2_sum2 = 0;
00134 for (size_t i=0; i < c2.size(); i++) {
00135 c2_sum2 += c2[i].size() * c2[i].size();
00136 }
00137
00138
00139 size_t c1c2_sum2 = 0;
00140 for (size_t i=0; i < c1.size(); i++) {
00141 for (size_t j=0; j < c2.size(); j++) {
00142 size_t size;
00143 set_intersection(c1[i].begin(), c1[i].end(),
00144 c2[j].begin(), c2[j].end(),
00145 counter(size));
00146 c1c2_sum2 += size * size;
00147 }
00148 }
00149
00150 return (c1_sum2 + c2_sum2 - (2 * c1c2_sum2)) / (double)(n*n);
00151 }
00152
00153
00154 std::ostream& operator<<(std::ostream& out, const partition& p) {
00155 cluster_list list;
00156 p.to_cluster_list(list);
00157 write(out, list, &p.medoid_ids);
00158 return out;
00159 }
00160
00161
00162 double mirkin_distance(const partition& c1, const partition& c2) {
00163 cluster_list l1, l2;
00164 c1.to_cluster_list(l1);
00165 c2.to_cluster_list(l2);
00166 return mirkin_distance(l1, l2);
00167 }
00168
00169
00170 void expand(cluster_list& list, size_t level) {
00171 if (!level) return;
00172
00173 cluster_list expanded(list.size());
00174 for (size_t i=0; i < list.size(); i++) {
00175 for (set<object_id>::iterator o=list[i].begin(); o != list[i].end(); o++) {
00176 size_t start = (1 << level) * (*o);
00177 size_t end = (1 << level) * (*o + 1);
00178 for (size_t ex=start; ex < end; ex++) {
00179 expanded[i].insert(ex);
00180 }
00181 }
00182 }
00183 expanded.swap(list);
00184 }
00185
00186
00187 void partition::write_members_with_runs(medoid_id m, ostream& out) {
00188 object_id o=0;
00189 bool first = true;
00190 while (o < cluster_ids.size()) {
00191 if (cluster_ids[o] == m) {
00192 object_id start = o++;
00193 while (o < cluster_ids.size() && cluster_ids[o] == m) o++;
00194 if (!first) out << " ";
00195 if (o == start+1) {
00196 out << start;
00197 } else {
00198 out << start << "-" << (o-1);
00199 }
00200 first = false;
00201 }
00202 o++;
00203 }
00204 }
00205
00206 }