Hierarchical.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "clustering/Hierarchical.h"
00012 #include "distance/Distance.h"
00013 #include "features/Labels.h"
00014 #include "features/Features.h"
00015 #include "lib/Mathematics.h"
00016 #include "base/Parallel.h"
00017
00018 #ifndef WIN32
00019 #include <pthread.h>
00020 #endif
00021
00022 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00023 struct pair
00024 {
00026 int32_t idx1;
00028 int32_t idx2;
00029 };
00030 #endif // DOXYGEN_SHOULD_SKIP_THIS
00031
00032 CHierarchical::CHierarchical()
00033 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
00034 table_size(0), pairs(NULL), merge_distance(NULL)
00035 {
00036 }
00037
00038 CHierarchical::CHierarchical(int32_t merges_, CDistance* d)
00039 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
00040 table_size(0), pairs(NULL), merge_distance(NULL)
00041 {
00042 set_distance(d);
00043 }
00044
00045 CHierarchical::~CHierarchical()
00046 {
00047 delete[] merge_distance;
00048 delete[] assignment;
00049 delete[] pairs;
00050 }
00051
00052 bool CHierarchical::train()
00053 {
00054 ASSERT(distance);
00055 CFeatures* lhs=distance->get_lhs();
00056 ASSERT(lhs);
00057
00058 int32_t num=lhs->get_num_vectors();
00059 ASSERT(num>0);
00060
00061 const int32_t num_pairs=num*(num-1)/2;
00062
00063 delete[] merge_distance;
00064 merge_distance=new float64_t[num];
00065 CMath::fill_vector(merge_distance, num, -1.0);
00066
00067 delete[] assignment;
00068 assignment=new int32_t[num];
00069 CMath::range_fill_vector(assignment, num);
00070
00071 delete[] pairs;
00072 pairs=new int32_t[2*num];
00073 CMath::fill_vector(pairs, 2*num, -1);
00074
00075 pair* index=new pair[num_pairs];
00076 float64_t* distances=new float64_t[num_pairs];
00077
00078 int32_t offs=0;
00079 for (int32_t i=0; i<num; i++)
00080 {
00081 for (int32_t j=i+1; j<num; j++)
00082 {
00083 distances[offs]=distance->distance(i,j);
00084 index[offs].idx1=i;
00085 index[offs].idx2=j;
00086 offs++;
00087 }
00088 SG_PROGRESS(i, 0, num-1);
00089 }
00090
00091 CMath::qsort_index<float64_t,pair>(distances, index, (num-1)*num/2);
00092
00093
00094 int32_t k=-1;
00095 int32_t l=0;
00096 for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
00097 {
00098 while (k<num_pairs-1)
00099 {
00100 k++;
00101
00102 int32_t i=index[k].idx1;
00103 int32_t j=index[k].idx2;
00104 int32_t c1=assignment[i];
00105 int32_t c2=assignment[j];
00106
00107 if (c1==c2)
00108 continue;
00109
00110 SG_PROGRESS(k, 0, num_pairs-1);
00111
00112 if (c1<c2)
00113 {
00114 pairs[2*l]=c1;
00115 pairs[2*l+1]=c2;
00116 }
00117 else
00118 {
00119 pairs[2*l]=c2;
00120 pairs[2*l+1]=c1;
00121 }
00122 merge_distance[l]=distances[k];
00123
00124 int32_t c=num+l;
00125 for (int32_t m=0; m<num; m++)
00126 {
00127 if (assignment[m] == c1 || assignment[m] == c2)
00128 assignment[m] = c;
00129 }
00130 #ifdef DEBUG_HIERARCHICAL
00131 SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]);
00132 #endif
00133 break;
00134 }
00135 }
00136
00137 assignment_size=num;
00138 table_size=l-1;
00139 ASSERT(table_size>0);
00140 delete[] distances;
00141 delete[] index;
00142 SG_UNREF(lhs)
00143
00144 return true;
00145 }
00146
00147 bool CHierarchical::load(FILE* srcfile)
00148 {
00149 return false;
00150 }
00151
00152 bool CHierarchical::save(FILE* dstfile)
00153 {
00154 return false;
00155 }