Hierarchical.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2009 Soeren Sonnenburg
00008  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "clustering/Hierarchical.h"
00012 #include "distance/Distance.h"
00013 #include "features/Labels.h"
00014 #include "features/Features.h"
00015 #include "lib/Mathematics.h"
00016 #include "base/Parallel.h"
00017 
00018 #ifndef WIN32
00019 #include <pthread.h>
00020 #endif
00021 
00022 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00023 struct pair
00024 {
00026     int32_t idx1;
00028     int32_t idx2;
00029 };
00030 #endif // DOXYGEN_SHOULD_SKIP_THIS
00031 
00032 CHierarchical::CHierarchical()
00033 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
00034     table_size(0), pairs(NULL), merge_distance(NULL)
00035 {
00036 }
00037 
00038 CHierarchical::CHierarchical(int32_t merges_, CDistance* d)
00039 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
00040     table_size(0), pairs(NULL), merge_distance(NULL)
00041 {
00042     set_distance(d);
00043 }
00044 
00045 CHierarchical::~CHierarchical()
00046 {
00047     delete[] merge_distance;
00048     delete[] assignment;
00049     delete[] pairs;
00050 }
00051 
00052 bool CHierarchical::train()
00053 {
00054     ASSERT(distance);
00055     CFeatures* lhs=distance->get_lhs();
00056     ASSERT(lhs);
00057 
00058     int32_t num=lhs->get_num_vectors();
00059     ASSERT(num>0);
00060 
00061     const int32_t num_pairs=num*(num-1)/2;
00062 
00063     delete[] merge_distance;
00064     merge_distance=new float64_t[num];
00065     CMath::fill_vector(merge_distance, num, -1.0);
00066 
00067     delete[] assignment;
00068     assignment=new int32_t[num];
00069     CMath::range_fill_vector(assignment, num);
00070 
00071     delete[] pairs;
00072     pairs=new int32_t[2*num];
00073     CMath::fill_vector(pairs, 2*num, -1);
00074 
00075     pair* index=new pair[num_pairs];
00076     float64_t* distances=new float64_t[num_pairs];
00077 
00078     int32_t offs=0;
00079     for (int32_t i=0; i<num; i++)
00080     {
00081         for (int32_t j=i+1; j<num; j++)
00082         {
00083             distances[offs]=distance->distance(i,j);
00084             index[offs].idx1=i;
00085             index[offs].idx2=j;
00086             offs++;                 //offs=i*(i+1)/2+j
00087         }
00088         SG_PROGRESS(i, 0, num-1);
00089     }
00090 
00091     CMath::qsort_index<float64_t,pair>(distances, index, (num-1)*num/2);
00092     //CMath::display_vector(distances, (num-1)*num/2, "dists");
00093 
00094     int32_t k=-1;
00095     int32_t l=0;
00096     for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
00097     {
00098         while (k<num_pairs-1)
00099         {
00100             k++;
00101 
00102             int32_t i=index[k].idx1;
00103             int32_t j=index[k].idx2;
00104             int32_t c1=assignment[i];
00105             int32_t c2=assignment[j];
00106 
00107             if (c1==c2)
00108                 continue;
00109             
00110             SG_PROGRESS(k, 0, num_pairs-1);
00111 
00112             if (c1<c2)
00113             {
00114                 pairs[2*l]=c1;
00115                 pairs[2*l+1]=c2;
00116             }
00117             else
00118             {
00119                 pairs[2*l]=c2;
00120                 pairs[2*l+1]=c1;
00121             }
00122             merge_distance[l]=distances[k];
00123 
00124             int32_t c=num+l;
00125             for (int32_t m=0; m<num; m++)
00126             {
00127                 if (assignment[m] == c1 || assignment[m] == c2)
00128                     assignment[m] = c;
00129             }
00130 #ifdef DEBUG_HIERARCHICAL
00131             SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]);
00132 #endif
00133             break;
00134         }
00135     }
00136 
00137     assignment_size=num;
00138     table_size=l-1;
00139     ASSERT(table_size>0);
00140     delete[] distances;
00141     delete[] index;
00142     SG_UNREF(lhs)
00143 
00144     return true;
00145 }
00146 
00147 bool CHierarchical::load(FILE* srcfile)
00148 {
00149     return false;
00150 }
00151 
00152 bool CHierarchical::save(FILE* dstfile)
00153 {
00154     return false;
00155 }

SHOGUN Machine Learning Toolbox - Documentation