Distance.cpp

Go to the documentation of this file.
00001 /*
00002  * this program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006-2009 Christian Gehl
00008  * Written (W) 2006-2009 Soeren Sonnenburg
00009  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019 
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022 
00023 #include <string.h>
00024 #include <unistd.h>
00025 
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029 
00030 CDistance::CDistance()
00031 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00032     lhs(NULL), rhs(NULL)
00033 {
00034 }
00035 
00036         
00037 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00038 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00039     lhs(NULL), rhs(NULL)
00040 {
00041     init(p_lhs, p_rhs);
00042 }
00043 
00044 CDistance::~CDistance()
00045 {
00046     delete[] precomputed_matrix;
00047     precomputed_matrix=NULL;
00048 
00049     remove_lhs_and_rhs();
00050 }
00051 
00052 bool CDistance::init(CFeatures* l, CFeatures* r)
00053 {
00054     //make sure features were indeed supplied
00055     ASSERT(l);
00056     ASSERT(r);
00057 
00058     //make sure features are compatible
00059     ASSERT(l->get_feature_class()==r->get_feature_class());
00060     ASSERT(l->get_feature_type()==r->get_feature_type());
00061 
00062     //remove references to previous features
00063     remove_lhs_and_rhs();
00064 
00065     //increase reference counts
00066     SG_REF(l);
00067     if (l!=r)
00068         SG_REF(r);
00069 
00070     lhs=l;
00071     rhs=r;
00072 
00073     delete[] precomputed_matrix ;
00074     precomputed_matrix=NULL ;
00075 
00076     return true;
00077 }
00078 
00079 bool CDistance::load(char* fname)
00080 {
00081     return false;
00082 }
00083 
00084 bool CDistance::save(char* fname)
00085 {
00086     int32_t i=0;
00087     int32_t num_left=lhs->get_num_vectors();
00088     int32_t num_right=rhs->get_num_vectors();
00089     int64_t num_total=num_left*num_right;
00090 
00091     CFile f(fname, 'w', F_DREAL);
00092 
00093     for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00094     {
00095         for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00096         {
00097             if (!(i % (num_total/10+1)))
00098                 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00099             else if (!(i % (num_total/200+1)))
00100                 SG_PRINT( ".");
00101 
00102             float64_t k=distance(l,r);
00103             f.save_real_data(&k, 1);
00104 
00105             i++;
00106         }
00107     }
00108 
00109     if (f.is_ok())
00110         SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00111 
00112     return (f.is_ok());
00113 }
00114 
00115 void CDistance::remove_lhs_and_rhs()
00116 {
00117     if (rhs!=lhs)
00118         SG_UNREF(rhs);
00119     rhs = NULL;
00120 
00121     SG_UNREF(lhs);
00122     lhs = NULL;
00123 }
00124 
00125 void CDistance::remove_lhs()
00126 { 
00127     SG_UNREF(lhs);
00128     lhs = NULL;
00129 }
00130 
00132 void CDistance::remove_rhs()
00133 {
00134     SG_UNREF(rhs);
00135     rhs = NULL;
00136 }
00137 
00138 
00139 void CDistance::do_precompute_matrix()
00140 {
00141     int32_t num_left=lhs->get_num_vectors();
00142     int32_t num_right=rhs->get_num_vectors();
00143     SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00144 
00145     ASSERT(num_left==num_right);
00146     ASSERT(lhs==rhs);
00147     int32_t num=num_left;
00148     
00149     delete[] precomputed_matrix;
00150     precomputed_matrix=new float32_t[num*(num+1)/2];
00151 
00152     for (int32_t i=0; i<num; i++)
00153     {
00154         SG_PROGRESS(i*i,0,num*num);
00155         for (int32_t j=0; j<=i; j++)
00156             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00157     }
00158 
00159     SG_PROGRESS(num*num,0,num*num);
00160     SG_DONE();
00161 }
00162 
00163 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00164 {
00165     ASSERT(dst && m && n);
00166 
00167     float64_t* result = NULL;
00168     CFeatures* f1 = lhs;
00169     CFeatures* f2 = rhs;
00170 
00171     if (f1 && f2)
00172     {
00173         int32_t num_vec1=f1->get_num_vectors();
00174         int32_t num_vec2=f2->get_num_vectors();
00175         *m=num_vec1;
00176         *n=num_vec2;
00177 
00178         int64_t total_num=num_vec1*num_vec2;
00179         int32_t num_done=0;
00180         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00181 
00182         result=(float64_t*) malloc(total_num*sizeof(float64_t));
00183         ASSERT(result);
00184 
00185         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00186         {
00187             for (int32_t i=0; i<num_vec1; i++)
00188             {
00189                 for (int32_t j=i; j<num_vec1; j++)
00190                 {
00191                     float64_t v=distance(i,j);
00192 
00193                     result[i+j*num_vec1]=v;
00194                     result[j+i*num_vec1]=v;
00195 
00196                     if (num_done%100000)
00197                         SG_PROGRESS(num_done, 0, total_num-1);
00198 
00199                     if (i!=j)
00200                         num_done+=2;
00201                     else
00202                         num_done+=1;
00203                 }
00204             }
00205         }
00206         else
00207         {
00208             for (int32_t i=0; i<num_vec1; i++)
00209             {
00210                 for (int32_t j=0; j<num_vec2; j++)
00211                 {
00212                     result[i+j*num_vec1]=distance(i,j) ;
00213 
00214                     if (num_done%100000)
00215                         SG_PROGRESS(num_done, 0, total_num-1);
00216 
00217                     num_done++;
00218                 }
00219             }
00220         }
00221 
00222         SG_DONE();
00223     }
00224     else
00225       SG_ERROR( "no features assigned to distance\n");
00226 
00227     *dst=result;
00228 }
00229 
00230 float32_t* CDistance::get_distance_matrix_shortreal(
00231     int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00232 {
00233     float32_t* result = NULL;
00234     CFeatures* f1 = lhs;
00235     CFeatures* f2 = rhs;
00236 
00237     if (f1 && f2)
00238     {
00239         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00240             SG_ERROR("distance matrix does not fit into target\n");
00241 
00242         num_vec1=f1->get_num_vectors();
00243         num_vec2=f2->get_num_vectors();
00244         int64_t total_num=num_vec1*num_vec2;
00245         int32_t num_done=0;
00246 
00247         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00248 
00249         if (target)
00250             result=target;
00251         else
00252             result=new float32_t[total_num];
00253 
00254         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00255         {
00256             for (int32_t i=0; i<num_vec1; i++)
00257             {
00258                 for (int32_t j=i; j<num_vec1; j++)
00259                 {
00260                     float64_t v=distance(i,j);
00261 
00262                     result[i+j*num_vec1]=v;
00263                     result[j+i*num_vec1]=v;
00264 
00265                     if (num_done%100000)
00266                         SG_PROGRESS(num_done, 0, total_num-1);
00267 
00268                     if (i!=j)
00269                         num_done+=2;
00270                     else
00271                         num_done+=1;
00272                 }
00273             }
00274         }
00275         else
00276         {
00277             for (int32_t i=0; i<num_vec1; i++)
00278             {
00279                 for (int32_t j=0; j<num_vec2; j++)
00280                 {
00281                     result[i+j*num_vec1]=distance(i,j) ;
00282 
00283                     if (num_done%100000)
00284                         SG_PROGRESS(num_done, 0, total_num-1);
00285 
00286                     num_done++;
00287                 }
00288             }
00289         }
00290 
00291         SG_DONE();
00292     }
00293     else
00294       SG_ERROR( "no features assigned to distance\n");
00295 
00296     return result;
00297 }
00298 
00299 float64_t* CDistance::get_distance_matrix_real(
00300     int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00301 {
00302     float64_t* result = NULL;
00303     CFeatures* f1 = lhs;
00304     CFeatures* f2 = rhs;
00305 
00306     if (f1 && f2)
00307     {
00308         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00309             SG_ERROR("distance matrix does not fit into target\n");
00310 
00311         num_vec1=f1->get_num_vectors();
00312         num_vec2=f2->get_num_vectors();
00313         int64_t total_num=num_vec1*num_vec2;
00314         int32_t num_done=0;
00315 
00316         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00317 
00318         if (target)
00319             result=target;
00320         else
00321             result=new float64_t[total_num];
00322 
00323         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00324         {
00325             for (int32_t i=0; i<num_vec1; i++)
00326             {
00327                 for (int32_t j=i; j<num_vec1; j++)
00328                 {
00329                     float64_t v=distance(i,j);
00330 
00331                     result[i+j*num_vec1]=v;
00332                     result[j+i*num_vec1]=v;
00333 
00334                     if (num_done%100000)
00335                         SG_PROGRESS(num_done, 0, total_num-1);
00336 
00337                     if (i!=j)
00338                         num_done+=2;
00339                     else
00340                         num_done+=1;
00341                 }
00342             }
00343         }
00344         else
00345         {
00346             for (int32_t i=0; i<num_vec1; i++)
00347             {
00348                 for (int32_t j=0; j<num_vec2; j++)
00349                 {
00350                     result[i+j*num_vec1]=distance(i,j) ;
00351 
00352                     if (num_done%100000)
00353                         SG_PROGRESS(num_done, 0, total_num-1);
00354 
00355                     num_done++;
00356                 }
00357             }
00358         }
00359 
00360         SG_DONE();
00361     }
00362     else
00363       SG_ERROR( "no features assigned to distance\n");
00364 
00365     return result;
00366 }

SHOGUN Machine Learning Toolbox - Documentation