Distance.cpp

Go to the documentation of this file.
00001 /*
00002  * this program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006-2009 Christian Gehl
00008  * Written (W) 2006-2009 Soeren Sonnenburg
00009  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019 
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022 
00023 #include <string.h>
00024 #include <unistd.h>
00025 
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029 
00030 using namespace shogun;
00031 
00032 CDistance::CDistance()
00033 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00034     lhs(NULL), rhs(NULL)
00035 {
00036 }
00037 
00038         
00039 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00040 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00041     lhs(NULL), rhs(NULL)
00042 {
00043     init(p_lhs, p_rhs);
00044 }
00045 
00046 CDistance::~CDistance()
00047 {
00048     delete[] precomputed_matrix;
00049     precomputed_matrix=NULL;
00050 
00051     remove_lhs_and_rhs();
00052 }
00053 
00054 bool CDistance::init(CFeatures* l, CFeatures* r)
00055 {
00056     //make sure features were indeed supplied
00057     ASSERT(l);
00058     ASSERT(r);
00059 
00060     //make sure features are compatible
00061     ASSERT(l->get_feature_class()==r->get_feature_class());
00062     ASSERT(l->get_feature_type()==r->get_feature_type());
00063 
00064     //remove references to previous features
00065     remove_lhs_and_rhs();
00066 
00067     //increase reference counts
00068     SG_REF(l);
00069     if (l!=r)
00070         SG_REF(r);
00071 
00072     lhs=l;
00073     rhs=r;
00074 
00075     delete[] precomputed_matrix ;
00076     precomputed_matrix=NULL ;
00077 
00078     return true;
00079 }
00080 
00081 void CDistance::load(CFile* loader)
00082 {
00083 }
00084 
00085 void CDistance::save(CFile* writer)
00086 {
00087 }
00088 
00089 void CDistance::remove_lhs_and_rhs()
00090 {
00091     if (rhs!=lhs)
00092         SG_UNREF(rhs);
00093     rhs = NULL;
00094 
00095     SG_UNREF(lhs);
00096     lhs = NULL;
00097 }
00098 
00099 void CDistance::remove_lhs()
00100 { 
00101     SG_UNREF(lhs);
00102     lhs = NULL;
00103 }
00104 
00106 void CDistance::remove_rhs()
00107 {
00108     SG_UNREF(rhs);
00109     rhs = NULL;
00110 }
00111 
00112 CFeatures* CDistance::replace_rhs(CFeatures* r)
00113 {
00114      //make sure features were indeed supplied
00115      ASSERT(r);
00116 
00117      //make sure features are compatible
00118      ASSERT(lhs->get_feature_class()==r->get_feature_class());
00119      ASSERT(lhs->get_feature_type()==r->get_feature_type());
00120 
00121      //remove references to previous rhs features
00122      CFeatures* tmp=rhs;
00123      
00124      rhs=r;
00125 
00126      delete[] precomputed_matrix ;
00127      precomputed_matrix=NULL ;
00128 
00129      return tmp;
00130 }
00131 
00132 void CDistance::do_precompute_matrix()
00133 {
00134     int32_t num_left=lhs->get_num_vectors();
00135     int32_t num_right=rhs->get_num_vectors();
00136     SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00137 
00138     ASSERT(num_left==num_right);
00139     ASSERT(lhs==rhs);
00140     int32_t num=num_left;
00141     
00142     delete[] precomputed_matrix;
00143     precomputed_matrix=new float32_t[num*(num+1)/2];
00144 
00145     for (int32_t i=0; i<num; i++)
00146     {
00147         SG_PROGRESS(i*i,0,num*num);
00148         for (int32_t j=0; j<=i; j++)
00149             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00150     }
00151 
00152     SG_PROGRESS(num*num,0,num*num);
00153     SG_DONE();
00154 }
00155 
00156 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00157 {
00158     ASSERT(dst && m && n);
00159 
00160     float64_t* result = NULL;
00161     CFeatures* f1 = lhs;
00162     CFeatures* f2 = rhs;
00163 
00164     if (f1 && f2)
00165     {
00166         int32_t num_vec1=f1->get_num_vectors();
00167         int32_t num_vec2=f2->get_num_vectors();
00168         *m=num_vec1;
00169         *n=num_vec2;
00170 
00171         int64_t total_num=num_vec1*num_vec2;
00172         int32_t num_done=0;
00173         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00174 
00175         result=(float64_t*) malloc(total_num*sizeof(float64_t));
00176         ASSERT(result);
00177 
00178         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00179         {
00180             for (int32_t i=0; i<num_vec1; i++)
00181             {
00182                 for (int32_t j=i; j<num_vec1; j++)
00183                 {
00184                     float64_t v=distance(i,j);
00185 
00186                     result[i+j*num_vec1]=v;
00187                     result[j+i*num_vec1]=v;
00188 
00189                     if (num_done%100000)
00190                         SG_PROGRESS(num_done, 0, total_num-1);
00191 
00192                     if (i!=j)
00193                         num_done+=2;
00194                     else
00195                         num_done+=1;
00196                 }
00197             }
00198         }
00199         else
00200         {
00201             for (int32_t i=0; i<num_vec1; i++)
00202             {
00203                 for (int32_t j=0; j<num_vec2; j++)
00204                 {
00205                     result[i+j*num_vec1]=distance(i,j) ;
00206 
00207                     if (num_done%100000)
00208                         SG_PROGRESS(num_done, 0, total_num-1);
00209 
00210                     num_done++;
00211                 }
00212             }
00213         }
00214 
00215         SG_DONE();
00216     }
00217     else
00218       SG_ERROR( "no features assigned to distance\n");
00219 
00220     *dst=result;
00221 }
00222 
00223 float32_t* CDistance::get_distance_matrix_shortreal(
00224     int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00225 {
00226     float32_t* result = NULL;
00227     CFeatures* f1 = lhs;
00228     CFeatures* f2 = rhs;
00229 
00230     if (f1 && f2)
00231     {
00232         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00233             SG_ERROR("distance matrix does not fit into target\n");
00234 
00235         num_vec1=f1->get_num_vectors();
00236         num_vec2=f2->get_num_vectors();
00237         int64_t total_num=num_vec1*num_vec2;
00238         int32_t num_done=0;
00239 
00240         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00241 
00242         if (target)
00243             result=target;
00244         else
00245             result=new float32_t[total_num];
00246 
00247         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00248         {
00249             for (int32_t i=0; i<num_vec1; i++)
00250             {
00251                 for (int32_t j=i; j<num_vec1; j++)
00252                 {
00253                     float64_t v=distance(i,j);
00254 
00255                     result[i+j*num_vec1]=v;
00256                     result[j+i*num_vec1]=v;
00257 
00258                     if (num_done%100000)
00259                         SG_PROGRESS(num_done, 0, total_num-1);
00260 
00261                     if (i!=j)
00262                         num_done+=2;
00263                     else
00264                         num_done+=1;
00265                 }
00266             }
00267         }
00268         else
00269         {
00270             for (int32_t i=0; i<num_vec1; i++)
00271             {
00272                 for (int32_t j=0; j<num_vec2; j++)
00273                 {
00274                     result[i+j*num_vec1]=distance(i,j) ;
00275 
00276                     if (num_done%100000)
00277                         SG_PROGRESS(num_done, 0, total_num-1);
00278 
00279                     num_done++;
00280                 }
00281             }
00282         }
00283 
00284         SG_DONE();
00285     }
00286     else
00287       SG_ERROR( "no features assigned to distance\n");
00288 
00289     return result;
00290 }
00291 
00292 float64_t* CDistance::get_distance_matrix_real(
00293     int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00294 {
00295     float64_t* result = NULL;
00296     CFeatures* f1 = lhs;
00297     CFeatures* f2 = rhs;
00298 
00299     if (f1 && f2)
00300     {
00301         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00302             SG_ERROR("distance matrix does not fit into target\n");
00303 
00304         num_vec1=f1->get_num_vectors();
00305         num_vec2=f2->get_num_vectors();
00306         int64_t total_num=num_vec1*num_vec2;
00307         int32_t num_done=0;
00308 
00309         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00310 
00311         if (target)
00312             result=target;
00313         else
00314             result=new float64_t[total_num];
00315 
00316         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00317         {
00318             for (int32_t i=0; i<num_vec1; i++)
00319             {
00320                 for (int32_t j=i; j<num_vec1; j++)
00321                 {
00322                     float64_t v=distance(i,j);
00323 
00324                     result[i+j*num_vec1]=v;
00325                     result[j+i*num_vec1]=v;
00326 
00327                     if (num_done%100000)
00328                         SG_PROGRESS(num_done, 0, total_num-1);
00329 
00330                     if (i!=j)
00331                         num_done+=2;
00332                     else
00333                         num_done+=1;
00334                 }
00335             }
00336         }
00337         else
00338         {
00339             for (int32_t i=0; i<num_vec1; i++)
00340             {
00341                 for (int32_t j=0; j<num_vec2; j++)
00342                 {
00343                     result[i+j*num_vec1]=distance(i,j) ;
00344 
00345                     if (num_done%100000)
00346                         SG_PROGRESS(num_done, 0, total_num-1);
00347 
00348                     num_done++;
00349                 }
00350             }
00351         }
00352 
00353         SG_DONE();
00354     }
00355     else
00356       SG_ERROR( "no features assigned to distance\n");
00357 
00358     return result;
00359 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation