00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029
00030 using namespace shogun;
00031
00032 CDistance::CDistance()
00033 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00034 lhs(NULL), rhs(NULL)
00035 {
00036 }
00037
00038
00039 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00040 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00041 lhs(NULL), rhs(NULL)
00042 {
00043 init(p_lhs, p_rhs);
00044 }
00045
00046 CDistance::~CDistance()
00047 {
00048 delete[] precomputed_matrix;
00049 precomputed_matrix=NULL;
00050
00051 remove_lhs_and_rhs();
00052 }
00053
00054 bool CDistance::init(CFeatures* l, CFeatures* r)
00055 {
00056
00057 ASSERT(l);
00058 ASSERT(r);
00059
00060
00061 ASSERT(l->get_feature_class()==r->get_feature_class());
00062 ASSERT(l->get_feature_type()==r->get_feature_type());
00063
00064
00065 remove_lhs_and_rhs();
00066
00067
00068 SG_REF(l);
00069 if (l!=r)
00070 SG_REF(r);
00071
00072 lhs=l;
00073 rhs=r;
00074
00075 delete[] precomputed_matrix ;
00076 precomputed_matrix=NULL ;
00077
00078 return true;
00079 }
00080
00081 void CDistance::load(CFile* loader)
00082 {
00083 }
00084
00085 void CDistance::save(CFile* writer)
00086 {
00087 }
00088
00089 void CDistance::remove_lhs_and_rhs()
00090 {
00091 if (rhs!=lhs)
00092 SG_UNREF(rhs);
00093 rhs = NULL;
00094
00095 SG_UNREF(lhs);
00096 lhs = NULL;
00097 }
00098
00099 void CDistance::remove_lhs()
00100 {
00101 SG_UNREF(lhs);
00102 lhs = NULL;
00103 }
00104
00106 void CDistance::remove_rhs()
00107 {
00108 SG_UNREF(rhs);
00109 rhs = NULL;
00110 }
00111
00112 CFeatures* CDistance::replace_rhs(CFeatures* r)
00113 {
00114
00115 ASSERT(r);
00116
00117
00118 ASSERT(lhs->get_feature_class()==r->get_feature_class());
00119 ASSERT(lhs->get_feature_type()==r->get_feature_type());
00120
00121
00122 CFeatures* tmp=rhs;
00123
00124 rhs=r;
00125
00126 delete[] precomputed_matrix ;
00127 precomputed_matrix=NULL ;
00128
00129 return tmp;
00130 }
00131
00132 void CDistance::do_precompute_matrix()
00133 {
00134 int32_t num_left=lhs->get_num_vectors();
00135 int32_t num_right=rhs->get_num_vectors();
00136 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00137
00138 ASSERT(num_left==num_right);
00139 ASSERT(lhs==rhs);
00140 int32_t num=num_left;
00141
00142 delete[] precomputed_matrix;
00143 precomputed_matrix=new float32_t[num*(num+1)/2];
00144
00145 for (int32_t i=0; i<num; i++)
00146 {
00147 SG_PROGRESS(i*i,0,num*num);
00148 for (int32_t j=0; j<=i; j++)
00149 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00150 }
00151
00152 SG_PROGRESS(num*num,0,num*num);
00153 SG_DONE();
00154 }
00155
00156 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00157 {
00158 ASSERT(dst && m && n);
00159
00160 float64_t* result = NULL;
00161 CFeatures* f1 = lhs;
00162 CFeatures* f2 = rhs;
00163
00164 if (f1 && f2)
00165 {
00166 int32_t num_vec1=f1->get_num_vectors();
00167 int32_t num_vec2=f2->get_num_vectors();
00168 *m=num_vec1;
00169 *n=num_vec2;
00170
00171 int64_t total_num=num_vec1*num_vec2;
00172 int32_t num_done=0;
00173 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00174
00175 result=(float64_t*) malloc(total_num*sizeof(float64_t));
00176 ASSERT(result);
00177
00178 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00179 {
00180 for (int32_t i=0; i<num_vec1; i++)
00181 {
00182 for (int32_t j=i; j<num_vec1; j++)
00183 {
00184 float64_t v=distance(i,j);
00185
00186 result[i+j*num_vec1]=v;
00187 result[j+i*num_vec1]=v;
00188
00189 if (num_done%100000)
00190 SG_PROGRESS(num_done, 0, total_num-1);
00191
00192 if (i!=j)
00193 num_done+=2;
00194 else
00195 num_done+=1;
00196 }
00197 }
00198 }
00199 else
00200 {
00201 for (int32_t i=0; i<num_vec1; i++)
00202 {
00203 for (int32_t j=0; j<num_vec2; j++)
00204 {
00205 result[i+j*num_vec1]=distance(i,j) ;
00206
00207 if (num_done%100000)
00208 SG_PROGRESS(num_done, 0, total_num-1);
00209
00210 num_done++;
00211 }
00212 }
00213 }
00214
00215 SG_DONE();
00216 }
00217 else
00218 SG_ERROR( "no features assigned to distance\n");
00219
00220 *dst=result;
00221 }
00222
00223 float32_t* CDistance::get_distance_matrix_shortreal(
00224 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00225 {
00226 float32_t* result = NULL;
00227 CFeatures* f1 = lhs;
00228 CFeatures* f2 = rhs;
00229
00230 if (f1 && f2)
00231 {
00232 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00233 SG_ERROR("distance matrix does not fit into target\n");
00234
00235 num_vec1=f1->get_num_vectors();
00236 num_vec2=f2->get_num_vectors();
00237 int64_t total_num=num_vec1*num_vec2;
00238 int32_t num_done=0;
00239
00240 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00241
00242 if (target)
00243 result=target;
00244 else
00245 result=new float32_t[total_num];
00246
00247 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00248 {
00249 for (int32_t i=0; i<num_vec1; i++)
00250 {
00251 for (int32_t j=i; j<num_vec1; j++)
00252 {
00253 float64_t v=distance(i,j);
00254
00255 result[i+j*num_vec1]=v;
00256 result[j+i*num_vec1]=v;
00257
00258 if (num_done%100000)
00259 SG_PROGRESS(num_done, 0, total_num-1);
00260
00261 if (i!=j)
00262 num_done+=2;
00263 else
00264 num_done+=1;
00265 }
00266 }
00267 }
00268 else
00269 {
00270 for (int32_t i=0; i<num_vec1; i++)
00271 {
00272 for (int32_t j=0; j<num_vec2; j++)
00273 {
00274 result[i+j*num_vec1]=distance(i,j) ;
00275
00276 if (num_done%100000)
00277 SG_PROGRESS(num_done, 0, total_num-1);
00278
00279 num_done++;
00280 }
00281 }
00282 }
00283
00284 SG_DONE();
00285 }
00286 else
00287 SG_ERROR( "no features assigned to distance\n");
00288
00289 return result;
00290 }
00291
00292 float64_t* CDistance::get_distance_matrix_real(
00293 int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00294 {
00295 float64_t* result = NULL;
00296 CFeatures* f1 = lhs;
00297 CFeatures* f2 = rhs;
00298
00299 if (f1 && f2)
00300 {
00301 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00302 SG_ERROR("distance matrix does not fit into target\n");
00303
00304 num_vec1=f1->get_num_vectors();
00305 num_vec2=f2->get_num_vectors();
00306 int64_t total_num=num_vec1*num_vec2;
00307 int32_t num_done=0;
00308
00309 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00310
00311 if (target)
00312 result=target;
00313 else
00314 result=new float64_t[total_num];
00315
00316 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00317 {
00318 for (int32_t i=0; i<num_vec1; i++)
00319 {
00320 for (int32_t j=i; j<num_vec1; j++)
00321 {
00322 float64_t v=distance(i,j);
00323
00324 result[i+j*num_vec1]=v;
00325 result[j+i*num_vec1]=v;
00326
00327 if (num_done%100000)
00328 SG_PROGRESS(num_done, 0, total_num-1);
00329
00330 if (i!=j)
00331 num_done+=2;
00332 else
00333 num_done+=1;
00334 }
00335 }
00336 }
00337 else
00338 {
00339 for (int32_t i=0; i<num_vec1; i++)
00340 {
00341 for (int32_t j=0; j<num_vec2; j++)
00342 {
00343 result[i+j*num_vec1]=distance(i,j) ;
00344
00345 if (num_done%100000)
00346 SG_PROGRESS(num_done, 0, total_num-1);
00347
00348 num_done++;
00349 }
00350 }
00351 }
00352
00353 SG_DONE();
00354 }
00355 else
00356 SG_ERROR( "no features assigned to distance\n");
00357
00358 return result;
00359 }