00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029
00030 CDistance::CDistance()
00031 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00032 lhs(NULL), rhs(NULL)
00033 {
00034 }
00035
00036
00037 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00038 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00039 lhs(NULL), rhs(NULL)
00040 {
00041 init(p_lhs, p_rhs);
00042 }
00043
00044 CDistance::~CDistance()
00045 {
00046 delete[] precomputed_matrix;
00047 precomputed_matrix=NULL;
00048
00049 remove_lhs_and_rhs();
00050 }
00051
00052 bool CDistance::init(CFeatures* l, CFeatures* r)
00053 {
00054
00055 ASSERT(l);
00056 ASSERT(r);
00057
00058
00059 ASSERT(l->get_feature_class()==r->get_feature_class());
00060 ASSERT(l->get_feature_type()==r->get_feature_type());
00061
00062
00063 remove_lhs_and_rhs();
00064
00065
00066 SG_REF(l);
00067 if (l!=r)
00068 SG_REF(r);
00069
00070 lhs=l;
00071 rhs=r;
00072
00073 delete[] precomputed_matrix ;
00074 precomputed_matrix=NULL ;
00075
00076 return true;
00077 }
00078
00079 bool CDistance::load(char* fname)
00080 {
00081 return false;
00082 }
00083
00084 bool CDistance::save(char* fname)
00085 {
00086 int32_t i=0;
00087 int32_t num_left=lhs->get_num_vectors();
00088 int32_t num_right=rhs->get_num_vectors();
00089 int64_t num_total=num_left*num_right;
00090
00091 CFile f(fname, 'w', F_DREAL);
00092
00093 for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00094 {
00095 for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00096 {
00097 if (!(i % (num_total/10+1)))
00098 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00099 else if (!(i % (num_total/200+1)))
00100 SG_PRINT( ".");
00101
00102 float64_t k=distance(l,r);
00103 f.save_real_data(&k, 1);
00104
00105 i++;
00106 }
00107 }
00108
00109 if (f.is_ok())
00110 SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00111
00112 return (f.is_ok());
00113 }
00114
00115 void CDistance::remove_lhs_and_rhs()
00116 {
00117 if (rhs!=lhs)
00118 SG_UNREF(rhs);
00119 rhs = NULL;
00120
00121 SG_UNREF(lhs);
00122 lhs = NULL;
00123 }
00124
00125 void CDistance::remove_lhs()
00126 {
00127 SG_UNREF(lhs);
00128 lhs = NULL;
00129 }
00130
00132 void CDistance::remove_rhs()
00133 {
00134 SG_UNREF(rhs);
00135 rhs = NULL;
00136 }
00137
00138
00139 void CDistance::do_precompute_matrix()
00140 {
00141 int32_t num_left=lhs->get_num_vectors();
00142 int32_t num_right=rhs->get_num_vectors();
00143 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00144
00145 ASSERT(num_left==num_right);
00146 ASSERT(lhs==rhs);
00147 int32_t num=num_left;
00148
00149 delete[] precomputed_matrix;
00150 precomputed_matrix=new float32_t[num*(num+1)/2];
00151
00152 for (int32_t i=0; i<num; i++)
00153 {
00154 SG_PROGRESS(i*i,0,num*num);
00155 for (int32_t j=0; j<=i; j++)
00156 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00157 }
00158
00159 SG_PROGRESS(num*num,0,num*num);
00160 SG_DONE();
00161 }
00162
00163 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00164 {
00165 ASSERT(dst && m && n);
00166
00167 float64_t* result = NULL;
00168 CFeatures* f1 = lhs;
00169 CFeatures* f2 = rhs;
00170
00171 if (f1 && f2)
00172 {
00173 int32_t num_vec1=f1->get_num_vectors();
00174 int32_t num_vec2=f2->get_num_vectors();
00175 *m=num_vec1;
00176 *n=num_vec2;
00177
00178 int64_t total_num=num_vec1*num_vec2;
00179 int32_t num_done=0;
00180 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00181
00182 result=(float64_t*) malloc(total_num*sizeof(float64_t));
00183 ASSERT(result);
00184
00185 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00186 {
00187 for (int32_t i=0; i<num_vec1; i++)
00188 {
00189 for (int32_t j=i; j<num_vec1; j++)
00190 {
00191 float64_t v=distance(i,j);
00192
00193 result[i+j*num_vec1]=v;
00194 result[j+i*num_vec1]=v;
00195
00196 if (num_done%100000)
00197 SG_PROGRESS(num_done, 0, total_num-1);
00198
00199 if (i!=j)
00200 num_done+=2;
00201 else
00202 num_done+=1;
00203 }
00204 }
00205 }
00206 else
00207 {
00208 for (int32_t i=0; i<num_vec1; i++)
00209 {
00210 for (int32_t j=0; j<num_vec2; j++)
00211 {
00212 result[i+j*num_vec1]=distance(i,j) ;
00213
00214 if (num_done%100000)
00215 SG_PROGRESS(num_done, 0, total_num-1);
00216
00217 num_done++;
00218 }
00219 }
00220 }
00221
00222 SG_DONE();
00223 }
00224 else
00225 SG_ERROR( "no features assigned to distance\n");
00226
00227 *dst=result;
00228 }
00229
00230 float32_t* CDistance::get_distance_matrix_shortreal(
00231 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00232 {
00233 float32_t* result = NULL;
00234 CFeatures* f1 = lhs;
00235 CFeatures* f2 = rhs;
00236
00237 if (f1 && f2)
00238 {
00239 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00240 SG_ERROR("distance matrix does not fit into target\n");
00241
00242 num_vec1=f1->get_num_vectors();
00243 num_vec2=f2->get_num_vectors();
00244 int64_t total_num=num_vec1*num_vec2;
00245 int32_t num_done=0;
00246
00247 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00248
00249 if (target)
00250 result=target;
00251 else
00252 result=new float32_t[total_num];
00253
00254 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00255 {
00256 for (int32_t i=0; i<num_vec1; i++)
00257 {
00258 for (int32_t j=i; j<num_vec1; j++)
00259 {
00260 float64_t v=distance(i,j);
00261
00262 result[i+j*num_vec1]=v;
00263 result[j+i*num_vec1]=v;
00264
00265 if (num_done%100000)
00266 SG_PROGRESS(num_done, 0, total_num-1);
00267
00268 if (i!=j)
00269 num_done+=2;
00270 else
00271 num_done+=1;
00272 }
00273 }
00274 }
00275 else
00276 {
00277 for (int32_t i=0; i<num_vec1; i++)
00278 {
00279 for (int32_t j=0; j<num_vec2; j++)
00280 {
00281 result[i+j*num_vec1]=distance(i,j) ;
00282
00283 if (num_done%100000)
00284 SG_PROGRESS(num_done, 0, total_num-1);
00285
00286 num_done++;
00287 }
00288 }
00289 }
00290
00291 SG_DONE();
00292 }
00293 else
00294 SG_ERROR( "no features assigned to distance\n");
00295
00296 return result;
00297 }
00298
00299 float64_t* CDistance::get_distance_matrix_real(
00300 int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00301 {
00302 float64_t* result = NULL;
00303 CFeatures* f1 = lhs;
00304 CFeatures* f2 = rhs;
00305
00306 if (f1 && f2)
00307 {
00308 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00309 SG_ERROR("distance matrix does not fit into target\n");
00310
00311 num_vec1=f1->get_num_vectors();
00312 num_vec2=f2->get_num_vectors();
00313 int64_t total_num=num_vec1*num_vec2;
00314 int32_t num_done=0;
00315
00316 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00317
00318 if (target)
00319 result=target;
00320 else
00321 result=new float64_t[total_num];
00322
00323 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00324 {
00325 for (int32_t i=0; i<num_vec1; i++)
00326 {
00327 for (int32_t j=i; j<num_vec1; j++)
00328 {
00329 float64_t v=distance(i,j);
00330
00331 result[i+j*num_vec1]=v;
00332 result[j+i*num_vec1]=v;
00333
00334 if (num_done%100000)
00335 SG_PROGRESS(num_done, 0, total_num-1);
00336
00337 if (i!=j)
00338 num_done+=2;
00339 else
00340 num_done+=1;
00341 }
00342 }
00343 }
00344 else
00345 {
00346 for (int32_t i=0; i<num_vec1; i++)
00347 {
00348 for (int32_t j=0; j<num_vec2; j++)
00349 {
00350 result[i+j*num_vec1]=distance(i,j) ;
00351
00352 if (num_done%100000)
00353 SG_PROGRESS(num_done, 0, total_num-1);
00354
00355 num_done++;
00356 }
00357 }
00358 }
00359
00360 SG_DONE();
00361 }
00362 else
00363 SG_ERROR( "no features assigned to distance\n");
00364
00365 return result;
00366 }