DotFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "features/DotFeatures.h"
00012 #include "lib/io.h"
00013 #include "lib/Signal.h"
00014 #include "base/Parallel.h"
00015 
00016 #ifndef WIN32
00017 #include <pthread.h>
00018 #endif
00019 
00020 using namespace shogun;
00021 
00022 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00023 struct DF_THREAD_PARAM
00024 {
00025     CDotFeatures* df;
00026     int32_t* sub_index;
00027     float64_t* output;
00028     int32_t start;
00029     int32_t stop;
00030     float64_t* alphas;
00031     float64_t* vec;
00032     int32_t dim;
00033     float64_t bias;
00034     bool progress;
00035 };
00036 #endif // DOXYGEN_SHOULD_SKIP_THIS
00037 
00038 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00039 {
00040     ASSERT(output);
00041     // write access is internally between output[start..stop] so the following
00042     // line is necessary to write to output[0...(stop-start-1)]
00043     output-=start; 
00044     ASSERT(start>=0);
00045     ASSERT(start<stop);
00046     ASSERT(stop<=get_num_vectors());
00047 
00048     int32_t num_vectors=stop-start;
00049     ASSERT(num_vectors>0);
00050 
00051     int32_t num_threads=parallel->get_num_threads();
00052     ASSERT(num_threads>0);
00053 
00054     CSignal::clear_cancel();
00055 
00056 #ifndef WIN32
00057     if (num_threads < 2)
00058     {
00059 #endif
00060         DF_THREAD_PARAM params;
00061         params.df=this;
00062         params.sub_index=NULL;
00063         params.output=output;
00064         params.start=start;
00065         params.stop=stop;
00066         params.alphas=alphas;
00067         params.vec=vec;
00068         params.dim=dim;
00069         params.bias=b;
00070         params.progress=false; //true;
00071         dense_dot_range_helper((void*) &params);
00072 #ifndef WIN32
00073     }
00074     else
00075     {
00076         pthread_t* threads = new pthread_t[num_threads-1];
00077         DF_THREAD_PARAM* params = new DF_THREAD_PARAM[num_threads];
00078         int32_t step= num_vectors/num_threads;
00079 
00080         int32_t t;
00081 
00082         for (t=0; t<num_threads-1; t++)
00083         {
00084             params[t].df = this;
00085             params[t].sub_index=NULL;
00086             params[t].output = output;
00087             params[t].start = start+t*step;
00088             params[t].stop = start+(t+1)*step;
00089             params[t].alphas=alphas;
00090             params[t].vec=vec;
00091             params[t].dim=dim;
00092             params[t].bias=b;
00093             params[t].progress = false;
00094             pthread_create(&threads[t], NULL,
00095                     CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
00096         }
00097 
00098         params[t].df = this;
00099         params[t].output = output;
00100         params[t].sub_index=NULL;
00101         params[t].start = start+t*step;
00102         params[t].stop = stop;
00103         params[t].alphas=alphas;
00104         params[t].vec=vec;
00105         params[t].dim=dim;
00106         params[t].bias=b;
00107         params[t].progress = false; //true;
00108         dense_dot_range_helper((void*) &params[t]);
00109 
00110         for (t=0; t<num_threads-1; t++)
00111             pthread_join(threads[t], NULL);
00112 
00113         delete[] params;
00114         delete[] threads;
00115     }
00116 #endif
00117 
00118 #ifndef WIN32
00119         if ( CSignal::cancel_computations() )
00120             SG_INFO( "prematurely stopped.           \n");
00121 #endif
00122 }
00123 
00124 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00125 {
00126     ASSERT(sub_index);
00127     ASSERT(output);
00128 
00129     int32_t num_threads=parallel->get_num_threads();
00130     ASSERT(num_threads>0);
00131 
00132     CSignal::clear_cancel();
00133 
00134 #ifndef WIN32
00135     if (num_threads < 2)
00136     {
00137 #endif
00138         DF_THREAD_PARAM params;
00139         params.df=this;
00140         params.sub_index=sub_index;
00141         params.output=output;
00142         params.start=0;
00143         params.stop=num;
00144         params.alphas=alphas;
00145         params.vec=vec;
00146         params.dim=dim;
00147         params.bias=b;
00148         params.progress=false; //true;
00149         dense_dot_range_helper((void*) &params);
00150 #ifndef WIN32
00151     }
00152     else
00153     {
00154         pthread_t* threads = new pthread_t[num_threads-1];
00155         DF_THREAD_PARAM* params = new DF_THREAD_PARAM[num_threads];
00156         int32_t step= num/num_threads;
00157 
00158         int32_t t;
00159 
00160         for (t=0; t<num_threads-1; t++)
00161         {
00162             params[t].df = this;
00163             params[t].sub_index=sub_index;
00164             params[t].output = output;
00165             params[t].start = t*step;
00166             params[t].stop = (t+1)*step;
00167             params[t].alphas=alphas;
00168             params[t].vec=vec;
00169             params[t].dim=dim;
00170             params[t].bias=b;
00171             params[t].progress = false;
00172             pthread_create(&threads[t], NULL,
00173                     CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
00174         }
00175 
00176         params[t].df = this;
00177         params[t].sub_index=sub_index;
00178         params[t].output = output;
00179         params[t].start = t*step;
00180         params[t].stop = num;
00181         params[t].alphas=alphas;
00182         params[t].vec=vec;
00183         params[t].dim=dim;
00184         params[t].bias=b;
00185         params[t].progress = false; //true;
00186         dense_dot_range_helper((void*) &params[t]);
00187 
00188         for (t=0; t<num_threads-1; t++)
00189             pthread_join(threads[t], NULL);
00190 
00191         delete[] params;
00192         delete[] threads;
00193     }
00194 #endif
00195 
00196 #ifndef WIN32
00197         if ( CSignal::cancel_computations() )
00198             SG_INFO( "prematurely stopped.           \n");
00199 #endif
00200 }
00201 
00202 void* CDotFeatures::dense_dot_range_helper(void* p)
00203 {
00204     DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
00205     CDotFeatures* df=par->df;
00206     int32_t* sub_index=par->sub_index;
00207     float64_t* output=par->output;
00208     int32_t start=par->start;
00209     int32_t stop=par->stop;
00210     float64_t* alphas=par->alphas;
00211     float64_t* vec=par->vec;
00212     int32_t dim=par->dim;
00213     float64_t bias=par->bias;
00214     bool progress=par->progress;
00215 
00216     if (sub_index)
00217     {
00218 #ifdef WIN32
00219         for (int32_t i=start; i<stop i++)
00220 #else
00221         for (int32_t i=start; i<stop &&
00222                 !CSignal::cancel_computations(); i++)
00223 #endif
00224         {
00225             if (alphas)
00226                 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
00227             else
00228                 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
00229             if (progress)
00230                 df->display_progress(start, stop, i);
00231         }
00232 
00233     }
00234     else
00235     {
00236 #ifdef WIN32
00237         for (int32_t i=start; i<stop i++)
00238 #else
00239         for (int32_t i=start; i<stop &&
00240                 !CSignal::cancel_computations(); i++)
00241 #endif
00242         {
00243             if (alphas)
00244                 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
00245             else
00246                 output[i]=df->dense_dot(i, vec, dim)+bias;
00247             if (progress)
00248                 df->display_progress(start, stop, i);
00249         }
00250     }
00251 
00252     return NULL;
00253 }
00254 
00255 void CDotFeatures::get_feature_matrix(float64_t** dst, int32_t* num_feat, int32_t* num_vec)
00256 {
00257     int64_t offs=0;
00258     int32_t num=get_num_vectors();
00259     int32_t dim=get_dim_feature_space();
00260     ASSERT(num>0);
00261     ASSERT(dim>0);
00262 
00263     int64_t sz=((uint64_t) num)* dim;
00264 
00265     *num_feat=dim;
00266     *num_vec=num;
00267     *dst=new float64_t[sz];
00268     memset(*dst, 0, sz*sizeof(float64_t));
00269 
00270     for (int32_t i=0; i<num; i++)
00271     {
00272         add_to_dense_vec(1.0, i, &((*dst)[offs]), dim);
00273         offs+=dim;
00274     }
00275 }
00276 
00277 void CDotFeatures::get_feature_vector(float64_t** dst, int32_t* len, int32_t num)
00278 {
00279     int32_t dim=get_dim_feature_space();
00280     ASSERT(num>=0 && num<=num);
00281     ASSERT(dim>0);
00282 
00283     *len=dim;
00284     *dst=new float64_t[dim];
00285     memset(*dst, 0, dim*sizeof(float64_t));
00286 
00287     add_to_dense_vec(1.0, num, *dst, dim);
00288 }
00289 
00290 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
00291 {
00292     int32_t num=get_num_vectors();
00293     int32_t d=get_dim_feature_space();
00294     float64_t* w= new float64_t[d];
00295     CMath::fill_vector(w, d, 0.0);
00296 
00297     CTime t;
00298     float64_t start_cpu=t.get_runtime();
00299     float64_t start_wall=t.get_curtime();
00300     for (int32_t r=0; r<repeats; r++)
00301     {
00302         for (int32_t i=0; i<num; i++)
00303             add_to_dense_vec(1.172343*(r+1), i, w, d);
00304     }
00305 
00306     SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
00307             repeats, num, (t.get_runtime()-start_cpu)/repeats,
00308             (t.get_curtime()-start_wall)/repeats);
00309 
00310     delete[] w;
00311 }
00312 
00313 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
00314 {
00315     int32_t num=get_num_vectors();
00316     int32_t d=get_dim_feature_space();
00317     float64_t* w= new float64_t[d];
00318     float64_t* out= new float64_t[num];
00319     float64_t* alphas= new float64_t[num];
00320     CMath::range_fill_vector(w, d, 17.0);
00321     CMath::range_fill_vector(alphas, num, 1.2345);
00322     //CMath::fill_vector(w, d, 17.0);
00323     //CMath::fill_vector(alphas, num, 1.2345);
00324 
00325     CTime t;
00326     float64_t start_cpu=t.get_runtime();
00327     float64_t start_wall=t.get_curtime();
00328 
00329     for (int32_t r=0; r<repeats; r++)
00330             dense_dot_range(out, 0, num, alphas, w, d, 23);
00331 
00332 #ifdef DEBUG_DOTFEATURES
00333     CMath::display_vector(out, 40, "dense_dot_range");
00334     float64_t* out2= new float64_t[num];
00335 
00336     for (int32_t r=0; r<repeats; r++)
00337     {
00338         CMath::fill_vector(out2, num, 0.0);
00339         for (int32_t i=0; i<num; i++)
00340             out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
00341     }
00342     CMath::display_vector(out2, 40, "dense_dot");
00343     for (int32_t i=0; i<num; i++)
00344         out2[i]-=out[i];
00345     CMath::display_vector(out2, 40, "diff");
00346 #endif
00347     SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
00348             repeats, num, (t.get_runtime()-start_cpu)/repeats,
00349             (t.get_curtime()-start_wall)/repeats);
00350 
00351     delete[] alphas;
00352     delete[] out;
00353     delete[] w;
00354 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation