SVM.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "lib/Signal.h"
00014 #include "base/Parallel.h"
00015 
00016 #include "classifier/svm/SVM.h"
00017 
00018 #include <string.h>
00019 
00020 #ifndef WIN32
00021 #include <pthread.h>
00022 #endif
00023 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Per-thread work descriptor for parallel classification
 * (see CSVM::classify / CSVM::classify_example_helper). */
struct S_THREAD_PARAM
{
    /** SVM whose classify_example() is invoked */
    CSVM* svm;
    /** shared output labels; each thread writes only indices [start,end) */
    CLabels* result;
    /** first example index (inclusive) this thread handles */
    int32_t start;
    /** one past the last example index this thread handles */
    int32_t end;
    /** if true, this thread reports progress (only one thread should) */
    bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS
00034 
/** Construct an SVM with default parameters; if num_sv>0 a model with
 * that many (uninitialized) support vectors is allocated up front. */
CSVM::CSVM(int32_t num_sv)
: CKernelMachine()
{
    set_defaults(num_sv);
}
00040 
/** Construct an SVM with regularization constant C (used for both
 * classes), a kernel and training labels.
 * @param C regularization constant, applied as C1 and C2
 * @param k kernel to use
 * @param lab training labels
 */
CSVM::CSVM(float64_t C, CKernel* k, CLabels* lab)
: CKernelMachine()
{
    // set_defaults() must run first: it zeroes the model and parameters
    // that the setters below override.
    set_defaults();
    set_C(C,C);
    set_labels(lab);
    set_kernel(k);
}
00049 
00050 CSVM::~CSVM()
00051 {
00052     delete[] svm_model.alpha;
00053     delete[] svm_model.svs;
00054 
00055     SG_DEBUG("SVM object destroyed\n");
00056 }
00057 
00058 void CSVM::set_defaults(int32_t num_sv)
00059 {
00060     svm_model.b=0.0;
00061     svm_model.alpha=NULL;
00062     svm_model.svs=NULL;
00063     svm_model.num_svs=0;
00064     svm_loaded=false;
00065 
00066     weight_epsilon=1e-5;
00067     epsilon=1e-5;
00068     tube_epsilon=1e-2;
00069 
00070     nu=0.5;
00071     C1=1;
00072     C2=1;
00073     C_mkl=0;
00074     mkl_norm=1;
00075 
00076     objective=0;
00077 
00078     qpsize=41;
00079     use_bias=true;
00080     use_shrinking=true;
00081     use_mkl=false;
00082     use_batch_computation=true;
00083     use_linadd=true;
00084 
00085     if (num_sv>0)
00086         create_new_model(num_sv);
00087 }
00088 
00089 bool CSVM::load(FILE* modelfl)
00090 {
00091     bool result=true;
00092     char char_buffer[1024];
00093     int32_t int_buffer;
00094     float64_t double_buffer;
00095     int32_t line_number=1;
00096 
00097     if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
00098     {
00099         result=false;
00100         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00101     }
00102     else
00103     {
00104         char_buffer[4]='\0';
00105         if (strcmp("%SVM", char_buffer)!=0)
00106         {
00107             result=false;
00108             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00109         }
00110         line_number++;
00111     }
00112 
00113     int_buffer=0;
00114     if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
00115     {
00116         result=false;
00117         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00118     }
00119 
00120     if (!feof(modelfl))
00121         line_number++;
00122 
00123     SG_INFO( "loading %ld support vectors\n",int_buffer);
00124     create_new_model(int_buffer);
00125 
00126     if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
00127     {
00128         result=false;
00129         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00130     }
00131 
00132     if (!feof(modelfl))
00133         line_number++;
00134 
00135     double_buffer=0;
00136     
00137     if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
00138     {
00139         result=false;
00140         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00141     }
00142     
00143     if (!feof(modelfl))
00144         line_number++;
00145 
00146     set_bias(double_buffer);
00147 
00148     if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
00149     {
00150         result=false;
00151         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00152     }
00153     else
00154     {
00155         char_buffer[9]='\0';
00156         if (strcmp("alphas=[", char_buffer)!=0)
00157         {
00158             result=false;
00159             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00160         }
00161         line_number++;
00162     }
00163 
00164     for (int32_t i=0; i<get_num_support_vectors(); i++)
00165     {
00166         double_buffer=0;
00167         int_buffer=0;
00168 
00169         if (fscanf(modelfl," \[%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
00170         {
00171             result=false;
00172             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00173         }
00174 
00175         if (!feof(modelfl))
00176             line_number++;
00177 
00178         set_support_vector(i, int_buffer);
00179         set_alpha(i, double_buffer);
00180     }
00181 
00182     if (fscanf(modelfl,"%2s", char_buffer) == EOF)
00183     {
00184         result=false;
00185         SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00186     }
00187     else
00188     {
00189         char_buffer[3]='\0';
00190         if (strcmp("];", char_buffer)!=0)
00191         {
00192             result=false;
00193             SG_ERROR( "error in svm file, line nr:%d\n", line_number);
00194         }
00195         line_number++;
00196     }
00197 
00198     svm_loaded=result;
00199     return result;
00200 }
00201 
00202 bool CSVM::save(FILE* modelfl)
00203 {
00204     if (!kernel)
00205         SG_ERROR("Kernel not defined!\n");
00206 
00207     SG_INFO( "Writing model file...");
00208     fprintf(modelfl,"%%SVM\n");
00209     fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
00210     fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
00211     fprintf(modelfl,"b=%+10.16e;\n",get_bias());
00212 
00213     fprintf(modelfl, "alphas=\[\n");
00214 
00215     for(int32_t i=0; i<get_num_support_vectors(); i++)
00216         fprintf(modelfl,"\t[%+10.16e,%d];\n",
00217                 CSVM::get_alpha(i), get_support_vector(i));
00218 
00219     fprintf(modelfl, "];\n");
00220 
00221     SG_DONE();
00222     return true ;
00223 } 
00224 
00225 bool CSVM::init_kernel_optimization()
00226 {
00227     int32_t num_sv=get_num_support_vectors();
00228 
00229     if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
00230     {
00231         int32_t * sv_idx    = new int32_t[num_sv] ;
00232         float64_t* sv_weight = new float64_t[num_sv] ;
00233 
00234         for(int32_t i=0; i<num_sv; i++)
00235         {
00236             sv_idx[i]    = get_support_vector(i) ;
00237             sv_weight[i] = get_alpha(i) ;
00238         }
00239 
00240         bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight) ;
00241 
00242         delete[] sv_idx ;
00243         delete[] sv_weight ;
00244 
00245         if (!ret)
00246             SG_ERROR( "initialization of kernel optimization failed\n");
00247 
00248         return ret;
00249     }
00250     else
00251         SG_ERROR( "initialization of kernel optimization failed\n");
00252 
00253     return false;
00254 }
00255 
/** Thread entry point: classify examples [params->start, params->end)
 * and store the outputs in params->result.
 *
 * On non-Windows builds the loop also polls CSignal so a pending
 * cancellation (e.g. Ctrl-C) stops the computation early.
 *
 * @param p pointer to an S_THREAD_PARAM describing this thread's slice
 * @return NULL (pthread-compatible signature)
 */
void* CSVM::classify_example_helper(void* p)
{
    S_THREAD_PARAM* params= (S_THREAD_PARAM*) p;
    CLabels* result=params->result;
    CSVM* svm=params->svm;

#ifdef WIN32
    for (int32_t vec=params->start; vec<params->end; vec++)
#else
    CSignal::clear_cancel();
    for (int32_t vec=params->start; vec<params->end && 
            !CSignal::cancel_computations(); vec++)
#endif
    {
        if (params->verbose)
        {
            // report progress roughly once per percent of this slice
            int32_t num_vectors=params->end - params->start;
            int32_t v=vec-params->start;
            if ( (v% (num_vectors/100+1))== 0)
                SG_SPROGRESS(v, 0.0, num_vectors-1);
        }

        result->set_label(vec, svm->classify_example(vec));
    }

    return NULL;
}
00283 
00284 CLabels* CSVM::classify(CLabels* lab)
00285 {
00286     if (!kernel)
00287     {
00288         SG_ERROR( "SVM can not proceed without kernel!\n");
00289         return false ;
00290     }
00291 
00292     if ( kernel && kernel->get_num_vec_rhs()>0 )
00293     {
00294         int32_t num_vectors=kernel->get_num_vec_rhs();
00295 
00296         if (!lab)
00297         {
00298             lab=new CLabels(num_vectors);
00299             SG_REF(lab);
00300         }
00301 
00302         SG_DEBUG( "computing output on %d test examples\n", num_vectors);
00303 
00304         if (io->get_show_progress())
00305             io->enable_progress();
00306         else
00307             io->disable_progress();
00308 
00309         if (kernel->has_property(KP_BATCHEVALUATION) &&
00310                 get_batch_computation_enabled())
00311         {
00312             float64_t* output=new float64_t[num_vectors];
00313             memset(output, 0, sizeof(float64_t)*num_vectors);
00314 
00315             if (get_num_support_vectors()>0)
00316             {
00317                 int32_t* sv_idx=new int32_t[get_num_support_vectors()];
00318                 float64_t* sv_weight=new float64_t[get_num_support_vectors()];
00319                 int32_t* idx=new int32_t[num_vectors];
00320 
00321                 //compute output for all vectors v[0]...v[num_vectors-1]
00322                 for (int32_t i=0; i<num_vectors; i++)
00323                     idx[i]=i;
00324 
00325                 for (int32_t i=0; i<get_num_support_vectors(); i++)
00326                 {
00327                     sv_idx[i]    = get_support_vector(i) ;
00328                     sv_weight[i] = get_alpha(i) ;
00329                 }
00330 
00331                 kernel->compute_batch(num_vectors, idx,
00332                         output, get_num_support_vectors(), sv_idx, sv_weight);
00333                 delete[] sv_idx ;
00334                 delete[] sv_weight ;
00335                 delete[] idx;
00336             }
00337 
00338             for (int32_t i=0; i<num_vectors; i++)
00339                 lab->set_label(i, get_bias()+output[i]);
00340 
00341             delete[] output;
00342         }
00343         else
00344         {
00345             int32_t num_threads=parallel->get_num_threads();
00346             ASSERT(num_threads>0);
00347 
00348             if (num_threads < 2)
00349             {
00350                 S_THREAD_PARAM params;
00351                 params.svm=this;
00352                 params.result=lab;
00353                 params.start=0;
00354                 params.end=num_vectors;
00355                 params.verbose=true;
00356                 classify_example_helper((void*) &params);
00357             }
00358 #ifndef WIN32
00359             else
00360             {
00361                 pthread_t* threads = new pthread_t[num_threads-1];
00362                 S_THREAD_PARAM* params = new S_THREAD_PARAM[num_threads];
00363                 int32_t step= num_vectors/num_threads;
00364 
00365                 int32_t t;
00366 
00367                 for (t=0; t<num_threads-1; t++)
00368                 {
00369                     params[t].svm = this;
00370                     params[t].result = lab;
00371                     params[t].start = t*step;
00372                     params[t].end = (t+1)*step;
00373                     params[t].verbose = false;
00374                     pthread_create(&threads[t], NULL,
00375                             CSVM::classify_example_helper, (void*)&params[t]);
00376                 }
00377 
00378                 params[t].svm = this;
00379                 params[t].result = lab;
00380                 params[t].start = t*step;
00381                 params[t].end = num_vectors;
00382                 params[t].verbose = true;
00383                 classify_example_helper((void*) &params[t]);
00384 
00385                 for (t=0; t<num_threads-1; t++)
00386                     pthread_join(threads[t], NULL);
00387 
00388                 delete[] params;
00389                 delete[] threads;
00390             }
00391 #endif
00392         }
00393 
00394 #ifndef WIN32
00395         if ( CSignal::cancel_computations() )
00396             SG_INFO( "prematurely stopped.           \n");
00397         else
00398 #endif
00399             SG_DONE();
00400     }
00401     else 
00402         return NULL;
00403 
00404     return lab;
00405 }
00406 
00407 float64_t CSVM::classify_example(int32_t num)
00408 {
00409     ASSERT(kernel);
00410 
00411     if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
00412     {
00413         float64_t dist = kernel->compute_optimized(num);
00414         return (dist+get_bias());
00415     }
00416     else
00417     {
00418         float64_t dist=0;
00419         for(int32_t i=0; i<get_num_support_vectors(); i++)
00420             dist+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);
00421 
00422         return (dist+get_bias());
00423     }
00424 }
00425 
00426 
/** Compute and cache the dual SVM objective
 * 0.5 * sum_ij alpha_i alpha_j k(i,j) - sum_i alpha_i y_i.
 *
 * Requires labels and kernel to be set; otherwise an error is reported
 * and the previously cached objective is returned unchanged.
 *
 * NOTE(review): the loops index labels and the kernel with the raw loop
 * counters i,j rather than get_support_vector(i)/get_support_vector(j).
 * This is only correct if support vector i coincides with training
 * example i — presumably the convention of the solvers using this, but
 * it cannot be confirmed from this file; verify against callers.
 *
 * @return the cached objective value
 */
float64_t CSVM::compute_objective()
{
    int32_t n=get_num_support_vectors();

    if (labels && kernel)
    {
        objective=0;
        for (int32_t i=0; i<n; i++)
        {
            // linear term: -alpha_i * y_i
            objective-=get_alpha(i)*labels->get_label(i);
            // quadratic term: +0.5 * alpha_i * alpha_j * k(i,j)
            for (int32_t j=0; j<n; j++)
                objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(i,j);
        }
    }
    else
        SG_ERROR( "cannot compute objective, labels or kernel not set\n");

    return objective;
}

SHOGUN Machine Learning Toolbox - Documentation