#include "lib/common.h"
#include "lib/io.h"
#include "lib/Signal.h"
#include "base/Parallel.h"

#include "classifier/svm/SVM.h"

#include <string.h>

#ifndef WIN32
#include <pthread.h>
#endif

#ifndef DOXYGEN_SHOULD_SKIP_THIS
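/** Parameters handed to each classification worker thread: the SVM and the
 * result labels to write into, the half-open range [start, end) of example
 * indices to classify, and whether this thread reports progress. */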
struct S_THREAD_PARAM
{
    CSVM* svm;
    CLabels* result;
    int32_t start;
    int32_t end;
    bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

CSVM::CSVM(int32_t num_sv)
: CKernelMachine()
{
    set_defaults(num_sv);
}

CSVM::CSVM(float64_t C, CKernel* k, CLabels* lab)
: CKernelMachine()
{
    set_defaults();
    set_C(C,C);
    set_labels(lab);
    set_kernel(k);
}

CSVM::~CSVM()
{
    delete[] svm_model.alpha;
    delete[] svm_model.svs;

    SG_DEBUG("SVM object destroyed\n");
}

void CSVM::set_defaults(int32_t num_sv)
{
    svm_model.b=0.0;
    svm_model.alpha=NULL;
    svm_model.svs=NULL;
    svm_model.num_svs=0;
    svm_loaded=false;

    weight_epsilon=1e-5;
    epsilon=1e-5;
    tube_epsilon=1e-2;

    nu=0.5;
    C1=1;
    C2=1;
    C_mkl=0;
    mkl_norm=1;

    objective=0;

    qpsize=41;
    use_bias=true;
    use_shrinking=true;
    use_mkl=false;
    use_batch_computation=true;
    use_linadd=true;

    if (num_sv>0)
        create_new_model(num_sv);
}

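/** Read a model in the plain-text format written by CSVM::save(). A model
 * file looks like the following sketch (kernel name and numbers are
 * illustrative only):
 *
 *   %SVM
 *   numsv=2;
 *   kernel='GaussianKernel';
 *   b=+1.0000000000000000e-01;
 *   alphas=[
 *       [+5.0000000000000000e-01,0];
 *       [-5.0000000000000000e-01,3];
 *   ];
 *
 * where each alphas entry is [alpha,support_vector_index];. */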
bool CSVM::load(FILE* modelfl)
{
    bool result=true;
    char char_buffer[1024];
    int32_t int_buffer;
    float64_t double_buffer;
    int32_t line_number=1;

    if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
    {
        result=false;
        SG_ERROR( "error in svm file, line nr:%d\n", line_number);
    }
    else
    {
        char_buffer[4]='\0';
        if (strcmp("%SVM", char_buffer)!=0)
        {
            result=false;
            SG_ERROR( "error in svm file, line nr:%d\n", line_number);
        }
        line_number++;
    }

    int_buffer=0;
    if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
    {
        result=false;
        SG_ERROR( "error in svm file, line nr:%d\n", line_number);
    }

    if (!feof(modelfl))
        line_number++;

    SG_INFO( "loading %d support vectors\n", int_buffer);
    create_new_model(int_buffer);

    if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
    {
        result=false;
        SG_ERROR( "error in svm file, line nr:%d\n", line_number);
    }

    if (!feof(modelfl))
        line_number++;

    double_buffer=0;

    if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
    {
        result=false;
        SG_ERROR( "error in svm file, line nr:%d\n", line_number);
    }

    if (!feof(modelfl))
        line_number++;

    set_bias(double_buffer);

    if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
    {
        result=false;
        SG_ERROR( "error in svm file, line nr:%d\n", line_number);
    }
    else
    {
        char_buffer[8]='\0';
        if (strcmp("alphas=[", char_buffer)!=0)
        {
            result=false;
            SG_ERROR( "error in svm file, line nr:%d\n", line_number);
        }
        line_number++;
    }

    // each entry has the form [alpha,support_vector_index];
    for (int32_t i=0; i<get_num_support_vectors(); i++)
    {
        double_buffer=0;
        int_buffer=0;

        if (fscanf(modelfl," [%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
        {
            result=false;
            SG_ERROR( "error in svm file, line nr:%d\n", line_number);
        }

        if (!feof(modelfl))
            line_number++;

        set_support_vector(i, int_buffer);
        set_alpha(i, double_buffer);
    }

    if (fscanf(modelfl,"%2s", char_buffer) == EOF)
    {
        result=false;
        SG_ERROR( "error in svm file, line nr:%d\n", line_number);
    }
    else
    {
        char_buffer[2]='\0';
        if (strcmp("];", char_buffer)!=0)
        {
            result=false;
            SG_ERROR( "error in svm file, line nr:%d\n", line_number);
        }
        line_number++;
    }

    svm_loaded=result;
    return result;
}

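/** Write the model to 'modelfl' in the plain-text format documented above
 * CSVM::load(). */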
bool CSVM::save(FILE* modelfl)
{
    if (!kernel)
        SG_ERROR("Kernel not defined!\n");

    SG_INFO( "Writing model file...");
    fprintf(modelfl,"%%SVM\n");
    fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
    fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
    fprintf(modelfl,"b=%+10.16e;\n", get_bias());

    fprintf(modelfl, "alphas=[\n");

    for (int32_t i=0; i<get_num_support_vectors(); i++)
        fprintf(modelfl,"\t[%+10.16e,%d];\n",
                CSVM::get_alpha(i), get_support_vector(i));

    fprintf(modelfl, "];\n");

    SG_DONE();
    return true;
}

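/** Register the support vectors and their alpha coefficients with the kernel
 * via CKernel::init_optimization(), so the kernel can set up its KP_LINADD
 * optimization and later score single examples with compute_optimized(). */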
bool CSVM::init_kernel_optimization()
{
    int32_t num_sv=get_num_support_vectors();

    if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
    {
        int32_t* sv_idx = new int32_t[num_sv];
        float64_t* sv_weight = new float64_t[num_sv];

        for (int32_t i=0; i<num_sv; i++)
        {
            sv_idx[i] = get_support_vector(i);
            sv_weight[i] = get_alpha(i);
        }

        bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight);

        delete[] sv_idx;
        delete[] sv_weight;

        if (!ret)
            SG_ERROR( "initialization of kernel optimization failed\n");

        return ret;
    }
    else
        SG_ERROR( "initialization of kernel optimization failed\n");

    return false;
}

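/** Thread entry point: classify the examples in [params->start, params->end)
 * and write the outputs into params->result. Only the thread started with
 * verbose=true prints progress; on non-WIN32 builds the loop also stops early
 * if computations are cancelled via CSignal. */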
void* CSVM::classify_example_helper(void* p)
{
    S_THREAD_PARAM* params= (S_THREAD_PARAM*) p;
    CLabels* result=params->result;
    CSVM* svm=params->svm;

#ifdef WIN32
    for (int32_t vec=params->start; vec<params->end; vec++)
#else
    CSignal::clear_cancel();
    for (int32_t vec=params->start; vec<params->end &&
            !CSignal::cancel_computations(); vec++)
#endif
    {
        if (params->verbose)
        {
            int32_t num_vectors=params->end - params->start;
            int32_t v=vec-params->start;
            if ( (v % (num_vectors/100+1)) == 0)
                SG_SPROGRESS(v, 0.0, num_vectors-1);
        }

        result->set_label(vec, svm->classify_example(vec));
    }

    return NULL;
}

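/* Compute outputs for all examples on the kernel's right-hand side. If the
 * kernel supports KP_BATCHEVALUATION and batch computation is enabled, all
 * outputs are obtained with a single kernel->compute_batch() call; otherwise
 * the examples are split across parallel->get_num_threads() threads, each
 * running classify_example_helper(). */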
CLabels* CSVM::classify(CLabels* lab)
{
    if (!kernel)
    {
        SG_ERROR( "SVM can not proceed without kernel!\n");
        return NULL;
    }

    if ( kernel && kernel->get_num_vec_rhs()>0 )
    {
        int32_t num_vectors=kernel->get_num_vec_rhs();

        if (!lab)
        {
            lab=new CLabels(num_vectors);
            SG_REF(lab);
        }

        SG_DEBUG( "computing output on %d test examples\n", num_vectors);

        if (io->get_show_progress())
            io->enable_progress();
        else
            io->disable_progress();

        if (kernel->has_property(KP_BATCHEVALUATION) &&
                get_batch_computation_enabled())
        {
            float64_t* output=new float64_t[num_vectors];
            memset(output, 0, sizeof(float64_t)*num_vectors);

            if (get_num_support_vectors()>0)
            {
                int32_t* sv_idx=new int32_t[get_num_support_vectors()];
                float64_t* sv_weight=new float64_t[get_num_support_vectors()];
                int32_t* idx=new int32_t[num_vectors];

                for (int32_t i=0; i<num_vectors; i++)
                    idx[i]=i;

                for (int32_t i=0; i<get_num_support_vectors(); i++)
                {
                    sv_idx[i] = get_support_vector(i);
                    sv_weight[i] = get_alpha(i);
                }

                kernel->compute_batch(num_vectors, idx,
                        output, get_num_support_vectors(), sv_idx, sv_weight);
                delete[] sv_idx;
                delete[] sv_weight;
                delete[] idx;
            }

            for (int32_t i=0; i<num_vectors; i++)
                lab->set_label(i, get_bias()+output[i]);

            delete[] output;
        }
        else
        {
            int32_t num_threads=parallel->get_num_threads();
            ASSERT(num_threads>0);

            if (num_threads < 2)
            {
                S_THREAD_PARAM params;
                params.svm=this;
                params.result=lab;
                params.start=0;
                params.end=num_vectors;
                params.verbose=true;
                classify_example_helper((void*) &params);
            }
#ifndef WIN32
            else
            {
                pthread_t* threads = new pthread_t[num_threads-1];
                S_THREAD_PARAM* params = new S_THREAD_PARAM[num_threads];
                int32_t step= num_vectors/num_threads;

                int32_t t;

                for (t=0; t<num_threads-1; t++)
                {
                    params[t].svm = this;
                    params[t].result = lab;
                    params[t].start = t*step;
                    params[t].end = (t+1)*step;
                    params[t].verbose = false;
                    pthread_create(&threads[t], NULL,
                            CSVM::classify_example_helper, (void*)&params[t]);
                }

                params[t].svm = this;
                params[t].result = lab;
                params[t].start = t*step;
                params[t].end = num_vectors;
                params[t].verbose = true;
                classify_example_helper((void*) &params[t]);

                for (t=0; t<num_threads-1; t++)
                    pthread_join(threads[t], NULL);

                delete[] params;
                delete[] threads;
            }
#endif
        }

#ifndef WIN32
        if ( CSignal::cancel_computations() )
            SG_INFO( "prematurely stopped. \n");
        else
#endif
            SG_DONE();
    }
    else
        return NULL;

    return lab;
}

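/* Output for a single example: either via the kernel's precomputed LINADD
 * optimization, or as the explicit sum over all support vectors,
 * f(x) = sum_i alpha_i*k(x_i,x) + b. */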
float64_t CSVM::classify_example(int32_t num)
{
    ASSERT(kernel);

    if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
    {
        float64_t dist = kernel->compute_optimized(num);
        return (dist+get_bias());
    }
    else
    {
        float64_t dist=0;
        for (int32_t i=0; i<get_num_support_vectors(); i++)
            dist+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);

        return (dist+get_bias());
    }
}

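/* Evaluate the (dual) SVM objective
 *   0.5 * sum_{i,j} alpha_i*alpha_j*k(i,j) - sum_i alpha_i*y_i
 * over the stored support vectors and cache it in 'objective'. Note that the
 * loops index both the kernel and the labels by support-vector position. */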
float64_t CSVM::compute_objective()
{
    int32_t n=get_num_support_vectors();

    if (labels && kernel)
    {
        objective=0;
        for (int32_t i=0; i<n; i++)
        {
            objective-=get_alpha(i)*labels->get_label(i);
            for (int32_t j=0; j<n; j++)
                objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(i,j);
        }
    }
    else
        SG_ERROR( "cannot compute objective, labels or kernel not set\n");

    return objective;
}