00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Time.h"
00016 #include "classifier/LinearClassifier.h"
00017 #include "classifier/svm/SubGradientSVM.h"
00018 #include "classifier/svm/qpbsvmlib.h"
00019 #include "features/DotFeatures.h"
00020 #include "features/Labels.h"
00021
// Compile-time switch for verbose per-iteration diagnostics in train().
#undef DEBUG_SUBGRADIENTSVM

// Defined elsewhere (presumably in the dot-feature code); only read here,
// at the end of train(), for the sparsity report — TODO confirm producer.
extern float64_t sparsity;
// Accumulated wall-clock seconds spent inside the QP solver (see
// compute_min_subgradient); reset at the start of train().
float64_t tim;
00026
00027 CSubGradientSVM::CSubGradientSVM()
00028 : CLinearClassifier(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
00029 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00030 {
00031 }
00032
00033 CSubGradientSVM::CSubGradientSVM(
00034 float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00035 : CLinearClassifier(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
00036 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00037 {
00038 set_features(traindat);
00039 set_labels(trainlab);
00040 }
00041
00042
CSubGradientSVM::~CSubGradientSVM()
{
	// Nothing to do: training buffers are released by cleanup() at the end
	// of train(). w is presumably tracked by the base class via set_w() —
	// TODO confirm that ownership to rule out a leak of the final w.
}
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
// Partition the training examples by how their cached margin projection
// proj[i] relates to 1, within the current tolerance autoselected_epsilon:
//   active[i]=1 ("active"): proj[i] <  1-autoselected_epsilon  (violator)
//   active[i]=2 ("bound"):  |proj[i]-1| <= autoselected_epsilon (on margin)
//   active[i]=0 otherwise   (outside the margin band)
// The two conditions are mutually exclusive, so each example gets one label.
// idx_active/idx_bound receive the member indices, num_active/num_bound the
// set sizes. If the labeling did not change, the tolerance is tightened and
// the labeling redone. Returns delta_active, the number of examples whose
// label changed (0 => stable at final precision; caller may stop).
int32_t CSubGradientSVM::find_active(
	int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
{
	delta_bound=0;
	delta_active=0;
	num_active=0;
	num_bound=0;

	// First pass: label every example with the current tolerance.
	for (int32_t i=0; i<num_vec; i++)
	{
		active[i]=0;

		// margin violator
		if (proj[i] < 1-autoselected_epsilon)
		{
			idx_active[num_active++]=i;
			active[i]=1;
		}

		// on the margin (cannot overlap with the branch above)
		if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
		{
			idx_bound[num_bound++]=i;
			active[i]=2;
		}

		if (active[i]!=old_active[i])
			delta_active++;

		// NOTE(review): despite the name, this counts examples that STAYED
		// at the bound (label 2 both times), not ones that changed.
		if (active[i]==2 && old_active[i]==2)
			delta_bound++;
	}

	// Working set unchanged: either we are done (tolerance already at its
	// final value epsilon) or we halve the working tolerance and reselect.
	if (delta_active==0 && work_epsilon<=epsilon)
		return 0;
	else if (delta_active==0)
	{
		work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
		work_epsilon=CMath::max(work_epsilon, epsilon);
		num_bound=qpsize;
	}

	delta_bound=0;
	delta_active=0;
	num_active=0;
	num_bound=0;

	// Sort the residuals |proj-1| ascending; the qpsize-th smallest one
	// becomes the new tolerance, so about qpsize examples land on the bound.
	for (int32_t i=0; i<num_vec; i++)
	{
		tmp_proj[i]=CMath::abs(proj[i]-1);
		tmp_proj_idx[i]=i;
	}

	CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);

	autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];

#ifdef DEBUG_SUBGRADIENTSVM

#endif

	// Clamp the auto-selected tolerance into [epsilon, work_epsilon].
	if (autoselected_epsilon>work_epsilon)
		autoselected_epsilon=work_epsilon;

	if (autoselected_epsilon<epsilon)
	{
		autoselected_epsilon=epsilon;

		// Count how many residuals fall inside the clamped tolerance.
		int32_t i=0;
		while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
			i++;

		// Back the tolerance off so the QP sub-problem fits in qpsize_max.
		// NOTE(review): autoselected_epsilon was just set to epsilon above,
		// so the second condition is always false here and this branch
		// appears to be dead code — verify against upstream history.
		if (i>=qpsize_max && autoselected_epsilon>epsilon)
		{
			SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
			int32_t num_in_qp=i;
			while (--i>=0 && num_in_qp>=qpsize_max)
			{
				if (tmp_proj[i] < autoselected_epsilon)
				{
					autoselected_epsilon=tmp_proj[i];
					num_in_qp--;
				}
			}


		}
	}

	// Second pass: relabel with the updated tolerance (same rules as above).
	for (int32_t i=0; i<num_vec; i++)
	{
		active[i]=0;

		// margin violator
		if (proj[i] < 1-autoselected_epsilon)
		{
			idx_active[num_active++]=i;
			active[i]=1;
		}

		// on the margin
		if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
		{
			idx_bound[num_bound++]=i;
			active[i]=2;
		}

		if (active[i]!=old_active[i])
			delta_active++;

		if (active[i]==2 && old_active[i]==2)
			delta_bound++;
	}

	return delta_active;
}
00200
00201
00202 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
00203 {
00204 for (int32_t i=0; i<num_vec; i++)
00205 {
00206 if (active[i]==1 && old_active[i]!=1)
00207 {
00208 features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
00209 if (use_bias)
00210 sum_Cy_active+=C1*get_label(i);
00211 }
00212 else if (old_active[i]==1 && active[i]!=1)
00213 {
00214 features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
00215 if (use_bias)
00216 sum_Cy_active-=C1*get_label(i);
00217 }
00218 }
00219
00220 CMath::swap(active,old_active);
00221 }
00222
// Exact line search along the descent direction -grad_w (and -grad_b).
// Restricted to step size alpha, the objective is piecewise quadratic:
//   f(alpha) = A_zero*alpha^2 + (B_zero + sum_B)*alpha + const,
// where individual hinge terms switch on/off at the breakpoints
// (proj[i]-1)/grad_proj[i]. We walk the sorted breakpoints accumulating
// sum_B until the derivative turns non-negative, then linearly interpolate
// the zero crossing of the derivative. Returns the chosen step size.
float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
{
	float64_t sum_B = 0;
	float64_t A_zero = 0.5*CMath::dot(grad_w, grad_w, num_feat);
	float64_t B_zero = -CMath::dot(w, grad_w, num_feat);

	int32_t num_hinge=0;

	// Collect the breakpoints; also cache grad_proj[i] (the per-example
	// projection of the gradient) for update_projection() afterwards.
	for (int32_t i=0; i<num_vec; i++)
	{
		float64_t p=get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
		grad_proj[i]=p;
		if (p!=0)
		{
			hinge_point[num_hinge]=(proj[i]-1)/p;
			hinge_idx[num_hinge]=i;
			num_hinge++;

			// Hinges with negative slope are initially "on".
			if (p<0)
				sum_B+=p;
		}
	}
	sum_B*=C1;

	CMath::qsort_index(hinge_point, hinge_idx, num_hinge);

	// NOTE(review): if num_hinge==0 this reads hinge_point[0], which init()
	// zero-fills, so alpha would be 0 — confirm callers guarantee hinges.
	float64_t alpha = hinge_point[0];
	float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;

	float64_t old_grad_val = grad_val;
	float64_t old_alpha = alpha;

	// Walk breakpoints while the directional derivative stays negative.
	for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
	{
		alpha = hinge_point[i];
		grad_val = 2*A_zero*alpha + B_zero + sum_B;

		if (grad_val > 0)
		{
			// Sign change inside (old_alpha, alpha): interpolate the root.
			ASSERT(old_grad_val-grad_val != 0);
			float64_t gamma = -grad_val/(old_grad_val-grad_val);
			alpha = old_alpha*gamma + (1-gamma)*alpha;
		}
		else
		{
			old_grad_val = grad_val;
			old_alpha = alpha;

			// Hinge i toggles at this breakpoint: fold its slope into sum_B
			// and re-evaluate the derivative at the same alpha.
			sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
			grad_val = 2*A_zero*alpha + B_zero + sum_B;
		}
	}

	return alpha;
}
00288
// Compute the minimum-norm (sub)gradient direction (grad_w, grad_b) of the
// primal objective. Examples exactly on the margin ("bound" set) contribute
// a whole interval of subgradients; their convex combination weights beta
// in [0,1] are chosen by solving a small box-constrained QP over the bound
// set (Gram matrix Z, linear term Zv). Returns the directional derivative
// used by train() as a stopping/progress measure.
float64_t CSubGradientSVM::compute_min_subgradient(
	int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
{
	float64_t dir_deriv=0;

	if (num_bound > 0)
	{
		// v = w - sum_CXy_active: the fixed part of the gradient in w.
		CTime t2;
		CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);

		if (num_bound>=qpsize_max && num_it_noimprovement!=10)
		{
			// QP would exceed the size cap: fall back to random convex
			// weights instead of solving it (unless we are in the
			// no-improvement recovery state, where the exact QP is forced).
			for (int32_t i=0; i<num_bound; i++)
				beta[i]=CMath::random(0.0,1.0);
		}
		else
		{
			memset(beta, 0, sizeof(float64_t)*num_bound);

			// With a bias term, each dot product is augmented by a constant
			// 1 feature.
			float64_t bias_const=0;

			if (use_bias)
				bias_const=1;

			// Build the symmetric Gram matrix Z over the bound examples and
			// the linear term Zv (only the upper triangle is computed).
			for (int32_t i=0; i<num_bound; i++)
			{
				for (int32_t j=i; j<num_bound; j++)
				{
					Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])*
						(features->dot(idx_bound[i], idx_bound[j]) + bias_const);

					Z[j*num_bound+i]=Z[i*num_bound+j];

				}

				Zv[i]=-2.0*C1*get_label(idx_bound[i])*
					(features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
			}

			t2.stop();
			t2.time_diff_sec(true);

			// Solve the box-constrained QP for beta; accumulate solver time
			// in the global `tim` for the final report.
			CTime t;
			CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);

#ifdef USE_CPLEX
			solver.set_solver(QPB_SOLVER_CPLEX);
#else
			solver.set_solver(QPB_SOLVER_SCAS);
#endif

			solver.solve_qp(beta, num_bound);

			t.stop();
			tim+=t.time_diff_sec(true);

		}

		// Assemble the subgradient: fixed part plus beta-weighted bound
		// contributions.
		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		for (int32_t i=0; i<num_bound; i++)
		{
			features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
			if (use_bias)
				grad_b -= C1 * get_label(idx_bound[i])*beta[i];
		}

		// Directional derivative: linear part plus the positive parts of
		// the bound examples' responses along the new direction.
		dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
		for (int32_t i=0; i<num_bound; i++)
		{
			float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
			dir_deriv += CMath::max(0.0, val);
		}
	}
	else
	{
		// No bound examples: the objective is differentiable and the
		// gradient is unique.
		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
	}

	return dir_deriv;
}
00386
00387 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
00388 {
00389 float64_t result= 0.5 * CMath::dot(w,w, num_feat);
00390
00391 for (int32_t i=0; i<num_vec; i++)
00392 {
00393 if (proj[i]<1.0)
00394 result += C1 * (1.0-proj[i]);
00395 }
00396
00397 return result;
00398 }
00399
00400 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
00401 {
00402 for (int32_t i=0; i<num_vec; i++)
00403 proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat)+bias);
00404 }
00405
00406 void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
00407 {
00408 CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
00409 }
00410
00411 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
00412 {
00413
00414 delete[] w;
00415 w=new float64_t[num_feat];
00416 memset(w,0,sizeof(float64_t)*num_feat);
00417
00418 bias=0;
00419 num_it_noimprovement=0;
00420 grad_b=0;
00421 set_w(w, num_feat);
00422 qpsize_limit=5000;
00423
00424 grad_w=new float64_t[num_feat];
00425 memset(grad_w,0,sizeof(float64_t)*num_feat);
00426
00427 sum_CXy_active=new float64_t[num_feat];
00428 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
00429
00430 v=new float64_t[num_feat];
00431 memset(v,0,sizeof(float64_t)*num_feat);
00432
00433 old_v=new float64_t[num_feat];
00434 memset(old_v,0,sizeof(float64_t)*num_feat);
00435
00436 sum_Cy_active=0;
00437
00438 proj= new float64_t[num_vec];
00439 memset(proj,0,sizeof(float64_t)*num_vec);
00440
00441 tmp_proj=new float64_t[num_vec];
00442 memset(proj,0,sizeof(float64_t)*num_vec);
00443
00444 tmp_proj_idx= new int32_t[num_vec];
00445 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
00446
00447 grad_proj= new float64_t[num_vec];
00448 memset(grad_proj,0,sizeof(float64_t)*num_vec);
00449
00450 hinge_point= new float64_t[num_vec];
00451 memset(hinge_point,0,sizeof(float64_t)*num_vec);
00452
00453 hinge_idx= new int32_t[num_vec];
00454 memset(hinge_idx,0,sizeof(int32_t)*num_vec);
00455
00456 active=new uint8_t[num_vec];
00457 memset(active,0,sizeof(uint8_t)*num_vec);
00458
00459 old_active=new uint8_t[num_vec];
00460 memset(old_active,0,sizeof(uint8_t)*num_vec);
00461
00462 idx_bound=new int32_t[num_vec];
00463 memset(idx_bound,0,sizeof(int32_t)*num_vec);
00464
00465 idx_active=new int32_t[num_vec];
00466 memset(idx_active,0,sizeof(int32_t)*num_vec);
00467
00468 Z=new float64_t[qpsize_limit*qpsize_limit];
00469 memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00470
00471 Zv=new float64_t[qpsize_limit];
00472 memset(Zv,0,sizeof(float64_t)*qpsize_limit);
00473
00474 beta=new float64_t[qpsize_limit];
00475 memset(beta,0,sizeof(float64_t)*qpsize_limit);
00476
00477 old_Z=new float64_t[qpsize_limit*qpsize_limit];
00478 memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00479
00480 old_Zv=new float64_t[qpsize_limit];
00481 memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
00482
00483 old_beta=new float64_t[qpsize_limit];
00484 memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
00485
00486 }
00487
00488 void CSubGradientSVM::cleanup()
00489 {
00490 delete[] hinge_idx;
00491 delete[] hinge_point;
00492 delete[] grad_proj;
00493 delete[] proj;
00494 delete[] tmp_proj;
00495 delete[] tmp_proj_idx;
00496 delete[] active;
00497 delete[] old_active;
00498 delete[] idx_bound;
00499 delete[] idx_active;
00500 delete[] sum_CXy_active;
00501 delete[] grad_w;
00502 delete[] v;
00503 delete[] Z;
00504 delete[] Zv;
00505 delete[] beta;
00506 delete[] old_v;
00507 delete[] old_Z;
00508 delete[] old_Zv;
00509 delete[] old_beta;
00510
00511 hinge_idx=NULL;
00512 proj=NULL;
00513 active=NULL;
00514 old_active=NULL;
00515 idx_bound=NULL;
00516 idx_active=NULL;
00517 sum_CXy_active=NULL;
00518 grad_w=NULL;
00519 v=NULL;
00520 Z=NULL;
00521 Zv=NULL;
00522 beta=NULL;
00523 }
00524
// Train the linear SVM by projected subgradient descent: repeatedly
// (1) select the active/bound working sets (find_active), (2) maintain the
// incremental sums (update_active), (3) compute the minimum-norm
// subgradient (compute_min_subgradient), (4) exact line search along it
// (line_search), and (5) apply the step and update the cached projections.
// Stops on optimality (tolerance reached, stable set, tiny step), on an
// external cancel signal, or when max_train_time is exceeded.
// Returns true; requires labels and features to be set beforehand.
bool CSubGradientSVM::train()
{
	tim=0;
	SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
	ASSERT(labels);
	ASSERT(get_features());

	int32_t num_iterations=0;
	int32_t num_train_labels=labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	// one label per training vector
	ASSERT(num_vec==num_train_labels);

	init(num_vec, num_feat);

	int32_t num_active=0;
	int32_t num_bound=0;
	float64_t alpha=0;
	float64_t dir_deriv=0;
	float64_t obj=0;
	delta_active=num_vec;
	last_it_noimprovement=-1;

	// Start with a loose working tolerance; find_active tightens it as the
	// active set stabilizes, down to the user-supplied epsilon.
	work_epsilon=0.99;
	autoselected_epsilon=work_epsilon;

	compute_projection(num_feat, num_vec);

	CTime time;
	float64_t loop_time=0;
	while (!(CSignal::cancel_computations()))
	{
		CTime t;
		delta_active=find_active(num_feat, num_vec, num_active, num_bound);

		update_active(num_feat, num_vec);

#ifdef DEBUG_SUBGRADIENTSVM
		SG_PRINT("==================================================\niteration: %d ", num_iterations);
		obj=compute_objective(num_feat, num_vec);
		SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
			obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
#else
		SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

		dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);

		alpha=line_search(num_feat, num_vec);

		// Optimality check: only meaningful when the exact QP was solved
		// (small bound set) or we are in the no-improvement recovery state.
		if (num_it_noimprovement==10 || num_bound<qpsize_max)
		{
			float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
				grad_b*grad_b;

#ifdef DEBUG_SUBGRADIENTSVM
			SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
				"work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
				work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
#else
			SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

			// converged: final tolerance, stable set, negligible step
			if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
				break;
			else
				num_it_noimprovement=0;
		}

		// Track consecutive iterations with no usable descent direction.
		if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
		{
			if (last_it_noimprovement==num_iterations-1)
			{
				SG_PRINT("no improvement...\n");
				num_it_noimprovement++;
			}
			else
				num_it_noimprovement=0;

			last_it_noimprovement=num_iterations;
		}

		// Take the step and update the cached margin projections.
		CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
		bias-=alpha*grad_b;

		update_projection(alpha, num_vec);

		t.stop();
		loop_time=t.time_diff_sec();
		num_iterations++;

		if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
			break;
	}

	SG_INFO("converged after %d iterations\n", num_iterations);

	obj=compute_objective(num_feat, num_vec);
	SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d sparsity: %f\n",
		obj, alpha, dir_deriv, num_bound, num_active, sparsity/num_iterations);

#ifdef DEBUG_SUBGRADIENTSVM
	CMath::display_vector(w, w_dim, "w");
	SG_PRINT("bias: %f\n", bias);
#endif
	SG_INFO("solver time:%f s\n", tim);

	cleanup();

	return true;
}