#include "lib/config.h"

#ifdef HAVE_LAPACK
#include "lib/io.h"
#include "lib/Signal.h"
#include "lib/Time.h"
#include "classifier/svm/LibLinear.h"
#include "classifier/svm/SVM_linear.h"
#include "classifier/svm/Tron.h"
#include "features/DotFeatures.h"

using namespace shogun;

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearClassifier()
{
	liblinear_solver_type=l;
	use_bias=false;
	C1=1;
	C2=1;
	epsilon=1e-5; // default stopping tolerance (matches the other constructor)
	set_max_iterations();
}

CLibLinear::CLibLinear(
	float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier(), C1(C), C2(C), use_bias(true), epsilon(1e-5)
{
	set_features(traindat);
	set_labels(trainlab);
	liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
	set_max_iterations();
}

CLibLinear::~CLibLinear()
{
}
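
// Train a two-class linear classifier on the current features/labels,
// dispatching to the solver selected via liblinear_solver_type. Note that
// the L1-regularized solvers (L1R_L2LOSS_SVC, L1R_LR) operate on the
// transposed problem, so they require the feature matrix in transposed
// form; this is enforced below by checking that the feature-space
// dimension equals the number of training labels.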
bool CLibLinear::train(CFeatures* data)
{
	CSignal::clear_cancel();
	ASSERT(labels);
	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");

		set_features((CDotFeatures*) data);
	}
	ASSERT(features);
	ASSERT(labels->is_two_class_labeling());

	int32_t num_train_labels=labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	if (liblinear_solver_type == L1R_L2LOSS_SVC ||
			liblinear_solver_type == L1R_LR)
	{
		if (num_feat!=num_train_labels)
		{
			SG_ERROR("L1 methods require the data to be transposed: "
					"number of features %d does not match number of "
					"training labels %d\n",
					num_feat, num_train_labels);
		}
		CMath::swap(num_feat, num_vec);
	}
	else
	{
		if (num_vec!=num_train_labels)
		{
			SG_ERROR("number of vectors %d does not match "
					"number of training labels %d\n",
					num_vec, num_train_labels);
		}
	}

	delete[] w;
	if (use_bias)
		w=new float64_t[num_feat+1];
	else
		w=new float64_t[num_feat];
	w_dim=num_feat;

	problem prob;
	if (use_bias)
	{
		prob.n=w_dim+1;
		memset(w, 0, sizeof(float64_t)*(w_dim+1));
	}
	else
	{
		prob.n=w_dim;
		memset(w, 0, sizeof(float64_t)*w_dim);
	}
	prob.l=num_vec;
	prob.x=features;
	prob.y=new int[prob.l];
	prob.use_bias=use_bias;

	for (int32_t i=0; i<prob.l; i++)
		prob.y[i]=labels->get_int_label(i);

	// count positive/negative examples; used below to scale the stopping
	// tolerance by min(#pos,#neg)/l
	int pos = 0;
	int neg = 0;
	for (int i=0; i<prob.l; i++)
	{
		if (prob.y[i]==+1)
			pos++;
	}
	neg = prob.l - pos;

	SG_INFO("%d training points %d dims\n", prob.l, prob.n);

	function *fun_obj=NULL;
	double Cp=C1;
	double Cn=C2;
	switch (liblinear_solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
			CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
			SG_DEBUG("starting L2R_LR training via tron\n");
			tron_obj.tron(w, max_train_time);
			SG_DEBUG("done with tron\n");
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
			CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
			tron_obj.tron(w, max_train_time);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
			solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
			break;
		case L1R_LR:
			solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
			break;
		case MCSVM_CS:
			SG_NOTIMPLEMENTED;
			break;
		default:
			SG_ERROR("Error: unknown solver_type\n");
			break;
	}

	if (use_bias)
		set_bias(w[w_dim]);
	else
		set_bias(0);

	delete[] prob.y;

	return true;
}
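
// Example usage (a minimal sketch, not compiled as part of this file). It
// assumes the setters declared in LibLinear.h (set_C(), set_epsilon(),
// set_bias_enabled()) and caller-prepared `train_feats`/`train_labels`,
// a CDotFeatures* / two-class CLabels* pair -- all placeholder names:
//
//   CLibLinear* svm = new CLibLinear(L2R_L1LOSS_SVC_DUAL);
//   svm->set_C(1.0, 1.0);            // C for negative/positive class
//   svm->set_epsilon(1e-3);          // solver stopping tolerance
//   svm->set_bias_enabled(true);
//   svm->set_features(train_feats);
//   svm->set_labels(train_labels);
//   svm->train();                    // dispatches to the solver chosen above
//   SG_UNREF(svm);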
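
// A coordinate descent algorithm for the L1-loss and L2-loss SVM dual
// problems (following the upstream LIBLINEAR solver this code derives from):
//
//   min_alpha  0.5 alpha^T (Q + D) alpha - e^T alpha
//   s.t.       0 <= alpha_i <= upper_bound_i
//
// where Q_ij = y_i y_j x_i^T x_j and D is diagonal.
//
// In the L1-SVM case: upper_bound_i = Cp if y_i = 1, Cn if y_i = -1; D_ii = 0.
// In the L2-SVM case: upper_bound_i = INF; D_ii = 1/(2 Cp) if y_i = 1,
// 1/(2 Cn) if y_i = -1.
//
// eps is the stopping tolerance; the solution is written to w.
//
// GETI(i) maps a label y[i] in {-1,+1} to an index {0,2} into the
// per-class parameter arrays below; upstream LIBLINEAR notes that
// per-instance weights can be supported by redefining GETI(i) as (i).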
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l2r_l1l2_svc(
	const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int *index = new int[l];
	double *alpha = new double[l];
	int32_t *y = new int32_t[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = CMath::INFTY;
	double PGmin_old = -CMath::INFTY;
	double PGmax_new, PGmin_new;

	// default solver type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
	if(st == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	int n = prob->n;

	if (prob->use_bias)
		n--;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
			y[i] = +1;
		else
			y[i] = -1;
		QD[i] = diag[GETI(i)];
		QD[i] += prob->x->dot(i,i);
		index[i] = i;
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
			break;

		PGmax_new = -CMath::INFTY;
		PGmin_new = CMath::INFTY;

		// randomly permute the active set
		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			CMath::swap(index[i], index[j]);
		}

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			int32_t yi = y[i];

			G = prob->x->dense_dot(i, w, n);
			if (prob->use_bias)
				G+=w[n];

			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = CMath::max(PGmax_new, PG);
			PGmin_new = CMath::min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;

				prob->x->add_to_dense_vec(d, i, w, n);

				if (prob->use_bias)
					w[n]+=d;
			}
		}

		iter++;
		float64_t gap=PGmax_new - PGmin_new;
		SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

		if(gap <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				// undo shrinking and recheck optimality over the full set
				active_size = l;
				PGmax_old = CMath::INFTY;
				PGmin_old = -CMath::INFTY;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = CMath::INFTY;
		if (PGmin_old >= 0)
			PGmin_old = -CMath::INFTY;
	}

	SG_DONE();
	SG_INFO("optimization finished, #iter = %d\n",iter);
	if (iter >= max_iterations)
	{
		SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
				"(also see liblinear FAQ)\n\n");
	}

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	SG_INFO("Objective value = %lf\n",v/2);
	SG_INFO("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}
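
// A coordinate descent algorithm for L1-regularized L2-loss support vector
// classification (following the upstream LIBLINEAR solver):
//
//   min_w  sum_j |w_j| + C sum_i max(0, 1 - y_i w^T x_i)^2
//
// The problem is passed with features in transposed (column) form; see the
// check in CLibLinear::train(). eps is the stopping tolerance and the
// solution is written to w.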
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l1r_l2_svc(
	problem *prob_col, double eps, double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new;
	double Gmax_init=0;
	double d_old, d_diff;
	double loss_old=0, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	int32_t *y = new int32_t[l];
	double *b = new double[l]; // b = 1 - y w^T x, maintained incrementally
	double *xj_sq = new double[w_size];

	CDotFeatures* x = (CDotFeatures*) prob_col->x;
	void* iterator;
	int32_t ind;
	float64_t val;

	double C[3] = {Cn,0,Cp};

	int n = prob_col->n;
	if (prob_col->use_bias)
		n--;

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}

	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;

		// the bias column behaves like a feature column of all ones
		if (use_bias && j==n)
		{
			for (ind=0; ind<l; ind++)
				xj_sq[n] += C[GETI(ind)];
		}
		else
		{
			iterator=x->get_feature_iterator(j);
			while (x->get_next_feature(ind, val, iterator))
				xj_sq[j] += C[GETI(ind)]*val*val;
			x->free_feature_iterator(iterator);
		}
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
			break;

		Gmax_new = 0;

		// randomly permute the active set
		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			if (use_bias && j==n)
			{
				for (ind=0; ind<l; ind++)
				{
					if(b[ind] > 0)
					{
						double tmp = C[GETI(ind)]*y[ind];
						G_loss -= tmp*b[ind];
						H += tmp*y[ind];
					}
				}
			}
			else
			{
				iterator=x->get_feature_iterator(j);

				while (x->get_next_feature(ind, val, iterator))
				{
					if(b[ind] > 0)
					{
						double tmp = C[GETI(ind)]*val*y[ind];
						G_loss -= tmp*b[ind];
						H += tmp*val*y[ind];
					}
				}
				x->free_feature_iterator(iterator);
			}

			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = CMath::max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
							b[ind] += d_diff*y[ind];
						break;
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
							b[ind] += d_diff*val*y[ind];

						x->free_feature_iterator(iterator);
						break;
					}
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;

					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							if(b[ind] > 0)
								loss_old += C[GETI(ind)]*b[ind]*b[ind];
							double b_new = b[ind] + d_diff*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							if(b[ind] > 0)
								loss_old += C[GETI(ind)]*b[ind]*b[ind];
							double b_new = b[ind] + d_diff*val*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
						x->free_feature_iterator(iterator);
					}
				}
				else
				{
					loss_new = 0;
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							double b_new = b[ind] + d_diff*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							double b_new = b[ind] + d_diff*val*y[ind];
							b[ind] = b_new;
							if(b_new > 0)
								loss_new += C[GETI(ind)]*b_new*b_new;
						}
						x->free_feature_iterator(iterator);
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				SG_INFO("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<n; i++)
				{
					if(w[i]==0)
						continue;

					iterator=x->get_feature_iterator(i);
					while (x->get_next_feature(ind, val, iterator))
						b[ind] -= w[i]*val*y[ind];
					x->free_feature_iterator(iterator);
				}

				if (use_bias && w[n])
				{
					for (ind=0; ind<l; ind++)
						b[ind] -= w[n]*y[ind];
				}
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;

		SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE();
	SG_INFO("optimization finished, #iter = %d\n", iter);
	if(iter >= max_iterations)
		SG_WARNING("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	SG_INFO("Objective value = %lf\n", v);
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}
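
// A coordinate descent algorithm for L1-regularized logistic regression
// (following the upstream LIBLINEAR solver):
//
//   min_w  sum_j |w_j| + C sum_i log(1 + exp(-y_i w^T x_i))
//
// As with solve_l1r_l2_svc(), the problem arrives in transposed (column)
// form; eps is the stopping tolerance and the solution is written to w.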
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l1r_lr(
	const problem *prob_col, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double x_min = 0;
	double sigma = 0.01;
	double d, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new;
	double Gmax_init=0;
	double sum1, appxcond1;
	double sum2, appxcond2;
	double cond;

	int *index = new int[w_size];
	int32_t *y = new int32_t[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *xj_max = new double[w_size];
	double *C_sum = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xjpos_sum = new double[w_size];

	CDotFeatures* x = prob_col->x;
	void* iterator;
	int ind;
	double val;

	double C[3] = {Cn,0,Cp};

	int n = prob_col->n;
	if (prob_col->use_bias)
		n--;

	for(j=0; j<l; j++)
	{
		exp_wTx[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_max[j] = 0;
		C_sum[j] = 0;
		xjneg_sum[j] = 0;
		xjpos_sum[j] = 0;

		// the bias column behaves like a feature column of all ones
		if (use_bias && j==n)
		{
			for (ind=0; ind<l; ind++)
			{
				x_min = CMath::min(x_min, 1.0);
				xj_max[j] = CMath::max(xj_max[j], 1.0);
				C_sum[j] += C[GETI(ind)];
				if(y[ind] == -1)
					xjneg_sum[j] += C[GETI(ind)];
				else
					xjpos_sum[j] += C[GETI(ind)];
			}
		}
		else
		{
			iterator=x->get_feature_iterator(j);
			while (x->get_next_feature(ind, val, iterator))
			{
				x_min = CMath::min(x_min, val);
				xj_max[j] = CMath::max(xj_max[j], val);
				C_sum[j] += C[GETI(ind)];
				if(y[ind] == -1)
					xjneg_sum[j] += C[GETI(ind)]*val;
				else
					xjpos_sum[j] += C[GETI(ind)]*val;
			}
			x->free_feature_iterator(iterator);
		}
	}

	CTime start_time;
	while (iter < max_iterations && !CSignal::cancel_computations())
	{
		if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
			break;

		Gmax_new = 0;

		// randomly permute the active set
		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			sum1 = 0;
			sum2 = 0;
			H = 0;

			if (use_bias && j==n)
			{
				for (ind=0; ind<l; ind++)
				{
					double exp_wTxind = exp_wTx[ind];
					double tmp1 = 1.0/(1+exp_wTxind);
					double tmp2 = C[GETI(ind)]*tmp1;
					double tmp3 = tmp2*exp_wTxind;
					sum2 += tmp2;
					sum1 += tmp3;
					H += tmp1*tmp3;
				}
			}
			else
			{
				iterator=x->get_feature_iterator(j);
				while (x->get_next_feature(ind, val, iterator))
				{
					double exp_wTxind = exp_wTx[ind];
					double tmp1 = val/(1+exp_wTxind);
					double tmp2 = C[GETI(ind)]*tmp1;
					double tmp3 = tmp2*exp_wTxind;
					sum2 += tmp2;
					sum1 += tmp3;
					H += tmp1*tmp3;
				}
				x->free_feature_iterator(iterator);
			}

			G = -sum2 + xjneg_sum[j];

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			d = CMath::min(CMath::max(d,-10.0),10.0);

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				if(x_min >= 0)
				{
					double tmp = exp(d*xj_max[j]);
					appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
					appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
					if(CMath::min(appxcond1,appxcond2) <= 0)
					{
						if (use_bias && j==n)
						{
							for (ind=0; ind<l; ind++)
								exp_wTx[ind] *= exp(d);
						}
						else
						{
							iterator=x->get_feature_iterator(j);
							while (x->get_next_feature(ind, val, iterator))
								exp_wTx[ind] *= exp(d*val);
							x->free_feature_iterator(iterator);
						}
						break;
					}
				}

				cond += d*xjneg_sum[j];

				int i = 0;

				if (use_bias && j==n)
				{
					for (ind=0; ind<l; ind++)
					{
						double exp_dx = exp(d);
						exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
						cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
						i++;
					}
				}
				else
				{
					iterator=x->get_feature_iterator(j);
					while (x->get_next_feature(ind, val, iterator))
					{
						double exp_dx = exp(d*val);
						exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
						cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
						i++;
					}
					x->free_feature_iterator(iterator);
				}

				if(cond <= 0)
				{
					i = 0;
					if (use_bias && j==n)
					{
						for (ind=0; ind<l; ind++)
						{
							exp_wTx[ind] = exp_wTx_new[i];
							i++;
						}
					}
					else
					{
						iterator=x->get_feature_iterator(j);
						while (x->get_next_feature(ind, val, iterator))
						{
							exp_wTx[ind] = exp_wTx_new[i];
							i++;
						}
						x->free_feature_iterator(iterator);
					}
					break;
				}
				else
				{
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute exp_wTx[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				SG_INFO("#");
				for(int i=0; i<l; i++)
					exp_wTx[i] = 0;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;

					if (use_bias && i==n)
					{
						for (ind=0; ind<l; ind++)
							exp_wTx[ind] += w[i];
					}
					else
					{
						iterator=x->get_feature_iterator(i);
						while (x->get_next_feature(ind, val, iterator))
							exp_wTx[ind] += w[i]*val;
						x->free_feature_iterator(iterator);
					}
				}

				for(int i=0; i<l; i++)
					exp_wTx[i] = exp(exp_wTx[i]);
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;
		SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE();
	SG_INFO("optimization finished, #iter = %d\n", iter);
	if(iter >= max_iterations)
		SG_WARNING("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	SG_INFO("Objective value = %lf\n", v);
	SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] xj_max;
	delete [] C_sum;
	delete [] xjneg_sum;
	delete [] xjpos_sum;
}

#endif //HAVE_LAPACK