LibLinear.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include "lib/config.h"

#ifdef HAVE_LAPACK
#include "lib/io.h"
#include "lib/Signal.h"
#include "lib/Time.h"
#include "classifier/svm/LibLinear.h"
#include "classifier/svm/SVM_linear.h"
#include "classifier/svm/Tron.h"
#include "features/DotFeatures.h"

using namespace shogun;

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearClassifier()
{
    liblinear_solver_type=l;
    use_bias=false;
    C1=1;
    C2=1;
    epsilon=1e-5; // default stopping tolerance, matching the other constructor
    set_max_iterations();
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier(), C1(C), C2(C), use_bias(true), epsilon(1e-5)
{
    set_features(traindat);
    set_labels(trainlab);
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    set_max_iterations();
}

CLibLinear::~CLibLinear()
{
}

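/* A minimal usage sketch, assuming a CDotFeatures object `feats` and a
 * binary (+1/-1) CLabels object `labs` created elsewhere; the setter names
 * follow the usual shogun conventions, so check LibLinear.h for the exact
 * API available in this version:
 *
 *   CLibLinear* svm = new CLibLinear(0.1, feats, labs); // C = 0.1
 *   svm->set_epsilon(1e-4);         // stopping tolerance used by train()
 *   svm->train();                   // runs the solver selected above
 *   CLabels* out = svm->classify(feats);
 *   SG_UNREF(svm);
 */
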
bool CLibLinear::train(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(labels);
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            liblinear_solver_type == L1R_LR)
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    delete[] w;
    if (use_bias)
        w=new float64_t[num_feat+1];
    else
        w=new float64_t[num_feat];
    w_dim=num_feat;

    problem prob;
    if (use_bias)
    {
        prob.n=w_dim+1;
        memset(w, 0, sizeof(float64_t)*(w_dim+1));
    }
    else
    {
        prob.n=w_dim;
        memset(w, 0, sizeof(float64_t)*w_dim);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=new int[prob.l];
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
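    // note: the tolerance handed to tron and to the L1 solvers below is
    // rescaled by the class balance, eps*min(#pos,#neg)/l, the same
    // rescaling upstream liblinear applies to its primal solvers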
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            // assumes the features are already transposed
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            // assumes the features are already transposed
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case MCSVM_CS:
        {
            SG_NOTIMPLEMENTED;
            /* TODO...
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
            */
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    delete[] prob.y;

    return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Q_ij = y_i y_j x_i^T x_j and
//  D is a diagonal matrix
//
// In L1-SVM case:
//      upper_bound_i = Cp if y_i = 1
//      upper_bound_i = Cn if y_i = -1
//      D_ii = 0
// In L2-SVM case:
//      upper_bound_i = INF
//      D_ii = 1/(2*Cp) if y_i = 1
//      D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

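// For a single coordinate i the update below is a projected Newton step on
// a one-variable quadratic:
//   G        = y_i w^T x_i - 1 + D_ii alpha_i
//   alpha_i <- min(max(alpha_i - G/QD_i, 0), upper_bound_i)
//   w       <- w + (alpha_i^new - alpha_i^old) y_i x_i
// with QD_i = x_i^T x_i + D_ii precomputed once per point; the projected
// gradient PG of G drives both shrinking and the stopping criterion.
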
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_l1l2_svc(
            const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    int32_t *y = new int32_t[l];
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if(st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }
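
    // GETI maps a label to an index into diag[]/upper_bound[]:
    // y[i] = -1 -> 0 and y[i] = +1 -> 2, so entry 1 is never used.
    // With per-instance weights one would index these arrays by i instead.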

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];

        QD[i] += prob->x->dot(i,i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);

                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n",iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reached max number of iterations; using -s 2 may be "
                "faster (also see the liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n",v/2);
    SG_INFO("nSV = %d\n",nSV);

    delete [] QD;
    delete [] alpha;
    delete [] y;
    delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |w_j| + C \sum max(0, 1-y_i w^T x_i)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

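// Each coordinate update minimizes the second-order model
//   |w_j + d| + G*d + 0.5*H*d^2
// whose closed-form minimizer is the soft-thresholded Newton step
//   d = -(G+1)/H  if G+1 <= H*w_j
//   d = -(G-1)/H  if G-1 >= H*w_j
//   d = -w_j      otherwise,
// followed by a backtracking line search (halving d, sufficient-decrease
// constant sigma) on the true objective.
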
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = new int[w_size];
    int32_t *y = new int32_t[l];
    double *b = new double[l]; // b = 1-ywTx
    double *xj_sq = new double[w_size];

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("reached max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] b;
    delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |w_j| + C \sum log(1+exp(-y_i w^T x_i)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

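// The solver maintains exp_wTx[i] = exp(w^T x_i), so for a column j the
// gradient and Hessian of the smooth part reduce to sums over the
// nonzeros of feature j:
//   G_j = xjneg_sum[j] - \sum_i C_i x_ij / (1 + exp_wTx[i])
//   H_j = \sum_i C_i x_ij^2 exp_wTx[i] / (1 + exp_wTx[i])^2
// where xjneg_sum[j] = \sum_{i: y_i=-1} C_i x_ij; this matches the
// G = -sum2 + xjneg_sum[j] and H accumulation in the loop below.
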
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = new int[w_size];
    int32_t *y = new int32_t[l];
    double *exp_wTx = new double[l];
    double *exp_wTx_new = new double[l];
    double *xj_max = new double[w_size];
    double *C_sum = new double[w_size];
    double *xjneg_sum = new double[w_size];
    double *xjpos_sum = new double[w_size];

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("reached max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] exp_wTx;
    delete [] exp_wTx_new;
    delete [] xj_max;
    delete [] C_sum;
    delete [] xjneg_sum;
    delete [] xjpos_sum;
}


#endif // HAVE_LAPACK