00001 /****************************************************************************** 00002 *** GPDT - Gradient Projection Decomposition Technique *** 00003 ****************************************************************************** 00004 *** *** 00005 *** GPDT is a C++ software designed to train large-scale Support Vector *** 00006 *** Machines for binary classification in both scalar and distributed *** 00007 *** memory parallel environments. It uses the Joachims' problem *** 00008 *** decomposition technique to split the whole quadratic programming (QP) *** 00009 *** problem into a sequence of smaller QP subproblems, each one being *** 00010 *** solved by a suitable gradient projection method (GPM). The presently *** 00011 *** implemented GPMs are the Generalized Variable Projection Method *** 00012 *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection *** 00013 *** Methods for Quadratic Programs and Applications in Training Support *** 00014 *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the *** 00015 *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for *** 00016 *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** 00017 *** Upper Bounds"; Math. Prog. to appear). *** 00018 *** *** 00019 *** Authors: *** 00020 *** Thomas Serafini, Luca Zanni *** 00021 *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** 00022 *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** 00023 *** Gaetano Zanghirati *** 00024 *** Dept. of Mathematics, University of Ferrara - ITALY *** 00025 *** g.zanghirati@unife.it *** 00026 *** *** 00027 *** Software homepage: http://dm.unife.it/gpdt *** 00028 *** *** 00029 *** This work is supported by the Italian FIRB Projects *** 00030 *** 'Statistical Learning: Theory, Algorithms and Applications' *** 00031 *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** 00032 *** and *** 00033 *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** 00034 *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** 00035 *** *** 00036 *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. *** 00037 *** *** 00038 *** COPYRIGHT NOTIFICATION *** 00039 *** *** 00040 *** Permission to copy and modify this software and its documentation *** 00041 *** for internal research use is granted, provided that this notice is *** 00042 *** retained thereon and on all copies or modifications. The authors and *** 00043 *** their respective Universities makes no representations as to the *** 00044 *** suitability and operability of this software for any purpose. It is *** 00045 *** provided "as is" without express or implied warranty. *** 00046 *** Use of this software for commercial purposes is expressly prohibited *** 00047 *** without contacting the authors. *** 00048 *** *** 00049 *** This program is free software; you can redistribute it and/or modify *** 00050 *** it under the terms of the GNU General Public License as published by *** 00051 *** the Free Software Foundation; either version 3 of the License, or *** 00052 *** (at your option) any later version. *** 00053 *** *** 00054 *** This program is distributed in the hope that it will be useful, *** 00055 *** but WITHOUT ANY WARRANTY; without even the implied warranty of *** 00056 *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *** 00057 *** GNU General Public License for more details. *** 00058 *** *** 00059 *** You should have received a copy of the GNU General Public License *** 00060 *** along with this program; if not, write to the Free Software *** 00061 *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *** 00062 *** *** 00063 *** File: gpdt.cpp *** 00064 *** Type: scalar *** 00065 *** Version: 1.0 *** 00066 *** Date: October, 2005 *** 00067 *** Revision: 1 *** 00068 *** *** 00069 *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg *** 00070 ******************************************************************************/ 00071 #include <stdio.h> 00072 #include <stdlib.h> 00073 #include <string.h> 00074 #include <ctype.h> 00075 #include <math.h> 00076 #include "classifier/svm/gpdt.h" 00077 #include "classifier/svm/gpdtsolve.h" 00078 00079 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00080 void fatalError(const char *msg1, const char *msg2); 00081 00082 /******************************************************************************/ 00083 /*** Class constructor ***/ 00084 /******************************************************************************/ 00085 QPproblem::QPproblem() 00086 : CSGObject() 00087 { 00088 /*** set problem defaults ***/ 00089 maxmw = 40; 00090 c_const = 10.0; 00091 projection_solver = SOLVER_FLETCHER; 00092 projection_projector = 1; 00093 PreprocessMode = 0; 00094 delta = 1.0e-3; 00095 DELTAsv = EPS_SV; 00096 ker_type = 2; 00097 chunk_size = 400; 00098 q = -1; 00099 y = NULL; 00100 tau_proximal = 0.0; 00101 dim = 1; 00102 } 00103 00104 /******************************************************************************/ 00105 /*** Class destructor ***/ 00106 /******************************************************************************/ 00107 QPproblem::~QPproblem() 00108 { 00109 //if (y != NULL) free(y); 00110 } 00111 00112 /******************************************************************************/ 00113 /*** Setter method for the subproblem features ***/ 00114 /******************************************************************************/ 00115 void QPproblem::Subproblem(QPproblem &p, int32_t len, int32_t *perm) 00116 { 00117 int32_t k; 00118 00119 memcpy(this, &p, sizeof(QPproblem)); 00120 ell = len; 00121 00122 KER->SetSubproblem(p.KER, len, perm); 00123 y = (int32_t *)malloc(len * sizeof(int32_t)); 00124 for (k = 0; k < ell; k++) 00125 y[k] = p.y[perm[k]]; 00126 } 00127 00128 /******************************************************************************/ 00129 /*** Extract the samples information from an SVMlight-compliant data file ***/ 00130 /******************************************************************************/ 00131 int32_t prescan_document(char *file, int32_t *lines, int32_t *vlen, int32_t *ll) 00132 { 00133 FILE *fl; 00134 int32_t ic; 00135 char c; 00136 int64_t current_length, current_vlen; 00137 00138 if ((fl = fopen (file, "r")) == NULL) 00139 return(-1); 00140 current_length = 0; 00141 current_vlen = 0; 00142 00143 *ll = 0; /* length of the longest input line (the read buffer should 00144 be allocated with this size) */ 00145 *lines = 1; /* number of lines in the file */ 00146 *vlen = 0; /* max number of nonzero components in a vector */ 00147 00148 while ((ic = getc(fl)) != EOF) 00149 { 00150 c = (char)ic; 00151 current_length++; 00152 00153 if (c == ' ') 00154 current_vlen++; 00155 00156 if (c == '\n') 00157 { 00158 (*lines)++; 00159 if (current_length > (*ll)) 00160 *ll = current_length; 00161 if (current_vlen > (*vlen)) 00162 *vlen = current_vlen; 00163 current_length = 0; 00164 current_vlen = 0; 00165 } 00166 } 00167 fclose(fl); 00168 return(0); 00169 } 00170 /******************************************************************************/ 00171 /*** return 1 if problem is single class, 0 if two-class ***/ 00172 /******************************************************************************/ 00173 int32_t QPproblem::Check2Class(void) 00174 { 00175 int32_t i; 00176 00177 for (i = 1; i < ell; i++) 00178 if (y[i] != y[0]) 00179 return 0; 00180 return 1; 00181 } 00182 00183 /******************************************************************************/ 00184 /*** Compute the size of data splitting for preprocessing ***/ 00185 /******************************************************************************/ 00186 void SplitParts( 00187 int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off) 00188 { 00189 int32_t r; 00190 00191 r = n % parts; 00192 *dim = n / parts; 00193 00194 if (part < r) 00195 { 00196 (*dim)++; 00197 *off = *dim * part; 00198 } 00199 else 00200 *off = *dim * part + r; 00201 } 00202 00203 00204 /******************************************************************************/ 00205 /*** Kernel class constructor ***/ 00206 /******************************************************************************/ 00207 sKernel::sKernel (CKernel* k, int32_t l) 00208 { 00209 kernel=k; 00210 ell=l; 00211 nor = NULL; 00212 vaux = NULL; 00213 lx = NULL; 00214 ix = NULL; 00215 x = NULL; 00216 IsSubproblem = 0; 00217 KernelEvaluations = 0.0; 00218 } 00219 00220 /******************************************************************************/ 00221 /*** Set the problem data for kernel evaluation ***/ 00222 /******************************************************************************/ 00223 void sKernel::SetData( 00224 float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t _ell, int32_t _dim) 00225 { 00226 int32_t i, j, k; 00227 00228 dim = _dim; 00229 ell = _ell; 00230 nor = (float64_t *)malloc(ell*sizeof(float64_t)); 00231 vaux = (float32_t *)malloc(dim*sizeof(float32_t )); 00232 memset(vaux, 0, dim*sizeof(float32_t)); 00233 00234 IsSubproblem = 0; 00235 x = x_; 00236 ix = ix_; 00237 lx = lx_; 00238 00239 // unroll one (sparse) vector 00240 vauxRow = 0; 00241 i = vauxRow; 00242 for (k = 0; k < lx[i]; k++) 00243 vaux[ix[i][k]] = x[i][k]; 00244 00245 // compute the squared Euclidean norm of each vector 00246 for (i = 0; i < ell; i++) 00247 { 00248 nor[i] = 0.0; 00249 for (j = 0; j < lx[i]; j++) 00250 nor[i] += (float64_t)(x[i][j]*x[i][j]); 00251 } 00252 } 00253 00254 /******************************************************************************/ 00255 /*** Set the subproblem data ***/ 00256 /******************************************************************************/ 00257 void sKernel::SetSubproblem(sKernel* ker, int32_t len, int32_t *perm) 00258 { 00259 int32_t k; 00260 00261 /* arrays allocations */ 00262 nor = (float64_t *) malloc(len*sizeof(float64_t)); 00263 vaux = (float32_t *) malloc(ker->dim*sizeof(float32_t)); 00264 memset(vaux, 0, ker->dim*sizeof(float32_t)); 00265 00266 lx = (int32_t *) malloc(len * sizeof(int32_t)); 00267 ix = (int32_t **) malloc(len * sizeof(int32_t *)); 00268 x = (float32_t **) malloc(len * sizeof(float32_t *)); 00269 IsSubproblem = 1; 00270 00271 for (k = 0; k < len; k++) 00272 { 00273 x[k] = ker->x[perm[k]]; 00274 ix[k] = ker->ix[perm[k]]; 00275 lx[k] = ker->lx[perm[k]]; 00276 nor[k] = ker->nor[perm[k]]; 00277 } 00278 00279 // unroll one (sparse) vector 00280 vauxRow = 0; 00281 for (k = 0; k < lx[vauxRow]; k++) 00282 vaux[ix[vauxRow][k]] = x[vauxRow][k]; 00283 } 00284 00285 /******************************************************************************/ 00286 /*** Kernel class destructor ***/ 00287 /******************************************************************************/ 00288 sKernel::~sKernel() 00289 { 00290 int32_t i; 00291 00292 if (nor != NULL) free(nor); 00293 if (vaux != NULL) free(vaux); 00294 00295 if (lx != NULL) free(lx); 00296 if (ix != NULL) 00297 { 00298 if (!IsSubproblem) 00299 for (i = 0; i < ell; i++) 00300 free(ix[i]); 00301 free(ix); 00302 } 00303 if (x != NULL) 00304 { 00305 if (!IsSubproblem) 00306 for (i = 0; i < ell; i++) 00307 free(x[i]); 00308 free(x); 00309 } 00310 } 00311 00312 #endif // DOXYGEN_SHOULD_SKIP_THIS 00313 00314 /******************************************************************************/ 00315 /*** End of gpdt.cpp file ***/ 00316 /******************************************************************************/