#include "classifier/svm/SVMSGD.h"

// loss functions selectable at compile time via the LOSS macro below
#define HINGELOSS 1
#define SMOOTHHINGELOSS 2
#define SQUAREDHINGELOSS 3
#define LOGLOSS 10
#define LOGLOSSMARGIN 11

#define LOSS HINGELOSS

// compile-time switch for regularizing the bias term
// (train() additionally checks the runtime member use_regularized_bias)
#define REGULARIZEBIAS 0
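/*
 * All losses below are functions of the margin z = y*(w.x + b). The hinge
 * variants (values 1-3) are zero for z >= 1, which train() exploits via the
 * `LOSS < LOGLOSS` test to skip updates on well-classified examples; the
 * log losses (values >= 10) are positive everywhere, hence, presumably, the
 * gap in the numbering.
 */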
inline
float64_t loss(float64_t z)
{
#if LOSS == LOGLOSS
	// log(1+exp(-z)) evaluated branch-wise so exp() never overflows
	if (z >= 0)
		return log(1+exp(-z));
	else
		return -z + log(1+exp(z));
#elif LOSS == LOGLOSSMARGIN
	if (z >= 1)
		return log(1+exp(1-z));
	else
		return 1-z + log(1+exp(z-1));
#elif LOSS == SMOOTHHINGELOSS
	if (z < 0)
		return 0.5 - z;
	if (z < 1)
		return 0.5 * (1-z) * (1-z);
	return 0;
#elif LOSS == SQUAREDHINGELOSS
	if (z < 1)
		return 0.5 * (1 - z) * (1 - z);
	return 0;
#elif LOSS == HINGELOSS
	if (z < 1)
		return 1 - z;
	return 0;
#else
# error "Undefined loss"
#endif
}
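
/*
 * Sign convention: dloss(z) returns the NEGATIVE derivative -loss'(z), so
 * the step in train(), w += eta*dloss(z)*y*x, descends the loss. E.g. for
 * the hinge loss with z < 1, loss(z) = 1-z, loss'(z) = -1, dloss(z) = 1.
 */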

inline
float64_t dloss(float64_t z)
{
#if LOSS == LOGLOSS
	if (z < 0)
		return 1 / (exp(z) + 1);
	float64_t ez = exp(-z);
	return ez / (ez + 1);
#elif LOSS == LOGLOSSMARGIN
	if (z < 1)
		return 1 / (exp(z-1) + 1);
	float64_t ez = exp(1-z);
	return ez / (ez + 1);
#elif LOSS == SMOOTHHINGELOSS
	if (z < 0)
		return 1;
	if (z < 1)
		return 1-z;
	return 0;
#elif LOSS == SQUAREDHINGELOSS
	if (z < 1)
		return (1 - z);
	return 0;
#else
	// HINGELOSS (and any other unhandled value)
	if (z < 1)
		return 1;
	return 0;
#endif
}


CSVMSGD::CSVMSGD(float64_t C)
: CLinearClassifier(), t(1), C1(C), C2(C),
	wscale(1), bscale(1), epochs(5), skip(1000), count(1000), use_bias(true),
	use_regularized_bias(false)
{
}

CSVMSGD::CSVMSGD(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier(), t(1), C1(C), C2(C), wscale(1), bscale(1),
	epochs(5), skip(1000), count(1000), use_bias(true),
	use_regularized_bias(false)
{
	w=NULL;
	set_features(traindat);
	set_labels(trainlab);
}

CSVMSGD::~CSVMSGD()
{
	delete[] w;
	w=NULL;
}

bool CSVMSGD::train()
{
	ASSERT(labels);
	ASSERT(features);
	ASSERT(labels->is_two_class_labeling());

	int32_t num_train_labels=labels->get_num_labels();
	w_dim=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);
	ASSERT(num_vec>0);

	// allocate w and start from w=0, bias=0
	delete[] w;
	w=new float64_t[w_dim];
	memset(w, 0, w_dim*sizeof(float64_t));
	bias=0;

	float64_t lambda= 1.0/(C1*num_vec);
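	/*
	 * lambda is the regularization weight in the objective
	 *   lambda/2*|w|^2 + 1/n*sum_i loss(y_i*(w.x_i + bias));
	 * lambda = 1/(C1*n) makes this equivalent (up to overall scaling) to
	 * the usual C-weighted SVM objective 1/2*|w|^2 + C*sum_i loss_i.
	 */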

	// shift t so that the initial learning rate is reasonable
	// (this assumes feature vectors of norm roughly 1)
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,dloss(-typw));
	t = 1 / (eta0 * lambda);
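	// with the schedule eta_t = 1/(lambda*t) used in the loop below,
	// starting from t = 1/(eta0*lambda) gives a first step of size eta0
	// and a 1/t decay from there on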

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

	// estimate bscale and the regularization skip from the data
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec);
	for (int32_t e=0; e<epochs; e++)
	{
		count = skip;
		for (int32_t i=0; i<num_vec; i++)
		{
			float64_t eta = 1.0 / (lambda * t);
			float64_t y = labels->get_label(i);
			float64_t z = y * (features->dense_dot(i, w, w_dim) + bias);

			// for the margin-based losses dloss(z)=0 when z >= 1,
			// so the update can be skipped for well-classified examples
#if LOSS < LOGLOSS
			if (z < 1)
#endif
			{
				float64_t etd = eta * dloss(z);
				// wscale stays 1 in this implementation: the shrinkage
				// below is applied to w directly
				features->add_to_dense_vec(etd * y / wscale, i, w, w_dim);

				if (use_bias)
				{
					if (use_regularized_bias)
						bias *= 1 - eta * lambda * bscale;
					bias += etd * y * bscale;
				}
			}

			// apply the regularization shrinkage w *= (1 - eta*lambda)
			// lazily, once every `skip` iterations
			if (--count <= 0)
			{
				float64_t r = 1 - eta * lambda * skip;
				if (r < 0.8)
					r = pow(1 - eta * lambda, skip);
				CMath::scale_vector(r, w, w_dim);
				count = skip;
			}
			t++;
		}
	}
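
	/*
	 * Note on the lazy shrinkage above: rescaling w costs O(w_dim), whereas
	 * an example update only touches its non-zero features, so shrinking on
	 * every step would dominate on sparse data. (1 - eta*lambda*skip) is the
	 * first-order approximation of the exact factor (1 - eta*lambda)^skip,
	 * which is used instead whenever the approximation gets crude (r < 0.8).
	 */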

	float64_t wnorm = CMath::dot(w, w, w_dim);
	SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);

	return true;
}

void CSVMSGD::calibrate()
{
	ASSERT(features);
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0);
	ASSERT(c_dim>0);

	float64_t* c=new float64_t[c_dim];
	memset(c, 0, c_dim*sizeof(float64_t));

	SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim);

	// compute average gradient size
	int32_t n = 0;
	float64_t m = 0;
	float64_t r = 0;

	for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
	{
		// r accumulates the number of non-zeros,
		// c the (absolute) feature values
		r += features->get_nnz_features_for_vector(j);
		features->add_to_dense_vec(1, j, c, c_dim, true);

		m=CMath::max(c, c_dim);
	}

	// bias update scaling
	bscale = m/n;

	// compute weight decay skip
	skip = (int32_t) ((16 * n * c_dim) / r);
	SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);

	delete[] c;
}
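
/*
 * Minimal usage sketch (the feature/label construction is assumed, not part
 * of this file; only the constructor and train() defined above are used):
 *
 *   CDotFeatures* feats = ...; // some dot-product feature object
 *   CLabels* labs = ...;       // two-class labels
 *   CSVMSGD svm(1.0, feats, labs);
 *   svm.train();
 */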