00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "kernel/CommUlongStringKernel.h"
00013 #include "kernel/SqrtDiagKernelNormalizer.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016
00017 CCommUlongStringKernel::CCommUlongStringKernel(int32_t size, bool us)
00018 : CStringKernel<uint64_t>(size), use_sign(us)
00019 {
00020 properties |= KP_LINADD;
00021 clear_normal();
00022
00023 set_normalizer(new CSqrtDiagKernelNormalizer());
00024 }
00025
00026 CCommUlongStringKernel::CCommUlongStringKernel(
00027 CStringFeatures<uint64_t>* l, CStringFeatures<uint64_t>* r, bool us,
00028 int32_t size)
00029 : CStringKernel<uint64_t>(size), use_sign(us)
00030 {
00031 properties |= KP_LINADD;
00032 clear_normal();
00033 set_normalizer(new CSqrtDiagKernelNormalizer());
00034 init(l,r);
00035 }
00036
00037 CCommUlongStringKernel::~CCommUlongStringKernel()
00038 {
00039 cleanup();
00040 }
00041
00042 void CCommUlongStringKernel::remove_lhs()
00043 {
00044 delete_optimization();
00045
00046 #ifdef SVMLIGHT
00047 if (lhs)
00048 cache_reset();
00049 #endif
00050
00051 lhs = NULL ;
00052 rhs = NULL ;
00053 }
00054
00055 void CCommUlongStringKernel::remove_rhs()
00056 {
00057 #ifdef SVMLIGHT
00058 if (rhs)
00059 cache_reset();
00060 #endif
00061
00062 rhs = lhs;
00063 }
00064
00065 bool CCommUlongStringKernel::init(CFeatures* l, CFeatures* r)
00066 {
00067 CStringKernel<uint64_t>::init(l,r);
00068 return init_normalizer();
00069 }
00070
00071 void CCommUlongStringKernel::cleanup()
00072 {
00073 delete_optimization();
00074 clear_normal();
00075 CKernel::cleanup();
00076 }
00077
00078 bool CCommUlongStringKernel::load_init(FILE* src)
00079 {
00080 return false;
00081 }
00082
00083 bool CCommUlongStringKernel::save_init(FILE* dest)
00084 {
00085 return false;
00086 }
00087
00088 float64_t CCommUlongStringKernel::compute(int32_t idx_a, int32_t idx_b)
00089 {
00090 int32_t alen, blen;
00091 uint64_t* avec=((CStringFeatures<uint64_t>*) lhs)->get_feature_vector(idx_a, alen);
00092 uint64_t* bvec=((CStringFeatures<uint64_t>*) rhs)->get_feature_vector(idx_b, blen);
00093
00094 float64_t result=0;
00095
00096 int32_t left_idx=0;
00097 int32_t right_idx=0;
00098
00099 if (use_sign)
00100 {
00101 while (left_idx < alen && right_idx < blen)
00102 {
00103 if (avec[left_idx]==bvec[right_idx])
00104 {
00105 uint64_t sym=avec[left_idx];
00106
00107 while (left_idx< alen && avec[left_idx]==sym)
00108 left_idx++;
00109
00110 while (right_idx< blen && bvec[right_idx]==sym)
00111 right_idx++;
00112
00113 result++;
00114 }
00115 else if (avec[left_idx]<bvec[right_idx])
00116 left_idx++;
00117 else
00118 right_idx++;
00119 }
00120 }
00121 else
00122 {
00123 while (left_idx < alen && right_idx < blen)
00124 {
00125 if (avec[left_idx]==bvec[right_idx])
00126 {
00127 int32_t old_left_idx=left_idx;
00128 int32_t old_right_idx=right_idx;
00129
00130 uint64_t sym=avec[left_idx];
00131
00132 while (left_idx< alen && avec[left_idx]==sym)
00133 left_idx++;
00134
00135 while (right_idx< blen && bvec[right_idx]==sym)
00136 right_idx++;
00137
00138 result+=((float64_t) (left_idx-old_left_idx)) * ((float64_t) (right_idx-old_right_idx));
00139 }
00140 else if (avec[left_idx]<bvec[right_idx])
00141 left_idx++;
00142 else
00143 right_idx++;
00144 }
00145 }
00146
00147 return result;
00148 }
00149
00150 void CCommUlongStringKernel::add_to_normal(int32_t vec_idx, float64_t weight)
00151 {
00152 int32_t t=0;
00153 int32_t j=0;
00154 int32_t k=0;
00155 int32_t last_j=0;
00156 int32_t len=-1;
00157 uint64_t* vec=((CStringFeatures<uint64_t>*) lhs)->get_feature_vector(vec_idx, len);
00158
00159 if (vec && len>0)
00160 {
00161
00162 uint64_t* dic= (uint64_t*) malloc(
00163 (len+dictionary.get_num_elements())*sizeof(uint64_t));
00164 float64_t* dic_weights= (float64_t*) malloc(
00165 (len+dictionary.get_num_elements())*sizeof(float64_t));
00166
00167 if (use_sign)
00168 {
00169 for (j=1; j<len; j++)
00170 {
00171 if (vec[j]==vec[j-1])
00172 continue;
00173
00174 merge_dictionaries(t, j, k, vec, dic, dic_weights, weight, vec_idx);
00175 }
00176
00177 merge_dictionaries(t, j, k, vec, dic, dic_weights, weight, vec_idx);
00178
00179 while (k<dictionary.get_num_elements())
00180 {
00181 dic[t]=dictionary[k];
00182 dic_weights[t]=dictionary_weights[k];
00183 t++;
00184 k++;
00185 }
00186 }
00187 else
00188 {
00189 for (j=1; j<len; j++)
00190 {
00191 if (vec[j]==vec[j-1])
00192 continue;
00193
00194 merge_dictionaries(t, j, k, vec, dic, dic_weights, weight*(j-last_j), vec_idx);
00195 last_j = j;
00196 }
00197
00198 merge_dictionaries(t, j, k, vec, dic, dic_weights, weight*(j-last_j), vec_idx);
00199
00200 while (k<dictionary.get_num_elements())
00201 {
00202 dic[t]=dictionary[k];
00203 dic_weights[t]=dictionary_weights[k];
00204 t++;
00205 k++;
00206 }
00207 }
00208
00209 dictionary.set_array(dic, t, len+dictionary.get_num_elements());
00210 dictionary_weights.set_array(dic_weights, t, len+dictionary.get_num_elements());
00211 }
00212
00213 set_is_initialized(true);
00214 }
00215
00216 void CCommUlongStringKernel::clear_normal()
00217 {
00218 dictionary.resize_array(0);
00219 dictionary_weights.resize_array(0);
00220 set_is_initialized(false);
00221 }
00222
00223 bool CCommUlongStringKernel::init_optimization(
00224 int32_t count, int32_t *IDX, float64_t * weights)
00225 {
00226 clear_normal();
00227
00228 if (count<=0)
00229 {
00230 set_is_initialized(true);
00231 SG_DEBUG( "empty set of SVs\n");
00232 return true;
00233 }
00234
00235 SG_DEBUG( "initializing CCommUlongStringKernel optimization\n");
00236
00237 for (int32_t i=0; i<count; i++)
00238 {
00239 if ( (i % (count/10+1)) == 0)
00240 SG_PROGRESS(i, 0, count);
00241
00242 add_to_normal(IDX[i], weights[i]);
00243 }
00244
00245 SG_PRINT( "Done. \n");
00246
00247 set_is_initialized(true);
00248 return true;
00249 }
00250
00251 bool CCommUlongStringKernel::delete_optimization()
00252 {
00253 SG_DEBUG( "deleting CCommUlongStringKernel optimization\n");
00254 clear_normal();
00255 return true;
00256 }
00257
00258
00259
00260 float64_t CCommUlongStringKernel::compute_optimized(int32_t i)
00261 {
00262 float64_t result = 0;
00263 int32_t j, last_j=0;
00264 int32_t old_idx = 0;
00265
00266 if (!get_is_initialized())
00267 {
00268 SG_ERROR( "CCommUlongStringKernel optimization not initialized\n");
00269 return 0 ;
00270 }
00271
00272
00273
00274 int32_t alen = -1;
00275 uint64_t* avec=((CStringFeatures<uint64_t>*) rhs)->
00276 get_feature_vector(i, alen);
00277
00278 if (avec && alen>0)
00279 {
00280 if (use_sign)
00281 {
00282 for (j=1; j<alen; j++)
00283 {
00284 if (avec[j]==avec[j-1])
00285 continue;
00286
00287 int32_t idx = CMath::binary_search_max_lower_equal(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[j-1]);
00288
00289 if (idx!=-1)
00290 {
00291 if (dictionary[idx+old_idx] == avec[j-1])
00292 result += dictionary_weights[idx+old_idx];
00293
00294 old_idx+=idx;
00295 }
00296 }
00297
00298 int32_t idx = CMath::binary_search(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[alen-1]);
00299 if (idx!=-1)
00300 result += dictionary_weights[idx+old_idx];
00301 }
00302 else
00303 {
00304 for (j=1; j<alen; j++)
00305 {
00306 if (avec[j]==avec[j-1])
00307 continue;
00308
00309 int32_t idx = CMath::binary_search_max_lower_equal(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[j-1]);
00310
00311 if (idx!=-1)
00312 {
00313 if (dictionary[idx+old_idx] == avec[j-1])
00314 result += dictionary_weights[idx+old_idx]*(j-last_j);
00315
00316 old_idx+=idx;
00317 }
00318
00319 last_j = j;
00320 }
00321
00322 int32_t idx = CMath::binary_search(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[alen-1]);
00323 if (idx!=-1)
00324 result += dictionary_weights[idx+old_idx]*(alen-last_j);
00325 }
00326 }
00327
00328 return normalizer->normalize_rhs(result, i);
00329 }