00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Trie.h"
00016 #include "base/Parallel.h"
00017
00018 #include "kernel/WeightedDegreeStringKernel.h"
00019 #include "kernel/FirstElementKernelNormalizer.h"
00020 #include "features/Features.h"
00021 #include "features/StringFeatures.h"
00022
00023 #ifndef WIN32
00024 #include <pthread.h>
00025 #endif
00026
00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00028 struct S_THREAD_PARAM
00029 {
00030 int32_t* vec;
00031 float64_t* result;
00032 float64_t* weights;
00033 CWeightedDegreeStringKernel* kernel;
00034 CTrie<DNATrie>* tries;
00035 float64_t factor;
00036 int32_t j;
00037 int32_t start;
00038 int32_t end;
00039 int32_t length;
00040 int32_t* vec_idx;
00041 };
00042 #endif // DOXYGEN_SHOULD_SKIP_THIS
00043
00044 CWeightedDegreeStringKernel::CWeightedDegreeStringKernel (
00045 int32_t degree_, EWDKernType type_)
00046 : CStringKernel<char>(10),weights(NULL),position_weights(NULL),
00047 weights_buffer(NULL), mkl_stepsize(1),degree(degree_), length(0),
00048 max_mismatch(0), seq_length(0), block_computation(true),
00049 num_block_weights_external(0), block_weights_external(NULL),
00050 block_weights(NULL), type(type_), which_degree(-1), tries(NULL),
00051 tree_initialized(false), alphabet(NULL)
00052 {
00053 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00054 lhs=NULL;
00055 rhs=NULL;
00056
00057 if (type!=E_EXTERNAL)
00058 set_wd_weights_by_type(type);
00059
00060 set_normalizer(new CFirstElementKernelNormalizer());
00061 }
00062
00063 CWeightedDegreeStringKernel::CWeightedDegreeStringKernel (
00064 float64_t *weights_, int32_t degree_)
00065 : CStringKernel<char>(10), weights(NULL), position_weights(NULL),
00066 weights_buffer(NULL), mkl_stepsize(1), degree(degree_), length(0),
00067 max_mismatch(0), seq_length(0), block_computation(true),
00068 num_block_weights_external(0), block_weights_external(NULL),
00069 block_weights(NULL), type(E_EXTERNAL), which_degree(-1), tries(NULL),
00070 tree_initialized(false), alphabet(NULL)
00071 {
00072 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00073 lhs=NULL;
00074 rhs=NULL;
00075
00076 weights=new float64_t[degree*(1+max_mismatch)];
00077 for (int32_t i=0; i<degree*(1+max_mismatch); i++)
00078 weights[i]=weights_[i];
00079 set_normalizer(new CFirstElementKernelNormalizer());
00080 }
00081
00082 CWeightedDegreeStringKernel::CWeightedDegreeStringKernel(
00083 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree_)
00084 : CStringKernel<char>(10), weights(NULL), position_weights(NULL),
00085 weights_buffer(NULL), mkl_stepsize(1), degree(degree_), length(0),
00086 max_mismatch(0), seq_length(0), block_computation(true),
00087 num_block_weights_external(0), block_weights_external(NULL),
00088 block_weights(NULL), type(E_WD), which_degree(-1), tries(NULL),
00089 tree_initialized(false), alphabet(NULL)
00090 {
00091 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00092
00093 set_wd_weights_by_type(type);
00094 set_normalizer(new CFirstElementKernelNormalizer());
00095 init(l, r);
00096 }
00097
00098 CWeightedDegreeStringKernel::~CWeightedDegreeStringKernel()
00099 {
00100 cleanup();
00101
00102 delete[] weights;
00103 weights=NULL;
00104
00105 delete[] block_weights;
00106 block_weights=NULL;
00107
00108 delete[] position_weights;
00109 position_weights=NULL;
00110
00111 delete[] weights_buffer;
00112 weights_buffer=NULL;
00113 }
00114
00115
00116 void CWeightedDegreeStringKernel::remove_lhs()
00117 {
00118 SG_DEBUG( "deleting CWeightedDegreeStringKernel optimization\n");
00119 delete_optimization();
00120
00121 if (tries!=NULL)
00122 tries->destroy();
00123
00124 CKernel::remove_lhs();
00125 }
00126
00127 void CWeightedDegreeStringKernel::create_empty_tries()
00128 {
00129 seq_length=((CStringFeatures<char>*) lhs)->get_max_vector_length();
00130
00131 if (tries!=NULL)
00132 {
00133 tries->destroy() ;
00134 tries->create(seq_length, max_mismatch==0) ;
00135 }
00136 }
00137
00138 bool CWeightedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
00139 {
00140 int32_t lhs_changed=(lhs!=l);
00141 int32_t rhs_changed=(rhs!=r);
00142
00143 CStringKernel<char>::init(l,r);
00144
00145 SG_DEBUG("lhs_changed: %i\n", lhs_changed);
00146 SG_DEBUG("rhs_changed: %i\n", rhs_changed);
00147
00148 CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
00149 CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;
00150
00151 int32_t len=sf_l->get_max_vector_length();
00152 if (lhs_changed && !sf_l->have_same_length(len))
00153 SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00154
00155 if (rhs_changed && !sf_r->have_same_length(len))
00156 SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00157
00158 SG_UNREF(alphabet);
00159 alphabet=sf_l->get_alphabet();
00160 CAlphabet* ralphabet=sf_r->get_alphabet();
00161
00162 if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00163 properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00164
00165 ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00166 SG_UNREF(ralphabet);
00167
00168 if (tries!=NULL) {
00169 tries->delete_trees(max_mismatch==0);
00170 SG_UNREF(tries);
00171 }
00172 tries=new CTrie<DNATrie>(degree, max_mismatch==0);
00173 create_empty_tries();
00174
00175 init_block_weights();
00176
00177 return init_normalizer();
00178 }
00179
00180 void CWeightedDegreeStringKernel::cleanup()
00181 {
00182 SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n");
00183 delete_optimization();
00184
00185 delete[] block_weights;
00186 block_weights=NULL;
00187
00188 if (tries!=NULL)
00189 {
00190 tries->destroy();
00191 SG_UNREF(tries);
00192 tries=NULL;
00193 }
00194
00195 seq_length=0;
00196 tree_initialized = false;
00197
00198 SG_UNREF(alphabet);
00199 alphabet=NULL;
00200
00201 CKernel::cleanup();
00202 }
00203
00204 bool CWeightedDegreeStringKernel::load_init(FILE* src)
00205 {
00206 return false;
00207 }
00208
00209 bool CWeightedDegreeStringKernel::save_init(FILE* dest)
00210 {
00211 return false;
00212 }
00213
00214
00215 bool CWeightedDegreeStringKernel::init_optimization(int32_t count, int32_t* IDX, float64_t* alphas, int32_t tree_num)
00216 {
00217 if (tree_num<0)
00218 SG_DEBUG( "deleting CWeightedDegreeStringKernel optimization\n");
00219
00220 delete_optimization();
00221
00222 if (tree_num<0)
00223 SG_DEBUG( "initializing CWeightedDegreeStringKernel optimization\n") ;
00224
00225 for (int32_t i=0; i<count; i++)
00226 {
00227 if (tree_num<0)
00228 {
00229 if ( (i % (count/10+1)) == 0)
00230 SG_PROGRESS(i, 0, count);
00231
00232 if (max_mismatch==0)
00233 add_example_to_tree(IDX[i], alphas[i]) ;
00234 else
00235 add_example_to_tree_mismatch(IDX[i], alphas[i]) ;
00236
00237
00238 }
00239 else
00240 {
00241 if (max_mismatch==0)
00242 add_example_to_single_tree(IDX[i], alphas[i], tree_num) ;
00243 else
00244 add_example_to_single_tree_mismatch(IDX[i], alphas[i], tree_num) ;
00245 }
00246 }
00247
00248 if (tree_num<0)
00249 SG_DONE();
00250
00251
00252
00253 set_is_initialized(true) ;
00254 return true ;
00255 }
00256
00257 bool CWeightedDegreeStringKernel::delete_optimization()
00258 {
00259 if (get_is_initialized())
00260 {
00261 if (tries!=NULL)
00262 tries->delete_trees(max_mismatch==0);
00263 set_is_initialized(false);
00264 return true;
00265 }
00266
00267 return false;
00268 }
00269
00270
00271 float64_t CWeightedDegreeStringKernel::compute_with_mismatch(
00272 char* avec, int32_t alen, char* bvec, int32_t blen)
00273 {
00274 float64_t sum = 0.0;
00275
00276 for (int32_t i=0; i<alen; i++)
00277 {
00278 float64_t sumi = 0.0;
00279 int32_t mismatches=0;
00280
00281 for (int32_t j=0; (i+j<alen) && (j<degree); j++)
00282 {
00283 if (avec[i+j]!=bvec[i+j])
00284 {
00285 mismatches++ ;
00286 if (mismatches>max_mismatch)
00287 break ;
00288 } ;
00289 sumi += weights[j+degree*mismatches];
00290 }
00291 if (position_weights!=NULL)
00292 sum+=position_weights[i]*sumi ;
00293 else
00294 sum+=sumi ;
00295 }
00296 return sum ;
00297 }
00298
00299 float64_t CWeightedDegreeStringKernel::compute_using_block(
00300 char* avec, int32_t alen, char* bvec, int32_t blen)
00301 {
00302 ASSERT(alen==blen);
00303
00304 float64_t sum=0;
00305 int32_t match_len=-1;
00306
00307 for (int32_t i=0; i<alen; i++)
00308 {
00309 if (avec[i]==bvec[i])
00310 match_len++;
00311 else
00312 {
00313 if (match_len>=0)
00314 sum+=block_weights[match_len];
00315 match_len=-1;
00316 }
00317 }
00318
00319 if (match_len>=0)
00320 sum+=block_weights[match_len];
00321
00322 return sum;
00323 }
00324
00325 float64_t CWeightedDegreeStringKernel::compute_without_mismatch(
00326 char* avec, int32_t alen, char* bvec, int32_t blen)
00327 {
00328 float64_t sum = 0.0;
00329
00330 for (int32_t i=0; i<alen; i++)
00331 {
00332 float64_t sumi = 0.0;
00333
00334 for (int32_t j=0; (i+j<alen) && (j<degree); j++)
00335 {
00336 if (avec[i+j]!=bvec[i+j])
00337 break ;
00338 sumi += weights[j];
00339 }
00340 if (position_weights!=NULL)
00341 sum+=position_weights[i]*sumi ;
00342 else
00343 sum+=sumi ;
00344 }
00345 return sum ;
00346 }
00347
00348 float64_t CWeightedDegreeStringKernel::compute_without_mismatch_matrix(
00349 char* avec, int32_t alen, char* bvec, int32_t blen)
00350 {
00351 float64_t sum = 0.0;
00352
00353 for (int32_t i=0; i<alen; i++)
00354 {
00355 float64_t sumi=0.0;
00356 for (int32_t j=0; (i+j<alen) && (j<degree); j++)
00357 {
00358 if (avec[i+j]!=bvec[i+j])
00359 break;
00360 sumi += weights[i*degree+j];
00361 }
00362 if (position_weights!=NULL)
00363 sum += position_weights[i]*sumi ;
00364 else
00365 sum += sumi ;
00366 }
00367
00368 return sum ;
00369 }
00370
00371
00372 float64_t CWeightedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b)
00373 {
00374 int32_t alen, blen;
00375 char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen);
00376 char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen);
00377 float64_t result=0;
00378
00379 if (max_mismatch==0 && length==0 && block_computation)
00380 result=compute_using_block(avec, alen, bvec, blen);
00381 else
00382 {
00383 if (max_mismatch>0)
00384 result=compute_with_mismatch(avec, alen, bvec, blen);
00385 else if (length==0)
00386 result=compute_without_mismatch(avec, alen, bvec, blen);
00387 else
00388 result=compute_without_mismatch_matrix(avec, alen, bvec, blen);
00389 }
00390
00391 return result;
00392 }
00393
00394
00395 void CWeightedDegreeStringKernel::add_example_to_tree(
00396 int32_t idx, float64_t alpha)
00397 {
00398 ASSERT(alphabet);
00399 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00400
00401 int32_t len=0;
00402 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00403 ASSERT(max_mismatch==0);
00404 int32_t *vec=new int32_t[len];
00405
00406 for (int32_t i=0; i<len; i++)
00407 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00408
00409 if (length == 0 || max_mismatch > 0)
00410 {
00411 for (int32_t i=0; i<len; i++)
00412 {
00413 float64_t alpha_pw=alpha;
00414
00415
00416 if (alpha_pw==0.0)
00417 continue;
00418 ASSERT(tries);
00419 tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
00420 }
00421 }
00422 else
00423 {
00424 for (int32_t i=0; i<len; i++)
00425 {
00426 float64_t alpha_pw=alpha;
00427
00428
00429 if (alpha_pw==0.0)
00430 continue ;
00431 ASSERT(tries);
00432 tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
00433 }
00434 }
00435 delete[] vec ;
00436 tree_initialized=true ;
00437 }
00438
00439 void CWeightedDegreeStringKernel::add_example_to_single_tree(
00440 int32_t idx, float64_t alpha, int32_t tree_num)
00441 {
00442 ASSERT(alphabet);
00443 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00444
00445 int32_t len ;
00446 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00447 ASSERT(max_mismatch==0);
00448 int32_t *vec = new int32_t[len] ;
00449
00450 for (int32_t i=tree_num; i<tree_num+degree && i<len; i++)
00451 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00452
00453 ASSERT(tries);
00454 if (alpha!=0.0)
00455 tries->add_to_trie(tree_num, 0, vec, normalizer->normalize_lhs(alpha, idx), weights, (length!=0));
00456
00457 delete[] vec ;
00458 tree_initialized=true ;
00459 }
00460
00461 void CWeightedDegreeStringKernel::add_example_to_tree_mismatch(int32_t idx, float64_t alpha)
00462 {
00463 ASSERT(tries);
00464 ASSERT(alphabet);
00465 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00466
00467 int32_t len ;
00468 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00469
00470 int32_t *vec = new int32_t[len] ;
00471
00472 for (int32_t i=0; i<len; i++)
00473 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00474
00475 for (int32_t i=0; i<len; i++)
00476 {
00477 if (alpha!=0.0)
00478 tries->add_example_to_tree_mismatch_recursion(NO_CHILD, i, normalizer->normalize_lhs(alpha, idx), &vec[i], len-i, 0, 0, max_mismatch, weights);
00479 }
00480
00481 delete[] vec ;
00482 tree_initialized=true ;
00483 }
00484
00485 void CWeightedDegreeStringKernel::add_example_to_single_tree_mismatch(
00486 int32_t idx, float64_t alpha, int32_t tree_num)
00487 {
00488 ASSERT(tries);
00489 ASSERT(alphabet);
00490 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00491
00492 int32_t len=0;
00493 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00494 int32_t *vec=new int32_t[len];
00495
00496 for (int32_t i=tree_num; i<len && i<tree_num+degree; i++)
00497 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00498
00499 if (alpha!=0.0)
00500 {
00501 tries->add_example_to_tree_mismatch_recursion(
00502 NO_CHILD, tree_num, normalizer->normalize_lhs(alpha, idx), &vec[tree_num], len-tree_num,
00503 0, 0, max_mismatch, weights);
00504 }
00505
00506 delete[] vec;
00507 tree_initialized=true;
00508 }
00509
00510
00511 float64_t CWeightedDegreeStringKernel::compute_by_tree(int32_t idx)
00512 {
00513 ASSERT(alphabet);
00514 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00515
00516 int32_t len=0;
00517 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len);
00518 ASSERT(char_vec && len>0);
00519 int32_t *vec=new int32_t[len];
00520
00521 for (int32_t i=0; i<len; i++)
00522 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00523
00524 float64_t sum=0;
00525 ASSERT(tries);
00526 for (int32_t i=0; i<len; i++)
00527 sum+=tries->compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0));
00528
00529 delete[] vec;
00530 return normalizer->normalize_rhs(sum, idx);
00531 }
00532
00533 void CWeightedDegreeStringKernel::compute_by_tree(
00534 int32_t idx, float64_t* LevelContrib)
00535 {
00536 ASSERT(alphabet);
00537 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00538
00539 int32_t len ;
00540 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len);
00541
00542 int32_t *vec = new int32_t[len] ;
00543
00544 for (int32_t i=0; i<len; i++)
00545 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00546
00547 ASSERT(tries);
00548 for (int32_t i=0; i<len; i++)
00549 {
00550 tries->compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
00551 normalizer->normalize_rhs(1.0, idx),
00552 mkl_stepsize, weights, (length!=0));
00553 }
00554
00555 delete[] vec ;
00556 }
00557
00558 float64_t *CWeightedDegreeStringKernel::compute_abs_weights(int32_t &len)
00559 {
00560 ASSERT(tries);
00561 return tries->compute_abs_weights(len);
00562 }
00563
00564 bool CWeightedDegreeStringKernel::set_wd_weights_by_type(EWDKernType p_type)
00565 {
00566 ASSERT(degree>0);
00567 ASSERT(p_type==E_WD);
00568
00569 delete[] weights;
00570 weights=new float64_t[degree];
00571 if (weights)
00572 {
00573 int32_t i;
00574 float64_t sum=0;
00575 for (i=0; i<degree; i++)
00576 {
00577 weights[i]=degree-i;
00578 sum+=weights[i];
00579 }
00580 for (i=0; i<degree; i++)
00581 weights[i]/=sum;
00582
00583 for (i=0; i<degree; i++)
00584 {
00585 for (int32_t j=1; j<=max_mismatch; j++)
00586 {
00587 if (j<i+1)
00588 {
00589 int32_t nk=CMath::nchoosek(i+1, j);
00590 weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
00591 }
00592 else
00593 weights[i+j*degree]= 0;
00594 }
00595 }
00596
00597 if (which_degree>=0)
00598 {
00599 ASSERT(which_degree<degree);
00600 for (i=0; i<degree; i++)
00601 {
00602 if (i!=which_degree)
00603 weights[i]=0;
00604 else
00605 weights[i]=1;
00606 }
00607 }
00608 return true;
00609 }
00610 else
00611 return false;
00612 }
00613
00614 bool CWeightedDegreeStringKernel::set_weights(
00615 float64_t* ws, int32_t d, int32_t len)
00616 {
00617 SG_DEBUG("degree = %i d=%i\n", degree, d);
00618 degree=d;
00619 ASSERT(tries);
00620 tries->set_degree(degree);
00621 length=len;
00622
00623 if (length==0) length=1;
00624 int32_t num_weights=degree*(length+max_mismatch);
00625 delete[] weights;
00626 weights=new float64_t[num_weights];
00627 if (weights)
00628 {
00629 for (int32_t i=0; i<num_weights; i++) {
00630 if (ws[i])
00631 weights[i]=ws[i];
00632 }
00633 return true;
00634 }
00635 else
00636 return false;
00637 }
00638
00639 bool CWeightedDegreeStringKernel::set_position_weights(
00640 float64_t* pws, int32_t len)
00641 {
00642 if (len==0)
00643 {
00644 delete[] position_weights;
00645 position_weights=NULL;
00646 ASSERT(tries);
00647 tries->set_position_weights(position_weights);
00648 }
00649
00650 if (seq_length!=len)
00651 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00652
00653 delete[] position_weights;
00654 position_weights=new float64_t[len];
00655 ASSERT(tries);
00656 tries->set_position_weights(position_weights);
00657
00658 if (position_weights)
00659 {
00660 for (int32_t i=0; i<len; i++)
00661 position_weights[i]=pws[i];
00662 return true;
00663 }
00664 else
00665 return false;
00666 }
00667
00668 bool CWeightedDegreeStringKernel::init_block_weights_from_wd()
00669 {
00670 delete[] block_weights;
00671 block_weights=new float64_t[CMath::max(seq_length,degree)];
00672
00673 if (block_weights)
00674 {
00675 int32_t k;
00676 float64_t d=degree;
00677
00678 for (k=0; k<degree; k++)
00679 block_weights[k]=
00680 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
00681 for (k=degree; k<seq_length; k++)
00682 block_weights[k]=(-d+3*k+4)/3;
00683 }
00684
00685 return (block_weights!=NULL);
00686 }
00687
00688 bool CWeightedDegreeStringKernel::init_block_weights_from_wd_external()
00689 {
00690 ASSERT(weights);
00691 delete[] block_weights;
00692 block_weights=new float64_t[CMath::max(seq_length,degree)];
00693
00694 if (block_weights)
00695 {
00696 int32_t i=0;
00697 block_weights[0]=weights[0];
00698 for (i=1; i<CMath::max(seq_length,degree); i++)
00699 block_weights[i]=0;
00700
00701 for (i=1; i<CMath::max(seq_length,degree); i++)
00702 {
00703 block_weights[i]=block_weights[i-1];
00704
00705 float64_t contrib=0;
00706 for (int32_t j=0; j<CMath::min(degree,i+1); j++)
00707 contrib+=weights[j];
00708
00709 block_weights[i]+=contrib;
00710 }
00711 }
00712
00713 return (block_weights!=NULL);
00714 }
00715
00716 bool CWeightedDegreeStringKernel::init_block_weights_const()
00717 {
00718 delete[] block_weights;
00719 block_weights=new float64_t[seq_length];
00720
00721 if (block_weights)
00722 {
00723 for (int32_t i=1; i<seq_length+1 ; i++)
00724 block_weights[i-1]=1.0/seq_length;
00725 }
00726
00727 return (block_weights!=NULL);
00728 }
00729
00730 bool CWeightedDegreeStringKernel::init_block_weights_linear()
00731 {
00732 delete[] block_weights;
00733 block_weights=new float64_t[seq_length];
00734
00735 if (block_weights)
00736 {
00737 for (int32_t i=1; i<seq_length+1 ; i++)
00738 block_weights[i-1]=degree*i;
00739 }
00740
00741 return (block_weights!=NULL);
00742 }
00743
00744 bool CWeightedDegreeStringKernel::init_block_weights_sqpoly()
00745 {
00746 delete[] block_weights;
00747 block_weights=new float64_t[seq_length];
00748
00749 if (block_weights)
00750 {
00751 for (int32_t i=1; i<degree+1 ; i++)
00752 block_weights[i-1]=((float64_t) i)*i;
00753
00754 for (int32_t i=degree+1; i<seq_length+1 ; i++)
00755 block_weights[i-1]=i;
00756 }
00757
00758 return (block_weights!=NULL);
00759 }
00760
00761 bool CWeightedDegreeStringKernel::init_block_weights_cubicpoly()
00762 {
00763 delete[] block_weights;
00764 block_weights=new float64_t[seq_length];
00765
00766 if (block_weights)
00767 {
00768 for (int32_t i=1; i<degree+1 ; i++)
00769 block_weights[i-1]=((float64_t) i)*i*i;
00770
00771 for (int32_t i=degree+1; i<seq_length+1 ; i++)
00772 block_weights[i-1]=i;
00773 }
00774
00775 return (block_weights!=NULL);
00776 }
00777
00778 bool CWeightedDegreeStringKernel::init_block_weights_exp()
00779 {
00780 delete[] block_weights;
00781 block_weights=new float64_t[seq_length];
00782
00783 if (block_weights)
00784 {
00785 for (int32_t i=1; i<degree+1 ; i++)
00786 block_weights[i-1]=exp(((float64_t) i/10.0));
00787
00788 for (int32_t i=degree+1; i<seq_length+1 ; i++)
00789 block_weights[i-1]=i;
00790 }
00791
00792 return (block_weights!=NULL);
00793 }
00794
00795 bool CWeightedDegreeStringKernel::init_block_weights_log()
00796 {
00797 delete[] block_weights;
00798 block_weights=new float64_t[seq_length];
00799
00800 if (block_weights)
00801 {
00802 for (int32_t i=1; i<degree+1 ; i++)
00803 block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2);
00804
00805 for (int32_t i=degree+1; i<seq_length+1 ; i++)
00806 block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
00807 }
00808
00809 return (block_weights!=NULL);
00810 }
00811
00812 bool CWeightedDegreeStringKernel::init_block_weights_external()
00813 {
00814 if (block_weights_external && (seq_length == num_block_weights_external) )
00815 {
00816 delete[] block_weights;
00817 block_weights=new float64_t[seq_length];
00818
00819 if (block_weights)
00820 {
00821 for (int32_t i=0; i<seq_length; i++)
00822 block_weights[i]=block_weights_external[i];
00823 }
00824 }
00825 else {
00826 SG_ERROR( "sequence longer then weights (seqlen:%d, wlen:%d)\n", seq_length, block_weights_external);
00827 }
00828 return (block_weights!=NULL);
00829 }
00830
00831 bool CWeightedDegreeStringKernel::init_block_weights()
00832 {
00833 switch (type)
00834 {
00835 case E_WD:
00836 return init_block_weights_from_wd();
00837 case E_EXTERNAL:
00838 return init_block_weights_from_wd_external();
00839 case E_BLOCK_CONST:
00840 return init_block_weights_const();
00841 case E_BLOCK_LINEAR:
00842 return init_block_weights_linear();
00843 case E_BLOCK_SQPOLY:
00844 return init_block_weights_sqpoly();
00845 case E_BLOCK_CUBICPOLY:
00846 return init_block_weights_cubicpoly();
00847 case E_BLOCK_EXP:
00848 return init_block_weights_exp();
00849 case E_BLOCK_LOG:
00850 return init_block_weights_log();
00851 case E_BLOCK_EXTERNAL:
00852 return init_block_weights_external();
00853 default:
00854 return false;
00855 };
00856 }
00857
00858
00859 void* CWeightedDegreeStringKernel::compute_batch_helper(void* p)
00860 {
00861 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
00862 int32_t j=params->j;
00863 CWeightedDegreeStringKernel* wd=params->kernel;
00864 CTrie<DNATrie>* tries=params->tries;
00865 float64_t* weights=params->weights;
00866 int32_t length=params->length;
00867 int32_t* vec=params->vec;
00868 float64_t* result=params->result;
00869 float64_t factor=params->factor;
00870 int32_t* vec_idx=params->vec_idx;
00871
00872 CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
00873 CAlphabet* alpha=wd->alphabet;
00874
00875 for (int32_t i=params->start; i<params->end; i++)
00876 {
00877 int32_t len=0;
00878 char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len);
00879 for (int32_t k=j; k<CMath::min(len,j+wd->get_degree()); k++)
00880 vec[k]=alpha->remap_to_bin(char_vec[k]);
00881
00882 ASSERT(tries);
00883
00884 result[i]+=factor*
00885 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
00886 }
00887
00888 SG_UNREF(rhs_feat);
00889
00890 return NULL;
00891 }
00892
00893 void CWeightedDegreeStringKernel::compute_batch(
00894 int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
00895 int32_t* IDX, float64_t* alphas, float64_t factor)
00896 {
00897 ASSERT(tries);
00898 ASSERT(alphabet);
00899 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00900 ASSERT(rhs);
00901 ASSERT(num_vec<=rhs->get_num_vectors());
00902 ASSERT(num_vec>0);
00903 ASSERT(vec_idx);
00904 ASSERT(result);
00905 create_empty_tries();
00906
00907 int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
00908 ASSERT(num_feat>0);
00909 int32_t num_threads=parallel->get_num_threads();
00910 ASSERT(num_threads>0);
00911 int32_t* vec=new int32_t[num_threads*num_feat];
00912
00913 if (num_threads < 2)
00914 {
00915 #ifdef CYGWIN
00916 for (int32_t j=0; j<num_feat; j++)
00917 #else
00918 CSignal::clear_cancel();
00919 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
00920 #endif
00921 {
00922 init_optimization(num_suppvec, IDX, alphas, j);
00923 S_THREAD_PARAM params;
00924 params.vec=vec;
00925 params.result=result;
00926 params.weights=weights;
00927 params.kernel=this;
00928 params.tries=tries;
00929 params.factor=factor;
00930 params.j=j;
00931 params.start=0;
00932 params.end=num_vec;
00933 params.length=length;
00934 params.vec_idx=vec_idx;
00935 compute_batch_helper((void*) ¶ms);
00936
00937 SG_PROGRESS(j,0,num_feat);
00938 }
00939 }
00940 #ifndef WIN32
00941 else
00942 {
00943 CSignal::clear_cancel();
00944 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
00945 {
00946 init_optimization(num_suppvec, IDX, alphas, j);
00947 pthread_t* threads = new pthread_t[num_threads-1];
00948 S_THREAD_PARAM* params = new S_THREAD_PARAM[num_threads];
00949 int32_t step= num_vec/num_threads;
00950 int32_t t;
00951
00952 for (t=0; t<num_threads-1; t++)
00953 {
00954 params[t].vec=&vec[num_feat*t];
00955 params[t].result=result;
00956 params[t].weights=weights;
00957 params[t].kernel=this;
00958 params[t].tries=tries;
00959 params[t].factor=factor;
00960 params[t].j=j;
00961 params[t].start = t*step;
00962 params[t].end = (t+1)*step;
00963 params[t].length=length;
00964 params[t].vec_idx=vec_idx;
00965 pthread_create(&threads[t], NULL, CWeightedDegreeStringKernel::compute_batch_helper, (void*)¶ms[t]);
00966 }
00967 params[t].vec=&vec[num_feat*t];
00968 params[t].result=result;
00969 params[t].weights=weights;
00970 params[t].kernel=this;
00971 params[t].tries=tries;
00972 params[t].factor=factor;
00973 params[t].j=j;
00974 params[t].start=t*step;
00975 params[t].end=num_vec;
00976 params[t].length=length;
00977 params[t].vec_idx=vec_idx;
00978 compute_batch_helper((void*) ¶ms[t]);
00979
00980 for (t=0; t<num_threads-1; t++)
00981 pthread_join(threads[t], NULL);
00982 SG_PROGRESS(j,0,num_feat);
00983
00984 delete[] params;
00985 delete[] threads;
00986 }
00987 }
00988 #endif
00989
00990 delete[] vec;
00991
00992
00993
00994 create_empty_tries();
00995 }
00996
00997 bool CWeightedDegreeStringKernel::set_max_mismatch(int32_t max)
00998 {
00999 if (type==E_EXTERNAL && max!=0) {
01000 return false;
01001 }
01002
01003 max_mismatch=max;
01004
01005 if (lhs!=NULL && rhs!=NULL)
01006 return init(lhs, rhs);
01007 else
01008 return true;
01009 }