WeightedDegreePositionStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Trie.h"
00016 #include "base/Parallel.h"
00017 
00018 #include "kernel/WeightedDegreePositionStringKernel.h"
00019 #include "kernel/SqrtDiagKernelNormalizer.h"
00020 #include "features/Features.h"
00021 #include "features/StringFeatures.h"
00022 
00023 #include "classifier/svm/SVM.h"
00024 
00025 #ifndef WIN32
00026 #include <pthread.h>
00027 #endif
00028 
00029 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X))
00030 
00031 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00032 template <class Trie> struct S_THREAD_PARAM 
00033 {
00034     int32_t* vec;
00035     float64_t* result;
00036     float64_t* weights;
00037     CWeightedDegreePositionStringKernel* kernel;
00038     CTrie<Trie>* tries;
00039     float64_t factor;
00040     int32_t j;
00041     int32_t start;
00042     int32_t end;
00043     int32_t length;
00044     int32_t max_shift;
00045     int32_t* shift;
00046     int32_t* vec_idx;
00047 };
00048 #endif // DOXYGEN_SHOULD_SKIP_THIS
00049 
00050 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00051     int32_t size, int32_t d, int32_t mm, int32_t mkls)
00052 : CStringKernel<char>(size), weights(NULL), position_weights(NULL),
00053     position_weights_lhs(NULL), position_weights_rhs(NULL),
00054     weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0),
00055     max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0),
00056     num_block_weights_external(0), block_weights_external(NULL),
00057     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00058     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00059     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00060     alphabet(NULL)
00061 {
00062     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00063     set_wd_weights();
00064     ASSERT(weights);
00065 
00066     set_normalizer(new CSqrtDiagKernelNormalizer());
00067 }
00068 
00069 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00070     int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl,
00071     int32_t mkls)
00072 : CStringKernel<char>(size), weights(NULL), position_weights(NULL),
00073     position_weights_lhs(NULL), position_weights_rhs(NULL),
00074     weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0),
00075     max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0),
00076     num_block_weights_external(0), block_weights_external(NULL),
00077     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00078     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00079     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00080     alphabet(NULL)
00081 {
00082     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00083 
00084     weights=new float64_t[d*(1+max_mismatch)];
00085     for (int32_t i=0; i<d*(1+max_mismatch); i++)
00086         weights[i]=w[i];
00087 
00088     set_shifts(s, sl);
00089 
00090     set_normalizer(new CSqrtDiagKernelNormalizer());
00091 }
00092 
00093 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00094     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00095 : CStringKernel<char>(10), weights(NULL), position_weights(NULL),
00096     position_weights_lhs(NULL), position_weights_rhs(NULL),
00097     weights_buffer(NULL), mkl_stepsize(1), degree(d), length(0),
00098     max_mismatch(0), seq_length(0), shift(NULL), shift_len(0),
00099     num_block_weights_external(0), block_weights_external(NULL),
00100     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00101     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00102     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00103     alphabet(NULL)
00104 {
00105     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00106     set_wd_weights();
00107     ASSERT(weights);
00108     set_normalizer(new CSqrtDiagKernelNormalizer());
00109 
00110     init(l, r);
00111 }
00112 
00113 
00114 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel()
00115 {
00116     cleanup();
00117     cleanup_POIM2();
00118 
00119     delete[] shift;
00120     shift=NULL;
00121 
00122     delete[] weights;
00123     weights=NULL ;
00124 
00125     delete[] block_weights;
00126     block_weights=NULL;
00127 
00128     delete[] position_weights;
00129     position_weights=NULL;
00130 
00131     delete[] position_weights_lhs;
00132     position_weights_lhs=NULL;
00133 
00134     delete[] position_weights_rhs;
00135     position_weights_rhs=NULL;
00136 
00137     delete[] weights_buffer;
00138     weights_buffer=NULL;
00139 }
00140 
00141 void CWeightedDegreePositionStringKernel::remove_lhs()
00142 {
00143     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00144     delete_optimization();
00145 
00146     tries.destroy();
00147     poim_tries.destroy();
00148 
00149     CKernel::remove_lhs();
00150 }
00151 
00152 void CWeightedDegreePositionStringKernel::create_empty_tries()
00153 {
00154     ASSERT(lhs);
00155     seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length();
00156 
00157     if (opt_type==SLOWBUTMEMEFFICIENT)
00158     {
00159         tries.create(seq_length, true); 
00160         poim_tries.create(seq_length, true); 
00161     }
00162     else if (opt_type==FASTBUTMEMHUNGRY)
00163     {
00164         tries.create(seq_length, false);  // still buggy
00165         poim_tries.create(seq_length, false);  // still buggy
00166     }
00167     else
00168         SG_ERROR( "unknown optimization type\n");
00169 }
00170 
00171 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r)
00172 {
00173     int32_t lhs_changed = (lhs!=l) ;
00174     int32_t rhs_changed = (rhs!=r) ;
00175 
00176     CStringKernel<char>::init(l,r);
00177 
00178     SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ;
00179     SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ;
00180 
00181     CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
00182     CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;
00183 
00184     /* set shift */
00185     if (shift_len==0) {
00186         shift_len=sf_l->get_vector_length(0);
00187         int32_t *shifts=new int32_t[shift_len];
00188         for (int32_t i=0; i<shift_len; i++) {
00189             shifts[i]=1;
00190         }
00191         set_shifts(shifts, shift_len);
00192         delete[] shifts;
00193     }
00194 
00195 
00196     int32_t len=sf_l->get_max_vector_length();
00197     if (lhs_changed && !sf_l->have_same_length(len))
00198         SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00199 
00200     if (rhs_changed && !sf_r->have_same_length(len))
00201         SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00202 
00203     SG_UNREF(alphabet);
00204     alphabet= sf_l->get_alphabet();
00205     CAlphabet* ralphabet=sf_r->get_alphabet();
00206 
00207     if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00208         properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00209 
00210     ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00211     SG_UNREF(ralphabet);
00212 
00213     //whenever init is called also init tries and block weights
00214     create_empty_tries();
00215     init_block_weights();
00216 
00217     return init_normalizer();
00218 }
00219 
00220 void CWeightedDegreePositionStringKernel::cleanup()
00221 {
00222     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00223     delete_optimization();
00224 
00225     delete[] block_weights;
00226     block_weights=NULL;
00227 
00228     tries.destroy();
00229     poim_tries.destroy();
00230 
00231     seq_length = 0;
00232     tree_initialized = false;
00233 
00234     SG_UNREF(alphabet);
00235     alphabet=NULL;
00236 
00237     CKernel::cleanup();
00238 }
00239 
00240 bool CWeightedDegreePositionStringKernel::load_init(FILE* src)
00241 {
00242     return false;
00243 }
00244 
00245 bool CWeightedDegreePositionStringKernel::save_init(FILE* dest)
00246 {
00247     return false;
00248 }
00249 
00250 bool CWeightedDegreePositionStringKernel::init_optimization(
00251     int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num,
00252     int32_t upto_tree)
00253 {
00254     ASSERT(position_weights_lhs==NULL);
00255     ASSERT(position_weights_rhs==NULL);
00256 
00257     if (upto_tree<0)
00258         upto_tree=tree_num;
00259 
00260     if (max_mismatch!=0)
00261     {
00262         SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n");
00263         return false ;
00264     }
00265 
00266     if (tree_num<0)
00267         SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00268 
00269     delete_optimization();
00270 
00271     if (tree_num<0)
00272         SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ;
00273 
00274     for (int32_t i=0; i<p_count; i++)
00275     {
00276         if (tree_num<0)
00277         {
00278             if ( (i % (p_count/10+1)) == 0)
00279                 SG_PROGRESS(i,0,p_count);
00280             add_example_to_tree(IDX[i], alphas[i]);
00281         }
00282         else
00283         {
00284             for (int32_t t=tree_num; t<=upto_tree; t++)
00285                 add_example_to_single_tree(IDX[i], alphas[i], t);
00286         }
00287     }
00288 
00289     if (tree_num<0)
00290         SG_DONE();
00291 
00292     set_is_initialized(true) ;
00293     return true ;
00294 }
00295 
00296 bool CWeightedDegreePositionStringKernel::delete_optimization() 
00297 { 
00298     if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 
00299     {
00300         tries.set_use_compact_terminal_nodes(false) ;
00301         SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
00302     }
00303 
00304     if (get_is_initialized())
00305     {
00306         if (opt_type==SLOWBUTMEMEFFICIENT)
00307             tries.delete_trees(true); 
00308         else if (opt_type==FASTBUTMEMHUNGRY)
00309             tries.delete_trees(false);  // still buggy
00310         else {
00311             SG_ERROR( "unknown optimization type\n");
00312         }
00313         set_is_initialized(false);
00314 
00315         return true;
00316     }
00317 
00318     return false;
00319 }
00320 
00321 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch(
00322     char* avec, int32_t alen, char* bvec, int32_t blen)
00323 {
00324     float64_t* max_shift_vec= new float64_t[max_shift];
00325     float64_t sum0=0 ;
00326     for (int32_t i=0; i<max_shift; i++)
00327         max_shift_vec[i]=0 ;
00328     
00329     // no shift
00330     for (int32_t i=0; i<alen; i++)
00331     {
00332         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00333             continue ;
00334         
00335         int32_t mismatches=0;
00336         float64_t sumi = 0.0 ;
00337         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00338         {
00339             if (avec[i+j]!=bvec[i+j])
00340             {
00341                 mismatches++ ;
00342                 if (mismatches>max_mismatch)
00343                     break ;
00344             } ;
00345             sumi += weights[j+degree*mismatches];
00346         }
00347         if (position_weights!=NULL)
00348             sum0 += position_weights[i]*sumi ;
00349         else
00350             sum0 += sumi ;
00351     } ;
00352     
00353     for (int32_t i=0; i<alen; i++)
00354     {
00355         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00356         {
00357             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00358                 continue ;
00359             
00360             float64_t sumi1 = 0.0 ;
00361             // shift in sequence a
00362             int32_t mismatches=0;
00363             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00364             {
00365                 if (avec[i+j+k]!=bvec[i+j])
00366                 {
00367                     mismatches++ ;
00368                     if (mismatches>max_mismatch)
00369                         break ;
00370                 } ;
00371                 sumi1 += weights[j+degree*mismatches];
00372             }
00373             float64_t sumi2 = 0.0 ;
00374             // shift in sequence b
00375             mismatches=0;
00376             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00377             {
00378                 if (avec[i+j]!=bvec[i+j+k])
00379                 {
00380                     mismatches++ ;
00381                     if (mismatches>max_mismatch)
00382                         break ;
00383                 } ;
00384                 sumi2 += weights[j+degree*mismatches];
00385             }
00386             if (position_weights!=NULL)
00387                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00388             else
00389                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00390         } ;
00391     }
00392     
00393     float64_t result = sum0 ;
00394     for (int32_t i=0; i<max_shift; i++)
00395         result += max_shift_vec[i]/(2*(i+1)) ;
00396     
00397     delete[] max_shift_vec;
00398     return result ;
00399 }
00400 
00401 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch(
00402     char* avec, int32_t alen, char* bvec, int32_t blen) 
00403 {
00404     float64_t* max_shift_vec = new float64_t[max_shift];
00405     float64_t sum0=0 ;
00406     for (int32_t i=0; i<max_shift; i++)
00407         max_shift_vec[i]=0 ;
00408 
00409     // no shift
00410     for (int32_t i=0; i<alen; i++)
00411     {
00412         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00413             continue ;
00414 
00415         float64_t sumi = 0.0 ;
00416         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00417         {
00418             if (avec[i+j]!=bvec[i+j])
00419                 break ;
00420             sumi += weights[j];
00421         }
00422         if (position_weights!=NULL)
00423             sum0 += position_weights[i]*sumi ;
00424         else
00425             sum0 += sumi ;
00426     } ;
00427 
00428     for (int32_t i=0; i<alen; i++)
00429     {
00430         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00431         {
00432             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00433                 continue ;
00434 
00435             float64_t sumi1 = 0.0 ;
00436             // shift in sequence a
00437             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00438             {
00439                 if (avec[i+j+k]!=bvec[i+j])
00440                     break ;
00441                 sumi1 += weights[j];
00442             }
00443             float64_t sumi2 = 0.0 ;
00444             // shift in sequence b
00445             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00446             {
00447                 if (avec[i+j]!=bvec[i+j+k])
00448                     break ;
00449                 sumi2 += weights[j];
00450             }
00451             if (position_weights!=NULL)
00452                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00453             else
00454                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00455         } ;
00456     }
00457 
00458     float64_t result = sum0 ;
00459     for (int32_t i=0; i<max_shift; i++)
00460         result += max_shift_vec[i]/(2*(i+1)) ;
00461 
00462     delete[] max_shift_vec;
00463     return result ;
00464 }
00465 
00466 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix(
00467     char* avec, int32_t alen, char* bvec, int32_t blen) 
00468 {
00469     float64_t* max_shift_vec = new float64_t[max_shift];
00470     float64_t sum0=0 ;
00471     for (int32_t i=0; i<max_shift; i++)
00472         max_shift_vec[i]=0 ;
00473 
00474     // no shift
00475     for (int32_t i=0; i<alen; i++)
00476     {
00477         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00478             continue ;
00479         float64_t sumi = 0.0 ;
00480         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00481         {
00482             if (avec[i+j]!=bvec[i+j])
00483                 break ;
00484             sumi += weights[i*degree+j];
00485         }
00486         if (position_weights!=NULL)
00487             sum0 += position_weights[i]*sumi ;
00488         else
00489             sum0 += sumi ;
00490     } ;
00491 
00492     for (int32_t i=0; i<alen; i++)
00493     {
00494         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00495         {
00496             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00497                 continue ;
00498 
00499             float64_t sumi1 = 0.0 ;
00500             // shift in sequence a
00501             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00502             {
00503                 if (avec[i+j+k]!=bvec[i+j])
00504                     break ;
00505                 sumi1 += weights[i*degree+j];
00506             }
00507             float64_t sumi2 = 0.0 ;
00508             // shift in sequence b
00509             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00510             {
00511                 if (avec[i+j]!=bvec[i+j+k])
00512                     break ;
00513                 sumi2 += weights[i*degree+j];
00514             }
00515             if (position_weights!=NULL)
00516                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00517             else
00518                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00519         } ;
00520     }
00521 
00522     float64_t result = sum0 ;
00523     for (int32_t i=0; i<max_shift; i++)
00524         result += max_shift_vec[i]/(2*(i+1)) ;
00525 
00526     delete[] max_shift_vec;
00527     return result ;
00528 }
00529 
00530 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights(
00531     char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec,
00532     float64_t* pos_weights_rhs, int32_t blen)
00533 {
00534 
00535     float64_t* max_shift_vec = new float64_t[max_shift];
00536     float64_t sum0=0 ;
00537     for (int32_t i=0; i<max_shift; i++)
00538         max_shift_vec[i]=0 ;
00539 
00540     // no shift
00541     for (int32_t i=0; i<alen; i++)
00542     {
00543         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00544             continue ;
00545 
00546         float64_t sumi = 0.0 ;
00547         float64_t posweight_lhs = 0.0 ;
00548         float64_t posweight_rhs = 0.0 ;
00549         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00550         {
00551             posweight_lhs += pos_weights_lhs[i+j] ;
00552             posweight_rhs += pos_weights_rhs[i+j] ;
00553             
00554             if (avec[i+j]!=bvec[i+j])
00555                 break ;
00556             sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00557         }
00558         if (position_weights!=NULL)
00559             sum0 += position_weights[i]*sumi ;
00560         else
00561             sum0 += sumi ;
00562     } ;
00563 
00564     for (int32_t i=0; i<alen; i++)
00565     {
00566         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00567         {
00568             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00569                 continue ;
00570 
00571             // shift in sequence a  
00572             float64_t sumi1 = 0.0 ;
00573             float64_t posweight_lhs = 0.0 ;
00574             float64_t posweight_rhs = 0.0 ;
00575             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00576             {
00577                 posweight_lhs += pos_weights_lhs[i+j+k] ;
00578                 posweight_rhs += pos_weights_rhs[i+j] ;
00579                 if (avec[i+j+k]!=bvec[i+j])
00580                     break ;
00581                 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00582             }
00583             // shift in sequence b
00584             float64_t sumi2 = 0.0 ;
00585             posweight_lhs = 0.0 ;
00586             posweight_rhs = 0.0 ;
00587             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00588             {
00589                 posweight_lhs += pos_weights_lhs[i+j] ;
00590                 posweight_rhs += pos_weights_rhs[i+j+k] ;
00591                 if (avec[i+j]!=bvec[i+j+k])
00592                     break ;
00593                 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00594             }
00595             if (position_weights!=NULL)
00596                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00597             else
00598                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00599         } ;
00600     }
00601 
00602     float64_t result = sum0 ;
00603     for (int32_t i=0; i<max_shift; i++)
00604         result += max_shift_vec[i]/(2*(i+1)) ;
00605 
00606     delete[] max_shift_vec;
00607     return result ;
00608 }
00609 
00610 
00611 float64_t CWeightedDegreePositionStringKernel::compute(
00612     int32_t idx_a, int32_t idx_b)
00613 {
00614     int32_t alen, blen;
00615 
00616     char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen);
00617     char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen);
00618     // can only deal with strings of same length
00619     ASSERT(alen==blen);
00620     ASSERT(shift_len==alen);
00621 
00622     float64_t result = 0 ;
00623     if (position_weights_lhs!=NULL || position_weights_rhs!=NULL)
00624     {
00625         ASSERT(max_mismatch==0);
00626         float64_t* position_weights_rhs_ = position_weights_rhs ;
00627         if (lhs==rhs)
00628         {
00629             //fprintf(stdout, ".") ;
00630             position_weights_rhs_ = position_weights_lhs ;
00631         }
00632         else
00633         {
00634             //fprintf(stdout, "*") ;
00635         }
00636         
00637         result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ;
00638     }
00639     else if (max_mismatch > 0)
00640         result = compute_with_mismatch(avec, alen, bvec, blen) ;
00641     else if (length==0)
00642         result = compute_without_mismatch(avec, alen, bvec, blen) ;
00643     else
00644         result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ;
00645 
00646     return result ;
00647 }
00648 
00649 
00650 void CWeightedDegreePositionStringKernel::add_example_to_tree(
00651     int32_t idx, float64_t alpha)
00652 {
00653     ASSERT(position_weights_lhs==NULL);
00654     ASSERT(position_weights_rhs==NULL);
00655     ASSERT(alphabet);
00656     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00657 
00658     int32_t len=0;
00659     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00660     ASSERT(max_mismatch==0);
00661     int32_t *vec = new int32_t[len] ;
00662 
00663     for (int32_t i=0; i<len; i++)
00664         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00665 
00666     if (opt_type==FASTBUTMEMHUNGRY)
00667     {
00668         //TRIES(set_use_compact_terminal_nodes(false)) ;
00669         ASSERT(!TRIES(get_use_compact_terminal_nodes()));
00670     }
00671 
00672     for (int32_t i=0; i<len; i++)
00673     {
00674         int32_t max_s=-1;
00675 
00676         if (opt_type==SLOWBUTMEMEFFICIENT)
00677             max_s=0;
00678         else if (opt_type==FASTBUTMEMHUNGRY)
00679             max_s=shift[i];
00680         else {
00681             SG_ERROR( "unknown optimization type\n");
00682         }
00683 
00684         for (int32_t s=max_s; s>=0; s--)
00685         {
00686             float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00687             TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ;
00688             //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i, s, idx, alpha_pw) ;
00689 
00690             if ((s==0) || (i+s>=len))
00691                 continue;
00692 
00693             TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ;
00694             //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i+s, -s, idx, alpha_pw) ;
00695         }
00696     }
00697 
00698     delete[] vec ;
00699     tree_initialized=true ;
00700 }
00701 
00702 void CWeightedDegreePositionStringKernel::add_example_to_single_tree(
00703     int32_t idx, float64_t alpha, int32_t tree_num) 
00704 {
00705     ASSERT(position_weights_lhs==NULL);
00706     ASSERT(position_weights_rhs==NULL);
00707     ASSERT(alphabet);
00708     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00709 
00710     int32_t len=0;
00711     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00712     ASSERT(max_mismatch==0);
00713     int32_t *vec=new int32_t[len];
00714     int32_t max_s=-1;
00715 
00716     if (opt_type==SLOWBUTMEMEFFICIENT)
00717         max_s=0;
00718     else if (opt_type==FASTBUTMEMHUNGRY)
00719     {
00720         ASSERT(!tries.get_use_compact_terminal_nodes());
00721         max_s=shift[tree_num];
00722     }
00723     else {
00724         SG_ERROR( "unknown optimization type\n");
00725     }
00726     for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+degree+max_shift); i++)
00727         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00728 
00729     for (int32_t s=max_s; s>=0; s--)
00730     {
00731         float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00732         tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ;
00733         //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, s, idx, alpha_pw) ;
00734     } 
00735 
00736     if (opt_type==FASTBUTMEMHUNGRY)
00737     {
00738         for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++)
00739         {
00740             int32_t s=tree_num-i;
00741             if ((i+s<len) && (s>=1) && (s<=shift[i]))
00742             {
00743                 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00744                 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 
00745                 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, -s, idx, alpha_pw) ;
00746             }
00747         }
00748     }
00749     delete[] vec ;
00750     tree_initialized=true ;
00751 }
00752 
00753 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx)
00754 {
00755     ASSERT(position_weights_lhs==NULL);
00756     ASSERT(position_weights_rhs==NULL);
00757     ASSERT(alphabet);
00758     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00759 
00760     float64_t sum=0;
00761     int32_t len=0;
00762     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len);
00763     ASSERT(max_mismatch==0);
00764     int32_t *vec=new int32_t[len];
00765 
00766     for (int32_t i=0; i<len; i++)
00767         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00768 
00769     for (int32_t i=0; i<len; i++)
00770         sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ;
00771 
00772     if (opt_type==SLOWBUTMEMEFFICIENT)
00773     {
00774         for (int32_t i=0; i<len; i++)
00775         {
00776             for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++)
00777             {
00778                 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ;
00779                 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ;
00780             }
00781         }
00782     }
00783 
00784     delete[] vec ;
00785 
00786     return normalizer->normalize_rhs(sum, idx);
00787 }
00788 
00789 void CWeightedDegreePositionStringKernel::compute_by_tree(
00790     int32_t idx, float64_t* LevelContrib)
00791 {
00792     ASSERT(position_weights_lhs==NULL);
00793     ASSERT(position_weights_rhs==NULL);
00794     ASSERT(alphabet);
00795     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00796 
00797     int32_t len=0;
00798     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len);
00799     ASSERT(max_mismatch==0);
00800     int32_t *vec=new int32_t[len];
00801 
00802     for (int32_t i=0; i<len; i++)
00803         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00804 
00805     for (int32_t i=0; i<len; i++)
00806     {
00807         tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
00808                 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights,
00809                 (length!=0));
00810     }
00811 
00812     if (opt_type==SLOWBUTMEMEFFICIENT)
00813     {
00814         for (int32_t i=0; i<len; i++)
00815             for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++)
00816             {
00817                 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib,
00818                         normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize,
00819                         weights, (length!=0)) ;
00820                 tries.compute_by_tree_helper(vec, len, i+k, i, i+k,
00821                         LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx),
00822                         mkl_stepsize, weights, (length!=0)) ;
00823             }
00824     }
00825 
00826     delete[] vec ;
00827 }
00828 
00829 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights(
00830     int32_t &len)
00831 {
00832     return tries.compute_abs_weights(len);
00833 }
00834 
00835 bool CWeightedDegreePositionStringKernel::set_shifts(
00836     int32_t* shift_, int32_t shift_len_)
00837 {
00838     delete[] shift;
00839 
00840     shift_len = shift_len_ ;
00841     shift = new int32_t[shift_len] ;
00842 
00843     if (shift)
00844     {
00845         max_shift = 0 ;
00846 
00847         for (int32_t i=0; i<shift_len; i++)
00848         {
00849             shift[i] = shift_[i] ;
00850             if (shift[i]>max_shift)
00851                 max_shift = shift[i] ;
00852         }
00853 
00854         ASSERT(max_shift>=0 && max_shift<=shift_len);
00855     }
00856     
00857     return false;
00858 }
00859 
00860 bool CWeightedDegreePositionStringKernel::set_wd_weights()
00861 {
00862     ASSERT(degree>0);
00863 
00864     delete[] weights;
00865     weights=new float64_t[degree];
00866     if (weights)
00867     {
00868         int32_t i;
00869         float64_t sum=0;
00870         for (i=0; i<degree; i++)
00871         {
00872             weights[i]=degree-i;
00873             sum+=weights[i];
00874         }
00875         for (i=0; i<degree; i++)
00876             weights[i]/=sum;
00877 
00878         for (i=0; i<degree; i++)
00879         {
00880             for (int32_t j=1; j<=max_mismatch; j++)
00881             {
00882                 if (j<i+1)
00883                 {
00884                     int32_t nk=CMath::nchoosek(i+1, j);
00885                     weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
00886                 }
00887                 else
00888                     weights[i+j*degree]= 0;
00889             }
00890         }
00891 
00892         return true;
00893     }
00894     else
00895         return false;
00896 }
00897 
00898 bool CWeightedDegreePositionStringKernel::set_weights(
00899     float64_t* ws, int32_t d, int32_t len)
00900 {
00901     SG_DEBUG("degree = %i  d=%i\n", degree, d);
00902     degree=d;
00903     length=len;
00904 
00905     if (len <= 0)
00906         len=1;
00907 
00908     delete[] weights;
00909     weights=new float64_t[d*len];
00910 
00911     if (weights)
00912     {
00913         for (int32_t i=0; i<degree*len; i++)
00914             weights[i]=ws[i];
00915         return true;
00916     }
00917     else
00918         return false;
00919 }
00920 
00921 bool CWeightedDegreePositionStringKernel::set_position_weights(
00922     float64_t* pws, int32_t len)
00923 {
00924     //fprintf(stderr, "len=%i\n", len);
00925 
00926     if (len==0)
00927     {
00928         delete[] position_weights;
00929         position_weights = NULL;
00930         tries.set_position_weights(position_weights);
00931         return true;
00932     }
00933     if (seq_length==0)
00934         seq_length=len;
00935 
00936     if (seq_length!=len)
00937     {
00938         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00939         return false;
00940     }
00941     delete[] position_weights;
00942     position_weights=new float64_t[len];
00943     tries.set_position_weights(position_weights);
00944 
00945     if (position_weights)
00946     {
00947         for (int32_t i=0; i<len; i++)
00948             position_weights[i]=pws[i];
00949         return true;
00950     }
00951     else
00952         return false;
00953 }
00954 
00955 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num)
00956 {
00957     fprintf(stderr, "set_position_weights_lhs %i %i %i\n", len, num, seq_length);
00958 
00959     if (position_weights_rhs==position_weights_lhs)
00960         position_weights_rhs=NULL;
00961     else
00962         delete_position_weights_rhs();
00963 
00964     if (len==0)
00965     {
00966         return delete_position_weights_lhs();
00967     }
00968     
00969     if (seq_length!=len)
00970     {
00971         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00972         return false;
00973     }
00974     if (0)
00975     {
00976         
00977     if (!lhs)
00978     {
00979         SG_ERROR("lhs=NULL\n");
00980         return false ;
00981     }
00982     if (lhs->get_num_vectors()!=num)
00983     {
00984         SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num);
00985         return false;
00986     }
00987     }
00988     
00989     delete[] position_weights_lhs;
00990     position_weights_lhs=new float64_t[len*num];
00991     if (position_weights_lhs)
00992     {
00993         for (int32_t i=0; i<len*num; i++)
00994             position_weights_lhs[i]=pws[i];
00995         return true;
00996     }
00997     else
00998         return false;
00999 }
01000 
01001 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs(
01002     float64_t* pws, int32_t len, int32_t num)
01003 {
01004     fprintf(stderr, "set_position_weights_rhs %i %i %i\n", len, num, seq_length);
01005     if (len==0)
01006     {
01007         if (position_weights_rhs==position_weights_lhs)
01008         {
01009             position_weights_rhs=NULL;
01010             return true;
01011         }
01012         return delete_position_weights_rhs();
01013     }
01014 
01015     if (seq_length!=len)
01016     {
01017         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
01018         return false;
01019     }
01020     if (0)
01021     {
01022         
01023     if (!rhs)
01024     {
01025         if (!lhs)
01026         {
01027             SG_ERROR("rhs=NULL and lhs=NULL\n");
01028             return false;
01029         }
01030         if (lhs->get_num_vectors()!=num)
01031         {
01032             SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num);
01033             return false;
01034         }
01035     } else
01036         if (rhs->get_num_vectors()!=num)
01037         {
01038             SG_ERROR("rhs->get_num_vectors()=%i, num=%i\n", rhs->get_num_vectors(), num);
01039             return false;
01040         }
01041     }
01042     
01043     delete[] position_weights_rhs;
01044     position_weights_rhs=new float64_t[len*num];
01045     if (position_weights_rhs)
01046     {
01047         for (int32_t i=0; i<len*num; i++)
01048             position_weights_rhs[i]=pws[i];
01049         return true;
01050     }
01051     else
01052         return false;
01053 }
01054 
01055 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd()
01056 {
01057     delete[] block_weights;
01058     block_weights=new float64_t[CMath::max(seq_length,degree)];
01059 
01060     if (block_weights)
01061     {
01062         int32_t k;
01063         float64_t d=degree; // use float to evade rounding errors below
01064 
01065         for (k=0; k<degree; k++)
01066             block_weights[k]=
01067                 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
01068         for (k=degree; k<seq_length; k++)
01069             block_weights[k]=(-d+3*k+4)/3;
01070     }
01071 
01072     return (block_weights!=NULL);
01073 }
01074 
01075 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external()
01076 {
01077     ASSERT(weights);
01078     delete[] block_weights;
01079     block_weights=new float64_t[CMath::max(seq_length,degree)];
01080 
01081     if (block_weights)
01082     {
01083         int32_t i=0;
01084         block_weights[0]=weights[0];
01085         for (i=1; i<CMath::max(seq_length,degree); i++)
01086             block_weights[i]=0;
01087 
01088         for (i=1; i<CMath::max(seq_length,degree); i++)
01089         {
01090             block_weights[i]=block_weights[i-1];
01091 
01092             float64_t contrib=0;
01093             for (int32_t j=0; j<CMath::min(degree,i+1); j++)
01094                 contrib+=weights[j];
01095 
01096             block_weights[i]+=contrib;
01097         }
01098     }
01099 
01100     return (block_weights!=NULL);
01101 }
01102 
01103 bool CWeightedDegreePositionStringKernel::init_block_weights_const()
01104 {
01105     delete[] block_weights;
01106     block_weights=new float64_t[seq_length];
01107 
01108     if (block_weights)
01109     {
01110         for (int32_t i=1; i<seq_length+1 ; i++)
01111             block_weights[i-1]=1.0/seq_length;
01112     }
01113 
01114     return (block_weights!=NULL);
01115 }
01116 
01117 bool CWeightedDegreePositionStringKernel::init_block_weights_linear()
01118 {
01119     delete[] block_weights;
01120     block_weights=new float64_t[seq_length];
01121 
01122     if (block_weights)
01123     {
01124         for (int32_t i=1; i<seq_length+1 ; i++)
01125             block_weights[i-1]=degree*i;
01126     }
01127 
01128     return (block_weights!=NULL);
01129 }
01130 
01131 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly()
01132 {
01133     delete[] block_weights;
01134     block_weights=new float64_t[seq_length];
01135 
01136     if (block_weights)
01137     {
01138         for (int32_t i=1; i<degree+1 ; i++)
01139             block_weights[i-1]=((float64_t) i)*i;
01140 
01141         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01142             block_weights[i-1]=i;
01143     }
01144 
01145     return (block_weights!=NULL);
01146 }
01147 
01148 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly()
01149 {
01150     delete[] block_weights;
01151     block_weights=new float64_t[seq_length];
01152 
01153     if (block_weights)
01154     {
01155         for (int32_t i=1; i<degree+1 ; i++)
01156             block_weights[i-1]=((float64_t) i)*i*i;
01157 
01158         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01159             block_weights[i-1]=i;
01160     }
01161 
01162     return (block_weights!=NULL);
01163 }
01164 
01165 bool CWeightedDegreePositionStringKernel::init_block_weights_exp()
01166 {
01167     delete[] block_weights;
01168     block_weights=new float64_t[seq_length];
01169 
01170     if (block_weights)
01171     {
01172         for (int32_t i=1; i<degree+1 ; i++)
01173             block_weights[i-1]=exp(((float64_t) i/10.0));
01174 
01175         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01176             block_weights[i-1]=i;
01177     }
01178 
01179     return (block_weights!=NULL);
01180 }
01181 
01182 bool CWeightedDegreePositionStringKernel::init_block_weights_log()
01183 {
01184     delete[] block_weights;
01185     block_weights=new float64_t[seq_length];
01186 
01187     if (block_weights)
01188     {
01189         for (int32_t i=1; i<degree+1 ; i++)
01190             block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2);
01191 
01192         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01193             block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
01194     }
01195 
01196     return (block_weights!=NULL);
01197 }
01198 
01199 bool CWeightedDegreePositionStringKernel::init_block_weights_external()
01200 {
01201     if (block_weights_external && (seq_length == num_block_weights_external) )
01202     {
01203         delete[] block_weights;
01204         block_weights=new float64_t[seq_length];
01205 
01206         if (block_weights)
01207         {
01208             for (int32_t i=0; i<seq_length; i++)
01209                 block_weights[i]=block_weights_external[i];
01210         }
01211     }
01212     else {
01213       SG_ERROR( "sequence longer then weights (seqlen:%d, wlen:%d)\n", seq_length, block_weights_external);
01214    }
01215     return (block_weights!=NULL);
01216 }
01217 
01218 bool CWeightedDegreePositionStringKernel::init_block_weights()
01219 {
01220     switch (type)
01221     {
01222         case E_WD:
01223             return init_block_weights_from_wd();
01224         case E_EXTERNAL:
01225             return init_block_weights_from_wd_external();
01226         case E_BLOCK_CONST:
01227             return init_block_weights_const();
01228         case E_BLOCK_LINEAR:
01229             return init_block_weights_linear();
01230         case E_BLOCK_SQPOLY:
01231             return init_block_weights_sqpoly();
01232         case E_BLOCK_CUBICPOLY:
01233             return init_block_weights_cubicpoly();
01234         case E_BLOCK_EXP:
01235             return init_block_weights_exp();
01236         case E_BLOCK_LOG:
01237             return init_block_weights_log();
01238         case E_BLOCK_EXTERNAL:
01239             return init_block_weights_external();
01240         default:
01241             return false;
01242     };
01243 }
01244 
01245 
01246 
01247 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p)
01248 {
01249     S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p;
01250     int32_t j=params->j;
01251     CWeightedDegreePositionStringKernel* wd=params->kernel;
01252     CTrie<DNATrie>* tries=params->tries;
01253     float64_t* weights=params->weights;
01254     int32_t length=params->length;
01255     int32_t max_shift=params->max_shift;
01256     int32_t* vec=params->vec;
01257     float64_t* result=params->result;
01258     float64_t factor=params->factor;
01259     int32_t* shift=params->shift;
01260     int32_t* vec_idx=params->vec_idx;
01261 
01262     for (int32_t i=params->start; i<params->end; i++)
01263     {
01264         int32_t len=0;
01265         CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
01266         CAlphabet* alpha=wd->alphabet;
01267 
01268         char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len);
01269         for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++)
01270             vec[k]=alpha->remap_to_bin(char_vec[k]);
01271 
01272         SG_UNREF(rhs_feat);
01273 
01274         result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
01275 
01276         if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT)
01277         {
01278             for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++)
01279             {
01280                 int32_t s=j-q ;
01281                 if ((s>=1) && (s<=shift[q]) && (q+s<len))
01282                 {
01283                     result[i] +=
01284                         wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01285                                 len, q, q+s, q, weights, (length!=0)),
01286                                 vec_idx[i])/(2.0*s);
01287                 }
01288             }
01289 
01290             for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++)
01291             {
01292                 result[i] +=
01293                     wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01294                                 len, j+s, j, j+s, weights, (length!=0)),
01295                                 vec_idx[i])/(2.0*s);
01296             }
01297         }
01298     }
01299 
01300     return NULL;
01301 }
01302 
01303 void CWeightedDegreePositionStringKernel::compute_batch(
01304     int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
01305     int32_t* IDX, float64_t* alphas, float64_t factor)
01306 {
01307     ASSERT(alphabet);
01308     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01309     ASSERT(position_weights_lhs==NULL);
01310     ASSERT(position_weights_rhs==NULL);
01311     ASSERT(rhs);
01312     ASSERT(num_vec<=rhs->get_num_vectors());
01313     ASSERT(num_vec>0);
01314     ASSERT(vec_idx);
01315     ASSERT(result);
01316     create_empty_tries();
01317 
01318     int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01319     ASSERT(num_feat>0);
01320     int32_t num_threads=parallel->get_num_threads();
01321     ASSERT(num_threads>0);
01322     int32_t* vec=new int32_t[num_threads*num_feat];
01323 
01324     if (num_threads < 2)
01325     {
01326 #ifdef WIN32
01327        for (int32_t j=0; j<num_feat; j++)
01328 #else
01329        CSignal::clear_cancel();
01330        for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01331 #endif
01332             {
01333                 init_optimization(num_suppvec, IDX, alphas, j);
01334                 S_THREAD_PARAM<DNATrie> params;
01335                 params.vec=vec;
01336                 params.result=result;
01337                 params.weights=weights;
01338                 params.kernel=this;
01339                 params.tries=&tries;
01340                 params.factor=factor;
01341                 params.j=j;
01342                 params.start=0;
01343                 params.end=num_vec;
01344                 params.length=length;
01345                 params.max_shift=max_shift;
01346                 params.shift=shift;
01347                 params.vec_idx=vec_idx;
01348                 compute_batch_helper((void*) &params);
01349 
01350                 SG_PROGRESS(j,0,num_feat);
01351             }
01352     }
01353 #ifndef WIN32
01354     else
01355     {
01356 
01357         CSignal::clear_cancel();
01358         for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01359         {
01360             init_optimization(num_suppvec, IDX, alphas, j);
01361             pthread_t threads[num_threads-1];
01362             S_THREAD_PARAM<DNATrie> params[num_threads];
01363             int32_t step= num_vec/num_threads;
01364             int32_t t;
01365 
01366             for (t=0; t<num_threads-1; t++)
01367             {
01368                 params[t].vec=&vec[num_feat*t];
01369                 params[t].result=result;
01370                 params[t].weights=weights;
01371                 params[t].kernel=this;
01372                 params[t].tries=&tries;
01373                 params[t].factor=factor;
01374                 params[t].j=j;
01375                 params[t].start = t*step;
01376                 params[t].end = (t+1)*step;
01377                 params[t].length=length;
01378                 params[t].max_shift=max_shift;
01379                 params[t].shift=shift;
01380                 params[t].vec_idx=vec_idx;
01381                 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)&params[t]);
01382             }
01383 
01384             params[t].vec=&vec[num_feat*t];
01385             params[t].result=result;
01386             params[t].weights=weights;
01387             params[t].kernel=this;
01388             params[t].tries=&tries;
01389             params[t].factor=factor;
01390             params[t].j=j;
01391             params[t].start=t*step;
01392             params[t].end=num_vec;
01393             params[t].length=length;
01394             params[t].max_shift=max_shift;
01395             params[t].shift=shift;
01396             params[t].vec_idx=vec_idx;
01397             compute_batch_helper((void*) &params[t]);
01398 
01399             for (t=0; t<num_threads-1; t++)
01400                 pthread_join(threads[t], NULL);
01401             SG_PROGRESS(j,0,num_feat);
01402         }
01403     }
01404 #endif
01405 
01406     delete[] vec;
01407 
01408     //really also free memory as this can be huge on testing especially when
01409     //using the combined kernel
01410     create_empty_tries();
01411 }
01412 
01413 float64_t* CWeightedDegreePositionStringKernel::compute_scoring(
01414     int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result,
01415     int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01416 {
01417     ASSERT(position_weights_lhs==NULL);
01418     ASSERT(position_weights_rhs==NULL);
01419 
01420     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01421     ASSERT(num_feat>0);
01422     ASSERT(alphabet);
01423     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01424 
01425     num_sym=4; //for now works only w/ DNA
01426 
01427     ASSERT(max_degree>0);
01428 
01429     // === variables
01430     int32_t* nofsKmers=new int32_t[max_degree];
01431     float64_t** C=new float64_t*[max_degree];
01432     float64_t** L=new float64_t*[max_degree];
01433     float64_t** R=new float64_t*[max_degree];
01434 
01435     int32_t i;
01436     int32_t k;
01437 
01438     // --- return table
01439     int32_t bigtabSize=0;
01440     for (k=0; k<max_degree; ++k )
01441     {
01442         nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1);
01443         const int32_t tabSize=nofsKmers[k]*num_feat;
01444         bigtabSize+=tabSize;
01445     }
01446     result=new float64_t[bigtabSize];
01447 
01448     // --- auxilliary tables
01449     int32_t tabOffs=0;
01450     for( k = 0; k < max_degree; ++k )
01451     {
01452         const int32_t tabSize = nofsKmers[k] * num_feat;
01453         C[k] = &result[tabOffs];
01454         L[k] = new float64_t[ tabSize ];
01455         R[k] = new float64_t[ tabSize ];
01456         tabOffs+=tabSize;
01457         for(i = 0; i < tabSize; i++ )
01458         {
01459             C[k][i] = 0.0;
01460             L[k][i] = 0.0;
01461             R[k][i] = 0.0;
01462         }
01463     }
01464 
01465     // --- tree parsing info
01466     float64_t* margFactors=new float64_t[degree];
01467 
01468     int32_t* x = new int32_t[ degree+1 ];
01469     int32_t* substrs = new int32_t[ degree+1 ];
01470     // - fill arrays
01471     margFactors[0] = 1.0;
01472     substrs[0] = 0;
01473     for( k=1; k < degree; ++k ) {
01474         margFactors[k] = 0.25 * margFactors[k-1];
01475         substrs[k] = -1;
01476     }
01477     substrs[degree] = -1;
01478     // - fill struct
01479     struct TreeParseInfo info;
01480     info.num_sym = num_sym;
01481     info.num_feat = num_feat;
01482     info.p = -1;
01483     info.k = -1;
01484     info.nofsKmers = nofsKmers;
01485     info.margFactors = margFactors;
01486     info.x = x;
01487     info.substrs = substrs;
01488     info.y0 = 0;
01489     info.C_k = NULL;
01490     info.L_k = NULL;
01491     info.R_k = NULL;
01492 
01493     // === main loop
01494     i = 0; // total progress
01495     for( k = 0; k < max_degree; ++k )
01496     {
01497         const int32_t nofKmers = nofsKmers[ k ];
01498         info.C_k = C[k];
01499         info.L_k = L[k];
01500         info.R_k = R[k];
01501 
01502         // --- run over all trees
01503         for(int32_t p = 0; p < num_feat; ++p )
01504         {
01505             init_optimization( num_suppvec, IDX, alphas, p );
01506             int32_t tree = p ;
01507             for(int32_t j = 0; j < degree+1; j++ ) {
01508                 x[j] = -1;
01509             }
01510             tries.traverse( tree, p, info, 0, x, k );
01511             SG_PROGRESS(i++,0,num_feat*max_degree);
01512         }
01513 
01514         // --- add partial overlap scores
01515         if( k > 0 ) {
01516             const int32_t j = k - 1;
01517             const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 );
01518             for(int32_t p = 0; p < num_feat; ++p ) {
01519                 const int32_t offsetJ = nofJmers * p;
01520                 const int32_t offsetJ1 = nofJmers * (p+1);
01521                 const int32_t offsetK = nofKmers * p;
01522                 int32_t y;
01523                 int32_t sym;
01524                 for( y = 0; y < nofJmers; ++y ) {
01525                     for( sym = 0; sym < num_sym; ++sym ) {
01526                         const int32_t y_sym = num_sym*y + sym;
01527                         const int32_t sym_y = nofJmers*sym + y;
01528                         ASSERT(0<=y_sym && y_sym<nofKmers);
01529                         ASSERT(0<=sym_y && sym_y<nofKmers);
01530                         C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ];
01531                         if( p < num_feat-1 ) {
01532                             C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ];
01533                         }
01534                     }
01535                 }
01536             }
01537         }
01538         //   if( k > 1 )
01539         //     j = k-1
01540         //     for all positions p
01541         //       for all j-mers y
01542         //          for n in {A,C,G,T}
01543         //            C_k[ p, [y,n] ] += L_j[ p, y ]
01544         //            C_k[ p, [n,y] ] += R_j[ p+1, y ]
01545         //          end;
01546         //       end;
01547         //     end;
01548         //   end;
01549     }
01550 
01551     // === return a vector
01552     num_feat=1;
01553     num_sym = bigtabSize;
01554     // --- clean up
01555     delete[] nofsKmers;
01556     delete[] margFactors;
01557     delete[] substrs;
01558     delete[] x;
01559     delete[] C;
01560     for( k = 0; k < max_degree; ++k ) {
01561         delete[] L[k];
01562         delete[] R[k];
01563     }
01564     delete[] L;
01565     delete[] R;
01566     return result;
01567 }
01568 
01569 char* CWeightedDegreePositionStringKernel::compute_consensus(
01570     int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01571 {
01572     ASSERT(position_weights_lhs==NULL);
01573     ASSERT(position_weights_rhs==NULL);
01574     //only works for order <= 32
01575     ASSERT(degree<=32);
01576     ASSERT(!tries.get_use_compact_terminal_nodes());
01577     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01578     ASSERT(num_feat>0);
01579     ASSERT(alphabet);
01580     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01581 
01582     //consensus
01583     char* result=new char[num_feat];
01584 
01585     //backtracking and scoring table
01586     int32_t num_tables=CMath::max(1,num_feat-degree+1);
01587     CDynamicArray<ConsensusEntry>** table=new CDynamicArray<ConsensusEntry>*[num_tables];
01588 
01589     for (int32_t i=0; i<num_tables; i++)
01590         table[i]=new CDynamicArray<ConsensusEntry>(num_suppvec/10);
01591 
01592     //compute consensus via dynamic programming
01593     for (int32_t i=0; i<num_tables; i++)
01594     {
01595         bool cumulative=false;
01596 
01597         if (i<num_tables-1)
01598             init_optimization(num_suppvec, IDX, alphas, i);
01599         else
01600         {
01601             init_optimization(num_suppvec, IDX, alphas, i, num_feat-1);
01602             cumulative=true;
01603         }
01604 
01605         if (i==0)
01606             tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights);
01607         else
01608             tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights);
01609 
01610         SG_PROGRESS(i,0,num_feat);
01611     }
01612 
01613 
01614     //int32_t n=table[0]->get_num_elements();
01615 
01616     //for (int32_t i=0; i<n; i++)
01617     //{
01618     //  ConsensusEntry e= table[0]->get_element(i);
01619     //  SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01620     //}
01621 
01622     //n=table[num_tables-1]->get_num_elements();
01623     //for (int32_t i=0; i<n; i++)
01624     //{
01625     //  ConsensusEntry e= table[num_tables-1]->get_element(i);
01626     //  SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01627     //}
01628     //n=table[num_tables-2]->get_num_elements();
01629     //for (int32_t i=0; i<n; i++)
01630     //{
01631     //  ConsensusEntry e= table[num_tables-2]->get_element(i);
01632     //  SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01633     //}
01634 
01635     const char* acgt="ACGT";
01636 
01637     //backtracking start
01638     int32_t max_idx=-1;
01639     float32_t max_score=0;
01640     int32_t num_elements=table[num_tables-1]->get_num_elements();
01641 
01642     for (int32_t i=0; i<num_elements; i++)
01643     {
01644         float64_t sc=table[num_tables-1]->get_element(i).score;
01645         if (sc>max_score || max_idx==-1)
01646         {
01647             max_idx=i;
01648             max_score=sc;
01649         }
01650     }
01651     uint64_t endstr=table[num_tables-1]->get_element(max_idx).string;
01652 
01653     SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score);
01654 
01655     for (int32_t i=0; i<degree; i++)
01656         result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3];
01657 
01658     if (num_tables>1)
01659     {
01660         for (int32_t i=num_tables-1; i>=0; i--)
01661         {
01662             //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i);
01663             result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3];
01664             max_idx=table[i]->get_element(max_idx).bt;
01665         }
01666     }
01667 
01668     //for (int32_t t=0; t<num_tables; t++)
01669     //{
01670     //  n=table[t]->get_num_elements();
01671     //  for (int32_t i=0; i<n; i++)
01672     //  {
01673     //      ConsensusEntry e= table[t]->get_element(i);
01674     //      SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt);
01675     //  }
01676     //}
01677 
01678     for (int32_t i=0; i<num_tables; i++)
01679         delete table[i];
01680 
01681     delete[] table;
01682     return result;
01683 }
01684 
01685 
01686 float64_t* CWeightedDegreePositionStringKernel::extract_w(
01687     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01688     float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01689 {
01690   delete_optimization();
01691   use_poim_tries=true;
01692   poim_tries.delete_trees(false);
01693 
01694   // === check
01695   ASSERT(position_weights_lhs==NULL);
01696   ASSERT(position_weights_rhs==NULL);
01697   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01698   ASSERT(num_feat>0);
01699   ASSERT(alphabet->get_alphabet()==DNA);
01700   ASSERT(max_degree>0);
01701 
01702   // === general variables
01703   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01704   const int32_t seqLen = num_feat;
01705   float64_t** subs;
01706   int32_t i;
01707   int32_t k;
01708   //int32_t y;
01709 
01710   // === init tables "subs" for substring scores / POIMs
01711   // --- compute table sizes
01712   int32_t* offsets;
01713   int32_t offset;
01714   offsets = new int32_t[ max_degree ];
01715   offset = 0;
01716   for( k = 0; k < max_degree; ++k ) {
01717     offsets[k] = offset;
01718     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01719     const int32_t tabSize = nofsKmers * seqLen;
01720     offset += tabSize;
01721   }
01722   // --- allocate memory
01723   const int32_t bigTabSize = offset;
01724   w_result=new float64_t[bigTabSize];
01725   for (i=0; i<bigTabSize; ++i)
01726     w_result[i]=0;
01727 
01728   // --- set pointers for tables
01729   subs = new float64_t*[ max_degree ];
01730   ASSERT( subs != NULL );
01731   for( k = 0; k < max_degree; ++k ) {
01732     subs[k] = &w_result[ offsets[k] ];
01733   }
01734   delete[] offsets;
01735 
01736   // === init trees; extract "w"
01737   init_optimization( num_suppvec, IDX, alphas, -1);
01738   poim_tries.POIMs_extract_W( subs, max_degree );
01739 
01740   // === clean; return "subs" as vector
01741   delete[] subs;
01742   num_feat = 1;
01743   num_sym = bigTabSize;
01744   use_poim_tries=false;
01745   poim_tries.delete_trees(false);
01746   return w_result;
01747 }
01748 
01749 float64_t* CWeightedDegreePositionStringKernel::compute_POIM(
01750     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01751     float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
01752     float64_t* alphas, float64_t* distrib )
01753 {
01754   delete_optimization();
01755   use_poim_tries=true;
01756   poim_tries.delete_trees(false);
01757 
01758   // === check
01759   ASSERT(position_weights_lhs==NULL);
01760   ASSERT(position_weights_rhs==NULL);
01761   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01762   ASSERT(num_feat>0);
01763   ASSERT(alphabet->get_alphabet()==DNA);
01764   ASSERT(max_degree!=0);
01765   ASSERT(distrib);
01766 
01767   // === general variables
01768   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01769   const int32_t seqLen = num_feat;
01770   float64_t** subs;
01771   int32_t i;
01772   int32_t k;
01773 
01774   // === DEBUGGING mode
01775   //
01776   // Activated if "max_degree" < 0.
01777   // Allows to output selected partial score.
01778   //
01779   // |max_degree| mod 4
01780   //   0: substring
01781   //   1: superstring
01782   //   2: left overlap
01783   //   3: right overlap
01784   //
01785   const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0;
01786   if( debug ) {
01787     max_degree = abs(max_degree) / 4;
01788     switch( debug ) {
01789     case 1: {
01790       printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree );
01791       break;
01792     }
01793     case 2: {
01794       printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree );
01795       break;
01796     }
01797     case 3: {
01798       printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree );
01799       break;
01800     }
01801     case 4: {
01802       printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree );
01803       break;
01804     }
01805     default: {
01806       printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree );
01807       ASSERT(0);
01808       break;
01809     }
01810     }
01811   }
01812 
01813   // --- compute table sizes
01814   int32_t* offsets;
01815   int32_t offset;
01816   offsets = new int32_t[ max_degree ];
01817   offset = 0;
01818   for( k = 0; k < max_degree; ++k ) {
01819     offsets[k] = offset;
01820     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01821     const int32_t tabSize = nofsKmers * seqLen;
01822     offset += tabSize;
01823   }
01824   // --- allocate memory
01825   const int32_t bigTabSize=offset;
01826   poim_result=new float64_t[bigTabSize];
01827   for (i=0; i<bigTabSize; ++i )
01828     poim_result[i]=0;
01829 
01830   // --- set pointers for tables
01831   subs=new float64_t*[max_degree];
01832   for (k=0; k<max_degree; ++k)
01833     subs[k]=&poim_result[offsets[k]];
01834 
01835   delete[] offsets;
01836 
01837   // === init trees; precalc S, L and R
01838   init_optimization( num_suppvec, IDX, alphas, -1);
01839   poim_tries.POIMs_precalc_SLR( distrib );
01840 
01841   // === compute substring scores
01842   if( debug==0 || debug==1 ) {
01843     poim_tries.POIMs_extract_W( subs, max_degree );
01844     for( k = 1; k < max_degree; ++k ) {
01845       const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0;
01846       const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k );
01847       const int32_t nofKmers0 = nofKmers1 * NUM_SYMS;
01848       for( i = 0; i < seqLen; ++i ) {
01849     float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL;
01850     float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL;
01851     float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ];
01852     float64_t* const subs_k0i  = & subs[ k-0 ][ i*nofKmers0 ];
01853     int32_t y0;
01854     for( y0 = 0; y0 < nofKmers0; ++y0 ) {
01855       const int32_t y1l = y0 / NUM_SYMS;
01856       const int32_t y1r = y0 % nofKmers1;
01857       const int32_t y2 = y1r / NUM_SYMS;
01858       subs_k0i[ y0 ] += subs_k1i0[ y1l ];
01859       if( i < seqLen-1 ) {
01860         subs_k0i[ y0 ] += subs_k1i1[ y1r ];
01861         if( k > 1 ) {
01862           subs_k0i[ y0 ] -= subs_k2i1[ y2 ];
01863         }
01864       }
01865     }
01866       }
01867     }
01868   }
01869 
01870   // === compute POIMs
01871   poim_tries.POIMs_add_SLR( subs, max_degree, debug );
01872 
01873   // === clean; return "subs" as vector
01874   delete[] subs;
01875   num_feat = 1;
01876   num_sym = bigTabSize;
01877 
01878   use_poim_tries=false;
01879   poim_tries.delete_trees(false);
01880   
01881   return poim_result;
01882 }
01883 
01884 
01885 void CWeightedDegreePositionStringKernel::prepare_POIM2(
01886     float64_t* distrib, int32_t num_sym, int32_t num_feat)
01887 {
01888     free(m_poim_distrib);
01889     m_poim_distrib=(float64_t*)malloc(num_sym*num_feat*sizeof(float64_t));
01890     ASSERT(m_poim_distrib);
01891 
01892     memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t));
01893     m_poim_num_sym=num_sym;
01894     m_poim_num_feat=num_feat;
01895 }
01896 
01897 void CWeightedDegreePositionStringKernel::compute_POIM2(
01898     int32_t max_degree, CSVM* svm)
01899 {
01900     ASSERT(svm);
01901     int32_t num_suppvec=svm->get_num_support_vectors();
01902     int32_t* sv_idx=new int32_t[num_suppvec];
01903     float64_t* sv_weight=new float64_t[num_suppvec];
01904 
01905     for (int32_t i=0; i<num_suppvec; i++)
01906     {
01907         sv_idx[i]=svm->get_support_vector(i);
01908         sv_weight[i]=svm->get_alpha(i);
01909     }
01910     
01911     if ((max_degree < 1) || (max_degree > 12))
01912     {
01913         //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree);
01914         SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree);
01915         max_degree=1;
01916     }
01917     
01918     int32_t num_feat = m_poim_num_feat;
01919     int32_t num_sym = m_poim_num_sym;
01920     free(m_poim);
01921 
01922     m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL,  num_suppvec, sv_idx, 
01923                           sv_weight, m_poim_distrib);
01924 
01925     ASSERT(num_feat==1);
01926     m_poim_result_len=num_sym;
01927     
01928     delete[] sv_weight;
01929     delete[] sv_idx;
01930 }
01931 
01932 void CWeightedDegreePositionStringKernel::get_POIM2(
01933     float64_t** poim, int32_t* result_len)
01934 {
01935     *poim=(float64_t*) malloc(m_poim_result_len*sizeof(float64_t));
01936     ASSERT(*poim);
01937     memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ;
01938     *result_len=m_poim_result_len ;
01939 }
01940 
01941 void CWeightedDegreePositionStringKernel::cleanup_POIM2()
01942 {
01943     free(m_poim) ;
01944     m_poim=NULL ;
01945     free(m_poim_distrib) ;
01946     m_poim_distrib=NULL ;
01947     m_poim_num_sym=0 ;
01948     m_poim_num_sym=0 ;
01949     m_poim_result_len=0 ;
01950 }
01951 

SHOGUN Machine Learning Toolbox - Documentation