00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "features/StringFeatures.h"
00019
00020
/** Weighting scheme selector for the weighted degree kernel.
 *
 * The numeric values are fixed explicitly and must stay stable, since
 * they may be stored or passed around as plain integers.
 *
 * NOTE(review): the E_BLOCK_* entries presumably correspond one-to-one to
 * the init_block_weights_*() methods of CWeightedDegreeStringKernel --
 * confirm against the implementation file.
 */
enum EWDKernType
{
	/** standard weighted degree weighting */
	E_WD=0,
	/** externally supplied weights */
	E_EXTERNAL=1,

	/** block weights: constant */
	E_BLOCK_CONST=2,
	/** block weights: linear */
	E_BLOCK_LINEAR=3,
	/** block weights: squared polynomial */
	E_BLOCK_SQPOLY=4,
	/** block weights: cubic polynomial */
	E_BLOCK_CUBICPOLY=5,
	/** block weights: exponential */
	E_BLOCK_EXP=6,
	/** block weights: logarithmic */
	E_BLOCK_LOG=7,
	/** block weights: externally supplied */
	E_BLOCK_EXTERNAL=8
};
00034
00035
00050 class CWeightedDegreeStringKernel: public CStringKernel<char>
00051 {
00052 public:
00058 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00059
00065 CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00066
00073 CWeightedDegreeStringKernel(
00074 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00075
00076 virtual ~CWeightedDegreeStringKernel();
00077
00084 virtual bool init(CFeatures* l, CFeatures* r);
00085
00087 virtual void cleanup();
00088
00094 bool load_init(FILE* src);
00095
00101 bool save_init(FILE* dest);
00102
00107 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00108
00113 virtual const char* get_name() const { return "WeightedDegree"; }
00114
00122 inline virtual bool init_optimization(
00123 int32_t count, int32_t *IDX, float64_t* alphas)
00124 {
00125 return init_optimization(count, IDX, alphas, -1);
00126 }
00127
00138 virtual bool init_optimization(
00139 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00140
00145 virtual bool delete_optimization();
00146
00152 virtual float64_t compute_optimized(int32_t idx)
00153 {
00154 if (get_is_initialized())
00155 return compute_by_tree(idx);
00156
00157 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00158 return 0;
00159 }
00160
00165 static void* compute_batch_helper(void* p);
00166
00177 virtual void compute_batch(
00178 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00179 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00180 float64_t factor=1.0);
00181
00185 inline virtual void clear_normal()
00186 {
00187 if (get_is_initialized())
00188 {
00189 tries->delete_trees(max_mismatch==0);
00190 set_is_initialized(false);
00191 }
00192 }
00193
00199 inline virtual void add_to_normal(int32_t idx, float64_t weight)
00200 {
00201 if (max_mismatch==0)
00202 add_example_to_tree(idx, weight);
00203 else
00204 add_example_to_tree_mismatch(idx, weight);
00205
00206 set_is_initialized(true);
00207 }
00208
00213 inline virtual int32_t get_num_subkernels()
00214 {
00215 if (position_weights!=NULL)
00216 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00217 if (length==0)
00218 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00219 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00220 }
00221
00227 inline void compute_by_subkernel(
00228 int32_t idx, float64_t * subkernel_contrib)
00229 {
00230 if (get_is_initialized())
00231 {
00232 compute_by_tree(idx, subkernel_contrib);
00233 return ;
00234 }
00235
00236 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00237 }
00238
00244 inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00245 {
00246 num_weights = get_num_subkernels();
00247
00248 delete[] weights_buffer ;
00249 weights_buffer = new float64_t[num_weights];
00250
00251 if (position_weights!=NULL)
00252 for (int32_t i=0; i<num_weights; i++)
00253 weights_buffer[i] = position_weights[i*mkl_stepsize];
00254 else
00255 for (int32_t i=0; i<num_weights; i++)
00256 weights_buffer[i] = weights[i*mkl_stepsize];
00257
00258 return weights_buffer;
00259 }
00260
00266 inline void set_subkernel_weights(
00267 float64_t* weights2, int32_t num_weights2)
00268 {
00269 int32_t num_weights = get_num_subkernels();
00270 if (num_weights!=num_weights2)
00271 SG_ERROR( "number of weights do not match\n");
00272
00273 if (position_weights!=NULL)
00274 {
00275 for (int32_t i=0; i<num_weights; i++)
00276 {
00277 for (int32_t j=0; j<mkl_stepsize; j++)
00278 {
00279 if (i*mkl_stepsize+j<seq_length)
00280 position_weights[i*mkl_stepsize+j] = weights2[i];
00281 }
00282 }
00283 }
00284 else if (length==0)
00285 {
00286 for (int32_t i=0; i<num_weights; i++)
00287 {
00288 for (int32_t j=0; j<mkl_stepsize; j++)
00289 {
00290 if (i*mkl_stepsize+j<get_degree())
00291 weights[i*mkl_stepsize+j] = weights2[i];
00292 }
00293 }
00294 }
00295 else
00296 {
00297 for (int32_t i=0; i<num_weights; i++)
00298 {
00299 for (int32_t j=0; j<mkl_stepsize; j++)
00300 {
00301 if (i*mkl_stepsize+j<get_degree()*length)
00302 weights[i*mkl_stepsize+j] = weights2[i];
00303 }
00304 }
00305 }
00306 }
00307
00308
00314 float64_t *compute_abs_weights(int32_t & len);
00315
00322 void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00323
00328 bool is_tree_initialized() { return tree_initialized; }
00329
00335 inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00336 {
00337 d=degree;
00338 len=length;
00339 return weights;
00340 }
00341
00347 inline float64_t *get_weights(int32_t& num_weights)
00348 {
00349 if (position_weights!=NULL)
00350 {
00351 num_weights = seq_length ;
00352 return position_weights ;
00353 }
00354 if (length==0)
00355 num_weights = degree ;
00356 else
00357 num_weights = degree*length ;
00358 return weights;
00359 }
00360
00366 inline float64_t *get_position_weights(int32_t& len)
00367 {
00368 len=seq_length;
00369 return position_weights;
00370 }
00371
00377 bool set_wd_weights_by_type(EWDKernType type);
00378
00385 void set_wd_weights(float64_t* p_weights, int32_t d)
00386 {
00387 set_weights(p_weights,d,0);
00388 }
00389
00396 bool set_weights(float64_t* weights, int32_t d, int32_t len);
00397
00404 bool set_position_weights(float64_t* position_weights, int32_t len=0);
00405
00410 bool init_block_weights();
00411
00416 bool init_block_weights_from_wd();
00417
00422 bool init_block_weights_from_wd_external();
00423
00428 bool init_block_weights_const();
00429
00434 bool init_block_weights_linear();
00435
00440 bool init_block_weights_sqpoly();
00441
00446 bool init_block_weights_cubicpoly();
00447
00452 bool init_block_weights_exp();
00453
00458 bool init_block_weights_log();
00459
00464 bool init_block_weights_external();
00465
00470 bool delete_position_weights()
00471 {
00472 delete[] position_weights;
00473 position_weights=NULL;
00474 return true;
00475 }
00476
00482 bool set_max_mismatch(int32_t max);
00483
00488 inline int32_t get_max_mismatch() { return max_mismatch; }
00489
00495 inline bool set_degree(int32_t deg) { degree=deg; return true; }
00496
00501 inline int32_t get_degree() { return degree; }
00502
00508 inline bool set_use_block_computation(bool block)
00509 {
00510 block_computation=block;
00511 return true;
00512 }
00513
00518 inline bool get_use_block_computation() { return block_computation; }
00519
00525 inline bool set_mkl_stepsize(int32_t step)
00526 {
00527 mkl_stepsize=step;
00528 return true;
00529 }
00530
00535 inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00536
00542 inline bool set_which_degree(int32_t which)
00543 {
00544 which_degree=which;
00545 return true;
00546 }
00547
00552 inline int32_t get_which_degree() { return which_degree; }
00553
00554 protected:
00556 void create_empty_tries();
00557
00563 void add_example_to_tree(int32_t idx, float64_t weight);
00564
00571 void add_example_to_single_tree(
00572 int32_t idx, float64_t weight, int32_t tree_num);
00573
00579 void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00580
00587 void add_example_to_single_tree_mismatch(
00588 int32_t idx, float64_t weight, int32_t tree_num);
00589
00595 float64_t compute_by_tree(int32_t idx);
00596
00605 float64_t compute(int32_t idx_a, int32_t idx_b);
00606
00615 float64_t compute_with_mismatch(
00616 char* avec, int32_t alen, char* bvec, int32_t blen);
00617
00626 float64_t compute_without_mismatch(
00627 char* avec, int32_t alen, char* bvec, int32_t blen);
00628
00637 float64_t compute_without_mismatch_matrix(
00638 char* avec, int32_t alen, char* bvec, int32_t blen);
00639
00648 float64_t compute_using_block(char* avec, int32_t alen,
00649 char* bvec, int32_t blen);
00650
00652 virtual void remove_lhs();
00653
00654 protected:
00658 float64_t* weights;
00660 float64_t* position_weights;
00662 float64_t* weights_buffer;
00664 int32_t mkl_stepsize;
00666 int32_t degree;
00668 int32_t length;
00669
00671 int32_t max_mismatch;
00673 int32_t seq_length;
00674
00676 bool initialized;
00677
00679 bool block_computation;
00680
00682 int32_t num_block_weights_external;
00684 float64_t* block_weights_external;
00685
00687 float64_t* block_weights;
00689 EWDKernType type;
00691 int32_t which_degree;
00692
00694 CTrie<DNATrie>* tries;
00695
00697 bool tree_initialized;
00698
00700 CAlphabet* alphabet;
00701 };
00702
00703 #endif