WeightedDegreeStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "features/StringFeatures.h"
00019 
00020 
      /**
       * Weighting types accepted by CWeightedDegreeStringKernel (constructor
       * argument `type`, set_wd_weights_by_type()).
       *
       * Every enumerator has an explicitly assigned numeric value instead of
       * relying on implicit numbering. The exact effect of each value is
       * decided by the kernel implementation, which is not part of this header;
       * the notes on the enumerators only record what the names and the
       * matching init_block_weights_*() declarations suggest.
       */
00021 enum EWDKernType
00022 {
          /** default type of the (degree, type) kernel constructor */
00023     E_WD=0,
          /** presumably: weights supplied by the caller -- TODO confirm */
00024     E_EXTERNAL=1,
00025 
          /** name mirrors init_block_weights_const() */
00026     E_BLOCK_CONST=2,
          /** name mirrors init_block_weights_linear() */
00027     E_BLOCK_LINEAR=3,
          /** name mirrors init_block_weights_sqpoly() */
00028     E_BLOCK_SQPOLY=4,
          /** name mirrors init_block_weights_cubicpoly() */
00029     E_BLOCK_CUBICPOLY=5,
          /** name mirrors init_block_weights_exp() */
00030     E_BLOCK_EXP=6,
          /** name mirrors init_block_weights_log() */
00031     E_BLOCK_LOG=7,
          /** name mirrors init_block_weights_external() */
00032     E_BLOCK_EXTERNAL=8
00033 };
00034 
00035 
00050 class CWeightedDegreeStringKernel: public CStringKernel<char>
00051 {
00052     public:
00058         CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00059 
00065         CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00066 
00073         CWeightedDegreeStringKernel(
00074             CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00075 
00076         virtual ~CWeightedDegreeStringKernel();
00077 
00084         virtual bool init(CFeatures* l, CFeatures* r);
00085 
00087         virtual void cleanup();
00088 
00094         bool load_init(FILE* src);
00095 
00101         bool save_init(FILE* dest);
00102 
00107         virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00108 
00113         virtual const char* get_name() const { return "WeightedDegree"; }
00114 
00122         inline virtual bool init_optimization(
00123             int32_t count, int32_t *IDX, float64_t* alphas)
00124         {
00125             return init_optimization(count, IDX, alphas, -1);
00126         }
00127 
00138         virtual bool init_optimization(
00139             int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00140 
00145         virtual bool delete_optimization();
00146 
00152         virtual float64_t compute_optimized(int32_t idx)
00153         { 
00154             if (get_is_initialized())
00155                 return compute_by_tree(idx);
00156 
00157             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00158             return 0;
00159         }
00160 
00165         static void* compute_batch_helper(void* p);
00166 
00177         virtual void compute_batch(
00178             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00179             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00180             float64_t factor=1.0);
00181 
00185         inline virtual void clear_normal()
00186         {
00187             if (get_is_initialized())
00188             {
00189                 tries->delete_trees(max_mismatch==0);
00190                 set_is_initialized(false);
00191             }
00192         }
00193 
00199         inline virtual void add_to_normal(int32_t idx, float64_t weight)
00200         {
00201             if (max_mismatch==0)
00202                 add_example_to_tree(idx, weight);
00203             else
00204                 add_example_to_tree_mismatch(idx, weight);
00205 
00206             set_is_initialized(true);
00207         }
00208 
00213         inline virtual int32_t get_num_subkernels()
00214         {
00215             if (position_weights!=NULL)
00216                 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00217             if (length==0)
00218                 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00219             return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00220         }
00221 
00227         inline void compute_by_subkernel(
00228             int32_t idx, float64_t * subkernel_contrib)
00229         { 
00230             if (get_is_initialized())
00231             {
00232                 compute_by_tree(idx, subkernel_contrib);
00233                 return ;
00234             }
00235 
00236             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00237         }
00238 
00244         inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00245         {
00246             num_weights = get_num_subkernels();
00247 
00248             delete[] weights_buffer ;
00249             weights_buffer = new float64_t[num_weights];
00250 
00251             if (position_weights!=NULL)
00252                 for (int32_t i=0; i<num_weights; i++)
00253                     weights_buffer[i] = position_weights[i*mkl_stepsize];
00254             else
00255                 for (int32_t i=0; i<num_weights; i++)
00256                     weights_buffer[i] = weights[i*mkl_stepsize];
00257 
00258             return weights_buffer;
00259         }
00260 
00266         inline void set_subkernel_weights(
00267             float64_t* weights2, int32_t num_weights2)
00268         {
00269             int32_t num_weights = get_num_subkernels();
00270             if (num_weights!=num_weights2)
00271                 SG_ERROR( "number of weights do not match\n");
00272 
00273             if (position_weights!=NULL)
00274             {
00275                 for (int32_t i=0; i<num_weights; i++)
00276                 {
00277                     for (int32_t j=0; j<mkl_stepsize; j++)
00278                     {
00279                         if (i*mkl_stepsize+j<seq_length)
00280                             position_weights[i*mkl_stepsize+j] = weights2[i];
00281                     }
00282                 }
00283             }
00284             else if (length==0)
00285             {
00286                 for (int32_t i=0; i<num_weights; i++)
00287                 {
00288                     for (int32_t j=0; j<mkl_stepsize; j++)
00289                     {
00290                         if (i*mkl_stepsize+j<get_degree())
00291                             weights[i*mkl_stepsize+j] = weights2[i];
00292                     }
00293                 }
00294             }
00295             else
00296             {
00297                 for (int32_t i=0; i<num_weights; i++)
00298                 {
00299                     for (int32_t j=0; j<mkl_stepsize; j++)
00300                     {
00301                         if (i*mkl_stepsize+j<get_degree()*length)
00302                             weights[i*mkl_stepsize+j] = weights2[i];
00303                     }
00304                 }
00305             }
00306         }
00307 
00308         // other kernel tree operations
00314         float64_t *compute_abs_weights(int32_t & len);
00315 
00322         void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00323 
00328         bool is_tree_initialized() { return tree_initialized; }
00329 
00335         inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00336         {
00337             d=degree;
00338             len=length;
00339             return weights;
00340         }
00341 
00347         inline float64_t *get_weights(int32_t& num_weights)
00348         {
00349             if (position_weights!=NULL)
00350             {
00351                 num_weights = seq_length ;
00352                 return position_weights ;
00353             }
00354             if (length==0)
00355                 num_weights = degree ;
00356             else
00357                 num_weights = degree*length ;
00358             return weights;
00359         }
00360 
00366         inline float64_t *get_position_weights(int32_t& len)
00367         {
00368             len=seq_length;
00369             return position_weights;
00370         }
00371 
00377         bool set_wd_weights_by_type(EWDKernType type);
00378 
00385         void set_wd_weights(float64_t* p_weights, int32_t d)
00386         {
00387             set_weights(p_weights,d,0);
00388         }
00389 
00396         bool set_weights(float64_t* weights, int32_t d, int32_t len);
00397 
00404         bool set_position_weights(float64_t* position_weights, int32_t len=0);
00405 
00410         bool init_block_weights();
00411 
00416         bool init_block_weights_from_wd();
00417 
00422         bool init_block_weights_from_wd_external();
00423 
00428         bool init_block_weights_const();
00429 
00434         bool init_block_weights_linear();
00435 
00440         bool init_block_weights_sqpoly();
00441 
00446         bool init_block_weights_cubicpoly();
00447 
00452         bool init_block_weights_exp();
00453 
00458         bool init_block_weights_log();
00459 
00464         bool init_block_weights_external();
00465 
00470         bool delete_position_weights()
00471         {
00472             delete[] position_weights;
00473             position_weights=NULL;
00474             return true;
00475         }
00476 
00482         bool set_max_mismatch(int32_t max);
00483 
00488         inline int32_t get_max_mismatch() { return max_mismatch; }
00489 
00495         inline bool set_degree(int32_t deg) { degree=deg; return true; }
00496 
00501         inline int32_t get_degree() { return degree; }
00502 
00508         inline bool set_use_block_computation(bool block)
00509         {
00510             block_computation=block;
00511             return true;
00512         }
00513 
00518         inline bool get_use_block_computation() { return block_computation; }
00519 
00525         inline bool set_mkl_stepsize(int32_t step)
00526         {
00527             mkl_stepsize=step;
00528             return true;
00529         }
00530 
00535         inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00536 
00542         inline bool set_which_degree(int32_t which)
00543         {
00544             which_degree=which;
00545             return true;
00546         }
00547 
00552         inline int32_t get_which_degree() { return which_degree; }
00553 
00554     protected:
00556         void create_empty_tries();
00557 
00563         void add_example_to_tree(int32_t idx, float64_t weight);
00564 
00571         void add_example_to_single_tree(
00572             int32_t idx, float64_t weight, int32_t tree_num);
00573 
00579         void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00580 
00587         void add_example_to_single_tree_mismatch(
00588             int32_t idx, float64_t weight, int32_t tree_num);
00589 
00595         float64_t compute_by_tree(int32_t idx);
00596 
00605         float64_t compute(int32_t idx_a, int32_t idx_b);
00606 
00615         float64_t compute_with_mismatch(
00616             char* avec, int32_t alen, char* bvec, int32_t blen);
00617 
00626         float64_t compute_without_mismatch(
00627             char* avec, int32_t alen, char* bvec, int32_t blen);
00628 
00637         float64_t compute_without_mismatch_matrix(
00638             char* avec, int32_t alen, char* bvec, int32_t blen);
00639 
00648         float64_t compute_using_block(char* avec, int32_t alen,
00649             char* bvec, int32_t blen);
00650 
00652         virtual void remove_lhs();
00653 
00654     protected:
00658         float64_t* weights;
00660         float64_t* position_weights;
00662         float64_t* weights_buffer;
00664         int32_t mkl_stepsize;
00666         int32_t degree;
00668         int32_t length;
00669 
00671         int32_t max_mismatch;
00673         int32_t seq_length;
00674 
00676         bool initialized;
00677 
00679         bool block_computation;
00680 
00682         int32_t num_block_weights_external;
00684         float64_t* block_weights_external;
00685 
00687         float64_t* block_weights;
00689         EWDKernType type;
00691         int32_t which_degree;
00692 
00694         CTrie<DNATrie>* tries;
00695 
00697         bool tree_initialized;
00698 
00700         CAlphabet* alphabet;
00701 };
00702 
00703 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation