Kernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "base/SGObject.h"
00017 #include "features/Features.h"
00018 #include "kernel/KernelNormalizer.h"
00019 
00020 #ifdef USE_SHORTREAL_KERNELCACHE
00021     typedef float32_t KERNELCACHE_ELEM;
00022 #else
00023     typedef float64_t KERNELCACHE_ELEM;
00024 #endif
00025 
00026 typedef int64_t KERNELCACHE_IDX;
00027 
00028 
/** Strategy selector stored in CKernel::opt_type.
 *
 * The two choices trade speed against memory use, as their names say; how a
 * concrete kernel honours the setting is up to that kernel's implementation.
 */
enum EOptimizationType
{
    /** favour speed, at the cost of memory */
    FASTBUTMEMHUNGRY = 0,
    /** favour low memory use, at the cost of speed */
    SLOWBUTMEMEFFICIENT = 1
};
00034 
/** Identifier of a concrete kernel, returned by CKernel::get_kernel_type().
 *
 * The numeric values are fixed and must not be renumbered. Families are
 * spaced in steps of ten, and variants within a family take consecutive
 * values (e.g. 10/11, 30/31/32). Value 130 is unused.
 */
enum EKernelType
{
    K_UNKNOWN                =   0,

    K_LINEAR                 =  10,
    K_SPARSELINEAR           =  11,

    K_POLY                   =  20,

    K_GAUSSIAN               =  30,
    K_SPARSEGAUSSIAN         =  31,
    K_GAUSSIANSHIFT          =  32,

    K_HISTOGRAM              =  40,
    K_SALZBERG               =  41,

    K_LOCALITYIMPROVED       =  50,
    K_SIMPLELOCALITYIMPROVED =  60,
    K_FIXEDDEGREE            =  70,

    K_WEIGHTEDDEGREE         =  80,
    K_WEIGHTEDDEGREEPOS      =  81,

    K_WEIGHTEDCOMMWORDSTRING =  90,
    K_POLYMATCH              = 100,
    K_ALIGNMENT              = 110,

    K_COMMWORDSTRING         = 120,
    K_COMMULONGSTRING        = 121,

    K_COMBINED               = 140,
    K_AUC                    = 150,
    K_CUSTOM                 = 160,
    K_SIGMOID                = 170,
    K_CHI2                   = 180,
    K_DIAG                   = 190,
    K_CONST                  = 200,
    K_MINDYGRAM              = 210,
    K_DISTANCE               = 220,
    K_LOCALALIGNMENT         = 230,
    K_PYRAMIDCHI2            = 240,
    K_OLIGO                  = 250,
    K_MATCHWORD              = 260,
    K_TPPK                   = 270
};
00071 
/** Capability flags of a kernel.
 *
 * Each non-zero enumerator occupies its own bit so that several of them can
 * be OR-ed together in CKernel::properties and queried with
 * CKernel::has_property().
 */
enum EKernelProperty
{
    /** no special capability */
    KP_NONE = 0,
    /** kernel can be optimized by doing normal updates w + dw */
    KP_LINADD = 1 << 0,
    /** kernel is in fact a linear combination of subkernels,
     *  K = \sum_i b_i * K_i */
    KP_KERNCOMBINATION = 1 << 1,
    /** kernel can generate normals on the fly in linadd and processes
     *  batches more quickly / memory efficiently than single examples */
    KP_BATCHEVALUATION = 1 << 2
};
00079 
00080 
00081 class CSVM;
00082 
00108 class CKernel : public CSGObject
00109 {
00110     friend class CSqrtDiagKernelNormalizer;
00111     friend class CAvgDiagKernelNormalizer;
00112     friend class CFirstElementKernelNormalizer;
00113     friend class CTanimotoKernelNormalizer;
00114     friend class CDiceKernelNormalizer;
00115 
00116     public:
00121         CKernel(int32_t size);
00122 
00129         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00130 
00131         virtual ~CKernel();
00132 
00140         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00141         {
00142             if (idx_a < 0 || idx_b <0)
00143                 return 0;
00144 
00145             if (lhs && lhs==rhs)
00146             {
00147                 int32_t num_vectors = lhs->get_num_vectors();
00148 
00149                 if (idx_a>=num_vectors)
00150                     idx_a=2*num_vectors-1-idx_a;
00151 
00152                 if (idx_b>=num_vectors)
00153                     idx_b=2*num_vectors-1-idx_b;
00154             }
00155 
00156             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00157         }
00158 
00165         void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00166 
00174         virtual float64_t* get_kernel_matrix_real(
00175             int32_t &m, int32_t &n, float64_t* target);
00176 
00184         virtual float32_t* get_kernel_matrix_shortreal(
00185             int32_t &m, int32_t &n, float32_t* target);
00186 
00197         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00198 
00203         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00204 
00209         virtual CKernelNormalizer* get_normalizer();
00210 
00214         virtual bool init_normalizer();
00215 
00222         virtual void cleanup();
00223 
00229         bool load(char* fname);
00230 
00236         bool save(char* fname);
00237 
00245         virtual bool load_init(FILE* src)=0;
00246 
00254         virtual bool save_init(FILE* dest)=0;
00255 
00260         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00261 
00266         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00267 
00272         virtual inline int32_t get_num_vec_lhs()
00273         {
00274             if (!lhs)
00275                 return 0;
00276             else
00277                 return lhs->get_num_vectors();
00278         }
00279 
00284         virtual inline int32_t get_num_vec_rhs()
00285         {
00286             if (!rhs)
00287                 return 0;
00288             else
00289                 return rhs->get_num_vectors();
00290         }
00291 
00296         virtual inline bool has_features()
00297         {
00298             return lhs && rhs;
00299         }
00300 
00305         inline bool lhs_equals_rhs()
00306         {
00307             return lhs==rhs;
00308         }
00309 
00311         virtual void remove_lhs_and_rhs();
00312 
00314         virtual void remove_lhs();
00315 
00317         virtual void remove_rhs();
00318 
00326         virtual EKernelType get_kernel_type()=0 ;
00327 
00334         virtual EFeatureType get_feature_type()=0;
00335 
00342         virtual EFeatureClass get_feature_class()=0;
00343 
00348         inline void set_cache_size(int32_t size)
00349         {
00350             cache_size = size;
00351 
00352         }
00353 
00358         inline int32_t get_cache_size() { return cache_size; }
00359 
00360 
00361 
00363         void list_kernel();
00364 
00370         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00371 
00375         virtual void clear_normal();
00376 
00382         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00383 
00388         inline EOptimizationType get_optimization_type() { return opt_type; }
00389 
00394         virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00395 
00400         inline bool get_is_initialized() { return optimization_initialized; }
00401 
00409         virtual bool init_optimization(
00410             int32_t count, int32_t *IDX, float64_t *weights);
00411 
00416         virtual bool delete_optimization();
00417 
00423         bool init_optimization_svm(CSVM * svm) ;
00424 
00430         virtual float64_t compute_optimized(int32_t vector_idx);
00431 
00440         virtual void compute_batch(
00441             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00442             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00443             float64_t factor=1.0);
00444 
00449         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00450 
00455         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00456 
00461         virtual int32_t get_num_subkernels();
00462 
00468         virtual void compute_by_subkernel(
00469             int32_t vector_idx, float64_t * subkernel_contrib);
00470 
00476         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00477 
00483         virtual void set_subkernel_weights(
00484             float64_t* weights, int32_t num_weights);
00485 
00486     protected:
00491         inline void set_property(EKernelProperty p)
00492         {
00493             properties |= p;
00494         }
00495 
00500         inline void unset_property(EKernelProperty p)
00501         {
00502             properties &= (properties | p) ^ p;
00503         }
00504 
00509         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00510 
00521         virtual float64_t compute(int32_t x, int32_t y)=0;
00522 
00523 
00525 
00526 
00527     protected:
00529         int32_t cache_size;
00530 
00531 
00532 
00535         KERNELCACHE_ELEM* kernel_matrix;
00536 
00538         CFeatures* lhs;
00540         CFeatures* rhs;
00541 
00543         float64_t combined_kernel_weight;
00544 
00546         bool optimization_initialized;
00550         EOptimizationType opt_type;
00551 
00553         uint64_t  properties;
00554 
00557         CKernelNormalizer* normalizer;
00558 };
00559 
00560 #endif /* _KERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation