00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "base/SGObject.h"
00017 #include "features/Features.h"
00018 #include "kernel/KernelNormalizer.h"
00019
/**
 * Element type used for kernel cache / kernel matrix storage.
 *
 * Selected at compile time: float32_t when USE_SHORTREAL_KERNELCACHE is
 * defined, float64_t otherwise. CKernel::kernel_matrix is declared as a
 * pointer to this type.
 */
00020 #ifdef USE_SHORTREAL_KERNELCACHE
00021 typedef float32_t KERNELCACHE_ELEM;
00022 #else
00023 typedef float64_t KERNELCACHE_ELEM;
00024 #endif
00025
/**
 * 64-bit signed integer type; by its name, presumably used to index into the
 * kernel cache (no use of it is visible in this listing -- TODO confirm).
 */
00026 typedef int64_t KERNELCACHE_IDX;
00027
00028
/**
 * Optimization mode of a kernel.
 *
 * Stored in CKernel::opt_type and accessed through
 * CKernel::get_optimization_type() / CKernel::set_optimization_type().
 * No explicit values are given, so FASTBUTMEMHUNGRY is 0 and
 * SLOWBUTMEMEFFICIENT is 1.
 *
 * NOTE(review): going by the names, the two modes trade memory for speed;
 * what each kernel actually does per mode is defined outside this file.
 */
00029 enum EOptimizationType
00030 {
00031 FASTBUTMEMHUNGRY,
00032 SLOWBUTMEMEFFICIENT
00033 };
00034
/**
 * Identifier for a concrete kernel implementation; this is the return type
 * of the pure virtual CKernel::get_kernel_type().
 *
 * Every enumerator has an explicitly assigned value. Values advance in steps
 * of ten, with closely related kernels sharing a decade (e.g. K_LINEAR = 10
 * and K_SPARSELINEAR = 11; K_GAUSSIAN = 30, K_SPARSEGAUSSIAN = 31 and
 * K_GAUSSIANSHIFT = 32). The value 130 is not assigned to any enumerator.
 *
 * NOTE(review): the explicit numbering suggests the values may be relied on
 * outside this header (e.g. persisted or exposed to other languages) --
 * confirm before renumbering or reusing a value.
 */
00035 enum EKernelType
00036 {
00037 K_UNKNOWN = 0,
00038 K_LINEAR = 10,
00039 K_SPARSELINEAR = 11,
00040 K_POLY = 20,
00041 K_GAUSSIAN = 30,
00042 K_SPARSEGAUSSIAN = 31,
00043 K_GAUSSIANSHIFT = 32,
00044 K_HISTOGRAM = 40,
00045 K_SALZBERG = 41,
00046 K_LOCALITYIMPROVED = 50,
00047 K_SIMPLELOCALITYIMPROVED = 60,
00048 K_FIXEDDEGREE = 70,
00049 K_WEIGHTEDDEGREE = 80,
00050 K_WEIGHTEDDEGREEPOS = 81,
00051 K_WEIGHTEDCOMMWORDSTRING = 90,
00052 K_POLYMATCH = 100,
00053 K_ALIGNMENT = 110,
00054 K_COMMWORDSTRING = 120,
00055 K_COMMULONGSTRING = 121,
00056 K_COMBINED = 140,
00057 K_AUC = 150,
00058 K_CUSTOM = 160,
00059 K_SIGMOID = 170,
00060 K_CHI2 = 180,
00061 K_DIAG = 190,
00062 K_CONST = 200,
00063 K_MINDYGRAM = 210,
00064 K_DISTANCE = 220,
00065 K_LOCALALIGNMENT = 230,
00066 K_PYRAMIDCHI2 = 240,
00067 K_OLIGO = 250,
00068 K_MATCHWORD = 260,
00069 K_TPPK = 270
00070 };
00071
/**
 * Bit flags describing optional capabilities of a kernel.
 *
 * Each non-zero enumerator is a distinct power of two, so flags can be
 * combined in the CKernel::properties bitmask. CKernel::set_property() ORs a
 * flag in, CKernel::unset_property() clears it and CKernel::has_property()
 * tests it.
 *
 * NOTE(review): the meaning of each flag is not defined in this file. Going
 * by the names, KP_LINADD presumably relates to clear_normal() /
 * add_to_normal() / compute_optimized(), KP_KERNCOMBINATION to the
 * sub-kernel interface, and KP_BATCHEVALUATION to compute_batch() -- confirm
 * against the implementations.
 */
00072 enum EKernelProperty
00073 {
00074 KP_NONE = 0,
00075 KP_LINADD = 1,
00076 KP_KERNCOMBINATION = 2,
00077 KP_BATCHEVALUATION = 4
00078 };
00079
00080
/**
 * Forward declaration only: within this header CSVM appears solely as a
 * pointer parameter of CKernel::init_optimization_svm(), so the full class
 * definition is not required here.
 */
00081 class CSVM;
00082
/**
 * Abstract base class for kernels evaluated on a pair of feature objects.
 *
 * A CKernel holds a left-hand side (lhs) and a right-hand side (rhs)
 * CFeatures pointer, a CKernelNormalizer, a cache size, a bitmask of
 * EKernelProperty flags and some optimization state. Subclasses must
 * implement the pure virtual members compute(), get_kernel_type(),
 * get_feature_type(), get_feature_class(), load_init() and save_init().
 *
 * NOTE(review): this text is a numbered source listing. The gutter numbers
 * jump (e.g. 00116 -> 00121) and a few numbered lines are blank, so the
 * original comments -- and possibly conditionally compiled members -- are
 * not visible here. Do not treat this view as the complete class.
 */
00108 class CKernel : public CSGObject
00109 {
/* The normalizer classes below are friends and may therefore access the
 * protected members of CKernel; which members they use is not visible here. */
00110 friend class CSqrtDiagKernelNormalizer;
00111 friend class CAvgDiagKernelNormalizer;
00112 friend class CFirstElementKernelNormalizer;
00113 friend class CTanimotoKernelNormalizer;
00114 friend class CDiceKernelNormalizer;
00115
00116 public:
/**
 * Constructor without features.
 *
 * @param size presumably the cache size (cf. the cache_size member) --
 *             TODO confirm in the implementation file
 */
00121 CKernel(int32_t size);
00122
/**
 * Constructor taking two feature objects.
 *
 * @param l presumably the left-hand side features -- TODO confirm
 * @param r presumably the right-hand side features -- TODO confirm
 * @param size presumably the cache size -- TODO confirm
 */
00129 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00130
/** Virtual destructor, so derived kernels can be destroyed through a CKernel pointer. */
00131 virtual ~CKernel();
00132
/**
 * Return the normalized kernel value for the index pair (idx_a, idx_b).
 *
 * Steps, as implemented below:
 *  - if either index is negative, 0 is returned without calling compute();
 *  - if lhs is non-null and lhs and rhs are the same object, any index that
 *    is >= lhs->get_num_vectors() is mirrored to
 *    2*num_vectors-1-idx (so num_vectors maps to num_vectors-1,
 *    num_vectors+1 to num_vectors-2, ...);
 *  - the raw value from compute() is passed through normalizer->normalize()
 *    together with the (possibly mirrored) indices.
 *
 * NOTE(review): normalizer is dereferenced without a null check, and a
 * mirrored index is not re-checked -- an index >= 2*num_vectors becomes
 * negative and is still handed to compute().
 *
 * @param idx_a first vector index
 * @param idx_b second vector index
 * @return normalized kernel value, or 0 for a negative input index
 */
00140 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00141 {
00142 if (idx_a < 0 || idx_b <0)
00143 return 0;
00144
/* Index mirroring applies only when both sides are the same feature object. */
00145 if (lhs && lhs==rhs)
00146 {
00147 int32_t num_vectors = lhs->get_num_vectors();
00148
00149 if (idx_a>=num_vectors)
00150 idx_a=2*num_vectors-1-idx_a;
00151
00152 if (idx_b>=num_vectors)
00153 idx_b=2*num_vectors-1-idx_b;
00154 }
00155
00156 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00157 }
00158
/**
 * Obtain the kernel matrix through output pointers.
 *
 * Implementation not shown. Presumably *dst receives the matrix and *m / *n
 * its dimensions; ownership of the returned buffer is not visible here --
 * TODO confirm.
 */
00165 void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00166
/**
 * Obtain the kernel matrix as float64_t values.
 *
 * Implementation not shown. m and n are passed by non-const reference and
 * are presumably set to the matrix dimensions; target is presumably an
 * optional caller-supplied buffer -- TODO confirm.
 */
00174 virtual float64_t* get_kernel_matrix_real(
00175 int32_t &m, int32_t &n, float64_t* target);
00176
/**
 * Obtain the kernel matrix as float32_t values; same parameter layout as
 * get_kernel_matrix_real(). Implementation not shown.
 */
00184 virtual float32_t* get_kernel_matrix_shortreal(
00185 int32_t &m, int32_t &n, float32_t* target);
00186
/**
 * Initialize the kernel with a pair of feature objects.
 *
 * Implementation not shown; presumably stores the arguments in the lhs/rhs
 * members -- TODO confirm, including reference-count handling.
 *
 * @param lhs left-hand side features
 * @param rhs right-hand side features
 * @return success flag (exact meaning defined by the implementation)
 */
00197 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00198
/** Set the kernel normalizer. Implementation not shown; presumably replaces the normalizer member. */
00203 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00204
/** Get the kernel normalizer. Implementation not shown; presumably returns the normalizer member. */
00209 virtual CKernelNormalizer* get_normalizer();
00210
/** Initialize the current normalizer. Implementation not shown. */
00214 virtual bool init_normalizer();
00215
/** Release kernel state. Implementation not shown; what is released is defined there. */
00222 virtual void cleanup();
00223
/**
 * Load the kernel from the file named fname. Implementation not shown;
 * presumably delegates kernel-specific parts to load_init() -- TODO confirm.
 */
00229 bool load(char* fname);
00230
/**
 * Save the kernel to the file named fname. Implementation not shown;
 * presumably delegates kernel-specific parts to save_init() -- TODO confirm.
 */
00236 bool save(char* fname);
00237
/** Pure virtual: subclasses read their initialization data from src. */
00245 virtual bool load_init(FILE* src)=0;
00246
/** Pure virtual: subclasses write their initialization data to dest. */
00254 virtual bool save_init(FILE* dest)=0;
00255
/**
 * Return the left-hand side features after applying SG_REF to them.
 * SG_REF is defined outside this file; presumably it increments a reference
 * count, in which case the caller receives its own reference -- TODO confirm.
 */
00260 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00261
/** Return the right-hand side features after applying SG_REF to them (see the note on SG_REF semantics for get_lhs()). */
00266 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00267
/** Number of vectors on the left-hand side: lhs->get_num_vectors(), or 0 when lhs is null. */
00272 virtual inline int32_t get_num_vec_lhs()
00273 {
00274 if (!lhs)
00275 return 0;
00276 else
00277 return lhs->get_num_vectors();
00278 }
00279
/** Number of vectors on the right-hand side: rhs->get_num_vectors(), or 0 when rhs is null. */
00284 virtual inline int32_t get_num_vec_rhs()
00285 {
00286 if (!rhs)
00287 return 0;
00288 else
00289 return rhs->get_num_vectors();
00290 }
00291
/** True when both lhs and rhs are non-null. */
00296 virtual inline bool has_features()
00297 {
00298 return lhs && rhs;
00299 }
00300
/** Pointer comparison of lhs and rhs; note that this is also true when both are null. */
00305 inline bool lhs_equals_rhs()
00306 {
00307 return lhs==rhs;
00308 }
00309
/** Remove both feature objects. Implementation not shown. */
00311 virtual void remove_lhs_and_rhs();
00312
/** Remove the left-hand side features. Implementation not shown. */
00314 virtual void remove_lhs();
00315
/** Remove the right-hand side features. Implementation not shown. */
00317 virtual void remove_rhs();
00318
/** Pure virtual: subclasses report which EKernelType they implement. */
00326 virtual EKernelType get_kernel_type()=0 ;
00327
/** Pure virtual: subclasses report an EFeatureType (declared outside this file; presumably the feature type they operate on). */
00334 virtual EFeatureType get_feature_type()=0;
00335
/** Pure virtual: subclasses report an EFeatureClass (declared outside this file; presumably the feature class they operate on). */
00342 virtual EFeatureClass get_feature_class()=0;
00343
/**
 * Store size in cache_size. No validation is performed in the visible code.
 * NOTE(review): the listing shows an empty numbered line after the
 * assignment; the original source may contain further, elided statements.
 */
00348 inline void set_cache_size(int32_t size)
00349 {
00350 cache_size = size;
00351
00352 }
00353
/** Return the stored cache_size. */
00358 inline int32_t get_cache_size() { return cache_size; }
00359
00360
00361
/** List or print information about this kernel. Implementation not shown. */
00363 void list_kernel();
00364
/** True when at least one bit of p is set in the properties bitmask. */
00370 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00371
/** Implementation not shown; by name, resets the state accumulated by add_to_normal() -- TODO confirm. */
00375 virtual void clear_normal();
00376
/**
 * Implementation not shown; by name, adds the vector with index vector_idx,
 * scaled by weight, to an accumulated "normal" -- TODO confirm.
 */
00382 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00383
/** Return the stored optimization type (opt_type). */
00388 inline EOptimizationType get_optimization_type() { return opt_type; }
00389
/** Store t as the optimization type; virtual, so subclasses may override. */
00394 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00395
/** Return the optimization_initialized flag. */
00400 inline bool get_is_initialized() { return optimization_initialized; }
00401
/**
 * Initialize the optimization. Implementation not shown.
 *
 * @param count presumably the number of entries in IDX and weights -- TODO confirm
 * @param IDX presumably the indices of the vectors involved -- TODO confirm
 * @param weights presumably one weight per entry of IDX -- TODO confirm
 */
00409 virtual bool init_optimization(
00410 int32_t count, int32_t *IDX, float64_t *weights);
00411
/** Tear down the optimization. Implementation not shown. */
00416 virtual bool delete_optimization();
00417
/** Initialize the optimization from an SVM object. Implementation not shown. */
00423 bool init_optimization_svm(CSVM * svm) ;
00424
/** Compute an output for vector_idx using the initialized optimization. Implementation not shown. */
00430 virtual float64_t compute_optimized(int32_t vector_idx);
00431
/**
 * Batch computation over several vectors. Implementation not shown.
 *
 * Going by the names: vec_idx holds num_vec indices whose results go into
 * target, and IDX / alphas hold num_suppvec support-vector indices and
 * coefficients -- TODO confirm. factor defaults to 1.0.
 */
00440 virtual void compute_batch(
00441 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00442 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00443 float64_t factor=1.0);
00444
/** Return combined_kernel_weight. */
00449 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00450
/** Store nw in combined_kernel_weight. */
00455 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00456
/** Number of sub-kernels. Implementation not shown. */
00461 virtual int32_t get_num_subkernels();
00462
/**
 * Per-sub-kernel computation for vector_idx, with results written through
 * subkernel_contrib. Implementation not shown; the required buffer length is
 * presumably get_num_subkernels() -- TODO confirm.
 */
00468 virtual void compute_by_subkernel(
00469 int32_t vector_idx, float64_t * subkernel_contrib);
00470
/**
 * Return a read-only pointer to the sub-kernel weights. num_weights is
 * passed by non-const reference and is presumably set to the number of
 * weights. Implementation not shown.
 */
00476 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00477
/** Set the sub-kernel weights from an array of num_weights values. Implementation not shown. */
00483 virtual void set_subkernel_weights(
00484 float64_t* weights, int32_t num_weights);
00485
00486 protected:
/** Set the bits of p in the properties bitmask. */
00491 inline void set_property(EKernelProperty p)
00492 {
00493 properties |= p;
00494 }
00495
/**
 * Clear the bits of p in the properties bitmask.
 * (properties | p) ^ p equals properties with p's bits forced to zero, so the
 * statement is equivalent to properties &= ~p.
 */
00500 inline void unset_property(EKernelProperty p)
00501 {
00502 properties &= (properties | p) ^ p;
00503 }
00504
/** Store p_init in the optimization_initialized flag. */
00509 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00510
/**
 * Pure virtual: compute the raw kernel value for the index pair (x, y).
 * kernel() calls this and passes the result through the normalizer, so the
 * value returned here is not yet normalized. x presumably indexes lhs and y
 * rhs -- TODO confirm in a concrete subclass.
 */
00521 virtual float64_t compute(int32_t x, int32_t y)=0;
00522
00523
00525
00526
00527 protected:
/** Cache size, written by set_cache_size(); its unit is not visible in this file. */
00529 int32_t cache_size;
00530
00531
00532
/** Pointer to kernel matrix storage of KERNELCACHE_ELEM values; allocation and lifetime are handled outside this file. */
00535 KERNELCACHE_ELEM* kernel_matrix;
00536
/** Left-hand side features; may be null (see get_num_vec_lhs()). */
00538 CFeatures* lhs;
/** Right-hand side features; may be null (see get_num_vec_rhs()). */
00540 CFeatures* rhs;
00541
/** Weight accessed through get_/set_combined_kernel_weight(). */
00543 float64_t combined_kernel_weight;
00544
/** Flag accessed through get_is_initialized() / set_is_initialized(). */
00546 bool optimization_initialized;
/** Optimization mode accessed through get_/set_optimization_type(). */
00550 EOptimizationType opt_type;
00551
/** Bitmask of EKernelProperty flags. */
00553 uint64_t properties;
00554
/** Normalizer applied to every value returned by kernel(). */
00557 CKernelNormalizer* normalizer;
00558 };
00559
00560 #endif