00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Signal.h"
00017 #include "lib/File.h"
00018 #include "lib/Mathematics.h"
00019 #include "base/SGObject.h"
00020 #include "features/Features.h"
00021 #include "kernel/KernelNormalizer.h"
00022
00023
00024 namespace shogun
00025 {
/* Forward declarations of the classes referenced by the declarations below. */
class CFile;
class CFeatures;
class CKernelNormalizer;
/* EFeatureType and EFeatureClass are deliberately NOT forward declared here:
 * an opaque declaration of an unscoped enum without a fixed underlying type
 * ("enum EFeatureType;") is not valid ISO C++. Their definitions are expected
 * to be visible through features/Features.h, included above.
 * NOTE(review): confirm that header (or one it pulls in) defines both enums. */
00031
00032 #ifdef USE_SHORTREAL_KERNELCACHE
00033 typedef float32_t KERNELCACHE_ELEM;
00034 #else
00035 typedef float64_t KERNELCACHE_ELEM;
00036 #endif
00037
00038 typedef int64_t KERNELCACHE_IDX;
00039
00040
/** Optimization strategy selector stored in CKernel::opt_type and accessed
 * through CKernel::get_optimization_type() / set_optimization_type().
 */
enum EOptimizationType
{
	/** value 0 */
	FASTBUTMEMHUNGRY,
	/** value 1 */
	SLOWBUTMEMEFFICIENT
};
00046
/** Identifier of a concrete kernel implementation, as returned by
 * CKernel::get_kernel_type(). The numeric values are fixed explicitly and
 * must not be renumbered.
 */
enum EKernelType
{
	K_UNKNOWN                = 0,
	K_LINEAR                 = 10,
	K_SPARSELINEAR           = 11,
	K_POLY                   = 20,
	K_GAUSSIAN               = 30,
	K_SPARSEGAUSSIAN         = 31,
	K_GAUSSIANSHIFT          = 32,
	K_GAUSSIANMATCH          = 33,
	K_HISTOGRAM              = 40,
	K_SALZBERG               = 41,
	K_LOCALITYIMPROVED       = 50,
	K_SIMPLELOCALITYIMPROVED = 60,
	K_FIXEDDEGREE            = 70,
	K_WEIGHTEDDEGREE         = 80,
	K_WEIGHTEDDEGREEPOS      = 81,
	K_WEIGHTEDDEGREERBF      = 82,
	K_WEIGHTEDCOMMWORDSTRING = 90,
	K_POLYMATCH              = 100,
	K_ALIGNMENT              = 110,
	K_COMMWORDSTRING         = 120,
	K_COMMULONGSTRING        = 121,
	K_SPECTRUMMISMATCHRBF    = 122,
	K_COMBINED               = 140,
	K_AUC                    = 150,
	K_CUSTOM                 = 160,
	K_SIGMOID                = 170,
	K_CHI2                   = 180,
	K_DIAG                   = 190,
	K_CONST                  = 200,
	K_DISTANCE               = 220,
	K_LOCALALIGNMENT         = 230,
	K_PYRAMIDCHI2            = 240,
	K_OLIGO                  = 250,
	K_MATCHWORD              = 260,
	K_TPPK                   = 270,
	K_REGULATORYMODULES      = 280
};
00086
/** Capability flags of a kernel. The values are distinct bits: they are
 * OR-ed into CKernel::properties by set_property() and tested with
 * has_property().
 */
enum EKernelProperty
{
	KP_NONE            = 0,
	KP_LINADD          = 1,
	KP_KERNCOMBINATION = 2,
	KP_BATCHEVALUATION = 4
};
00094
/* Only a pointer to CKernel is stored, so a forward declaration suffices. */
class CKernel;

/** Work description passed (as void*) to one invocation of
 * CKernel::get_kernel_matrix_helper().
 *
 * total_start and total_end are 64 bit wide: they are filled from an int64_t
 * element count (rows times columns) and read back as int64_t, so a 32 bit
 * field would silently truncate for matrices with more than 2^31-1 entries.
 */
template <class T> struct K_THREAD_PARAM
{
	/** kernel whose kernel(i,j) is evaluated */
	CKernel* kernel;
	/** first row to compute (inclusive) */
	int32_t start;
	/** last row to compute (exclusive) */
	int32_t end;
	/** progress counter value this slice starts at */
	int64_t total_start;
	/** progress counter value reported as the end of this slice */
	int64_t total_end;
	/** number of rows of the result matrix (stride used for indexing) */
	int32_t m;
	/** number of columns of the result matrix */
	int32_t n;
	/** output buffer shared by all slices */
	T* result;
	/** if true only columns j>=i are evaluated and mirrored to (j,i) */
	bool symmetric;
	/** if true this slice reports progress and polls for cancellation */
	bool verbose;
};
00119
00120 class CSVM;
00121
00147 class CKernel : public CSGObject
00148 {
00149 friend class CVarianceKernelNormalizer;
00150 friend class CSqrtDiagKernelNormalizer;
00151 friend class CAvgDiagKernelNormalizer;
00152 friend class CRidgeKernelNormalizer;
00153 friend class CFirstElementKernelNormalizer;
00154 friend class CMultitaskKernelNormalizer;
00155 friend class CMultitaskKernelMklNormalizer;
00156 friend class CMultitaskKernelMaskNormalizer;
00157 friend class CMultitaskKernelMaskPairNormalizer;
00158 friend class CTanimotoKernelNormalizer;
00159 friend class CDiceKernelNormalizer;
00160
00161 public:
00162
00166 CKernel();
00167
00168
00173 CKernel(int32_t size);
00174
00181 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00182
00183 virtual ~CKernel();
00184
00192 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00193 {
00194 if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00195 {
00196 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00197 idx_a,num_lhs, idx_b,num_rhs);
00198 }
00199
00200 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00201 }
00202
00209 void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00210
00218 template <class T>
00219 T* get_kernel_matrix(int32_t &m, int32_t &n, T* target)
00220 {
00221 T* result = NULL;
00222
00223 if (!has_features())
00224 SG_ERROR( "no features assigned to kernel\n");
00225
00226 if (target && (m!=get_num_vec_lhs() ||
00227 n!=get_num_vec_rhs()) )
00228 {
00229 SG_ERROR( "kernel matrix size mismatch\n");
00230 }
00231
00232 m=get_num_vec_lhs();
00233 n=get_num_vec_rhs();
00234
00235 int64_t total_num = int64_t(m)*n;
00236
00237
00238 bool symmetric= (lhs && lhs==rhs && m==n);
00239
00240 SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00241
00242 if (target)
00243 result=target;
00244 else
00245 result=new T[total_num];
00246
00247 int32_t num_threads=parallel->get_num_threads();
00248 if (num_threads < 2)
00249 {
00250 K_THREAD_PARAM<T> params;
00251 params.kernel=this;
00252 params.result=result;
00253 params.start=0;
00254 params.end=m;
00255 params.total_start=0;
00256 params.total_end=total_num;
00257 params.n=n;
00258 params.m=m;
00259 params.symmetric=symmetric;
00260 params.verbose=true;
00261 get_kernel_matrix_helper<T>((void*) ¶ms);
00262 }
00263 else
00264 {
00265 pthread_t* threads = new pthread_t[num_threads-1];
00266 K_THREAD_PARAM<T>* params = new K_THREAD_PARAM<T>[num_threads];
00267 int64_t step= total_num/num_threads;
00268
00269 int32_t t;
00270
00271 for (t=0; t<num_threads-1; t++)
00272 {
00273 params[t].kernel = this;
00274 params[t].result = result;
00275 params[t].start = compute_row_start(t*step, n, symmetric);
00276 params[t].end = compute_row_start((t+1)*step, n, symmetric);
00277 params[t].total_start=t*step;
00278 params[t].total_end=(t+1)*step;
00279 params[t].n=n;
00280 params[t].m=m;
00281 params[t].symmetric=symmetric;
00282 params[t].verbose=false;
00283 pthread_create(&threads[t], NULL,
00284 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]);
00285 }
00286
00287 params[t].kernel = this;
00288 params[t].result = result;
00289 params[t].start = compute_row_start(t*step, n, symmetric);
00290 params[t].end = m;
00291 params[t].total_start=t*step;
00292 params[t].total_end=total_num;
00293 params[t].n=n;
00294 params[t].m=m;
00295 params[t].symmetric=symmetric;
00296 params[t].verbose=true;
00297 get_kernel_matrix_helper<T>(¶ms[t]);
00298
00299 for (t=0; t<num_threads-1; t++)
00300 pthread_join(threads[t], NULL);
00301
00302 delete[] params;
00303 delete[] threads;
00304 }
00305
00306 SG_DONE();
00307
00308 return result;
00309 }
00310
00311
00322 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00323
00328 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00329
00334 virtual CKernelNormalizer* get_normalizer();
00335
00339 virtual bool init_normalizer();
00340
00347 virtual void cleanup();
00348
00353 void load(CFile* loader);
00354
00359 void save(CFile* writer);
00360
00365 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00366
00371 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00372
00377 virtual inline int32_t get_num_vec_lhs()
00378 {
00379 return num_lhs;
00380 }
00381
00386 virtual inline int32_t get_num_vec_rhs()
00387 {
00388 return num_rhs;
00389 }
00390
00395 virtual inline bool has_features()
00396 {
00397 return lhs && rhs;
00398 }
00399
00404 inline bool lhs_equals_rhs()
00405 {
00406 return lhs==rhs;
00407 }
00408
00410 virtual void remove_lhs_and_rhs();
00411
00413 virtual void remove_lhs();
00414
00416 virtual void remove_rhs();
00417
00425 virtual EKernelType get_kernel_type()=0 ;
00426
00433 virtual EFeatureType get_feature_type()=0;
00434
00441 virtual EFeatureClass get_feature_class()=0;
00442
00447 inline void set_cache_size(int32_t size)
00448 {
00449 cache_size = size;
00450
00451 }
00452
00457 inline int32_t get_cache_size() { return cache_size; }
00458
00459
00460
00462 void list_kernel();
00463
00469 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00470
00474 virtual void clear_normal();
00475
00481 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00482
00487 inline EOptimizationType get_optimization_type() { return opt_type; }
00488
00493 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00494
00499 inline bool get_is_initialized() { return optimization_initialized; }
00500
00508 virtual bool init_optimization(
00509 int32_t count, int32_t *IDX, float64_t *weights);
00510
00515 virtual bool delete_optimization();
00516
00522 bool init_optimization_svm(CSVM * svm) ;
00523
00529 virtual float64_t compute_optimized(int32_t vector_idx);
00530
00539 virtual void compute_batch(
00540 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00541 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00542 float64_t factor=1.0);
00543
00548 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00549
00554 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00555
00560 virtual int32_t get_num_subkernels();
00561
00567 virtual void compute_by_subkernel(
00568 int32_t vector_idx, float64_t * subkernel_contrib);
00569
00575 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00576
00582 virtual void set_subkernel_weights(
00583 float64_t* weights, int32_t num_weights);
00584
00585 protected:
00590 inline void set_property(EKernelProperty p)
00591 {
00592 properties |= p;
00593 }
00594
00599 inline void unset_property(EKernelProperty p)
00600 {
00601 properties &= (properties | p) ^ p;
00602 }
00603
00608 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00609
00620 virtual float64_t compute(int32_t x, int32_t y)=0;
00621
00628 int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00629 {
00630 int32_t i_start;
00631
00632 if (symmetric)
00633 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00634 else
00635 i_start=(int32_t) (offs/int64_t(n));
00636
00637 return i_start;
00638 }
00639
00644 template <class T>
00645 static void* get_kernel_matrix_helper(void* p)
00646 {
00647 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00648 int32_t i_start=params->start;
00649 int32_t i_end=params->end;
00650 CKernel* k=params->kernel;
00651 T* result=params->result;
00652 bool symmetric=params->symmetric;
00653 int32_t n=params->n;
00654 int32_t m=params->m;
00655 bool verbose=params->verbose;
00656 int64_t total_start=params->total_start;
00657 int64_t total_end=params->total_end;
00658 int64_t total=total_start;
00659
00660 for (int32_t i=i_start; i<i_end; i++)
00661 {
00662 int32_t j_start=0;
00663
00664 if (symmetric)
00665 j_start=i;
00666
00667 for (int32_t j=j_start; j<n; j++)
00668 {
00669 float64_t v=k->kernel(i,j);
00670 result[i+j*m]=v;
00671
00672 if (symmetric && i!=j)
00673 result[j+i*m]=v;
00674
00675 if (verbose)
00676 {
00677 total++;
00678
00679 if (symmetric && i!=j)
00680 total++;
00681
00682 if (total%100 == 0)
00683 k->SG_PROGRESS(total, total_start, total_end);
00684
00685 if (CSignal::cancel_computations())
00686 break;
00687 }
00688 }
00689
00690 }
00691
00692 return NULL;
00693 }
00694
00695
00697
00698
00699 #ifdef HAVE_BOOST_SERIALIZATION
00700 private:
00701
00702 friend class ::boost::serialization::access;
00703 template<class Archive>
00704 void serialize(Archive & ar, const unsigned int archive_version)
00705 {
00706
00707 SG_DEBUG("archiving CKernel\n");
00708
00709 ar & ::boost::serialization::base_object<CSGObject>(*this);
00710
00711 ar & cache_size;
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723 ar & rhs;
00724 ar & lhs;
00725
00726 ar & combined_kernel_weight;
00727
00728 ar & optimization_initialized;
00729
00730 ar & opt_type;
00731
00732 ar & properties;
00733
00734 SG_DEBUG("done with CKernel\n");
00735
00736 }
00737
00738 #endif //HAVE_BOOST_SERIALIZATION
00739
00740
00741
00742 protected:
00744 int32_t cache_size;
00745
00746
00747
00750 KERNELCACHE_ELEM* kernel_matrix;
00751
00753 CFeatures* lhs;
00755 CFeatures* rhs;
00756
00758 int32_t num_lhs;
00760 int32_t num_rhs;
00761
00763 float64_t combined_kernel_weight;
00764
00766 bool optimization_initialized;
00770 EOptimizationType opt_type;
00771
00773 uint64_t properties;
00774
00777 CKernelNormalizer* normalizer;
00778 };
00779
00780 }
00781 #endif