Kernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Signal.h"
00017 #include "lib/File.h"
00018 #include "lib/Mathematics.h"
00019 #include "base/SGObject.h"
00020 #include "features/Features.h"
00021 #include "kernel/KernelNormalizer.h"
00022 
00023 
00024 namespace shogun
00025 {
00026     class CFile;
00027     class CFeatures;
00028     class CKernelNormalizer;
00029     enum EFeatureType;
00030     enum EFeatureClass;
00031 
00032 #ifdef USE_SHORTREAL_KERNELCACHE
00033     typedef float32_t KERNELCACHE_ELEM;
00034 #else
00035     typedef float64_t KERNELCACHE_ELEM;
00036 #endif
00037 
00038 typedef int64_t KERNELCACHE_IDX;
00039 
00040 
/** Optimization mode stored in CKernel::opt_type and accessed through
 * CKernel::get_optimization_type() / CKernel::set_optimization_type().
 *
 * NOTE(review): going by the names, the two modes trade speed against
 * memory use; what each mode changes is decided by the individual
 * kernel implementations, not by this header.
 */
enum EOptimizationType
{
    FASTBUTMEMHUNGRY    = 0,
    SLOWBUTMEMEFFICIENT = 1
};
00046 
/** Identifier returned by CKernel::get_kernel_type().
 *
 * The numeric values are fixed explicitly and related kernels share a
 * decade (e.g. 30-33 for the Gaussian family, 80-82 for the weighted
 * degree family). Some decades (130, 210) are unused. Do not renumber
 * existing entries: the values may be stored or compared elsewhere.
 */
enum EKernelType
{
    K_UNKNOWN                = 0,

    K_LINEAR                 = 10,
    K_SPARSELINEAR           = 11,

    K_POLY                   = 20,

    K_GAUSSIAN               = 30,
    K_SPARSEGAUSSIAN         = 31,
    K_GAUSSIANSHIFT          = 32,
    K_GAUSSIANMATCH          = 33,

    K_HISTOGRAM              = 40,
    K_SALZBERG               = 41,

    K_LOCALITYIMPROVED       = 50,
    K_SIMPLELOCALITYIMPROVED = 60,
    K_FIXEDDEGREE            = 70,

    K_WEIGHTEDDEGREE         = 80,
    K_WEIGHTEDDEGREEPOS      = 81,
    K_WEIGHTEDDEGREERBF      = 82,

    K_WEIGHTEDCOMMWORDSTRING = 90,
    K_POLYMATCH              = 100,
    K_ALIGNMENT              = 110,

    K_COMMWORDSTRING         = 120,
    K_COMMULONGSTRING        = 121,
    K_SPECTRUMMISMATCHRBF    = 122,

    K_COMBINED               = 140,
    K_AUC                    = 150,
    K_CUSTOM                 = 160,
    K_SIGMOID                = 170,
    K_CHI2                   = 180,
    K_DIAG                   = 190,
    K_CONST                  = 200,
    K_DISTANCE               = 220,
    K_LOCALALIGNMENT         = 230,
    K_PYRAMIDCHI2            = 240,
    K_OLIGO                  = 250,
    K_MATCHWORD              = 260,
    K_TPPK                   = 270,
    K_REGULATORYMODULES      = 280
};
00086 
/** Capability flags of a kernel.
 *
 * Each enumerator occupies its own bit so that several of them can be
 * OR-ed together in CKernel::properties (see CKernel::set_property(),
 * CKernel::unset_property() and CKernel::has_property()).
 */
enum EKernelProperty
{
    /** no special capability */
    KP_NONE            = 0,
    /** kernels that can be optimized via doing normal updates w + dw */
    KP_LINADD          = 1 << 0,
    /** kernels that are in fact a linear combination of subkernels
     * K=\sum_i b_i*K_i */
    KP_KERNCOMBINATION = 1 << 1,
    /** kernels that can on the fly generate normals in linadd and more
     * quickly/memory efficient process batches instead of single examples */
    KP_BATCHEVALUATION = 1 << 2
};
00094 
// Only a pointer to the kernel is stored here, so a forward declaration is
// sufficient and keeps this struct independent of the include order.
class CKernel;

/** Work description for one invocation of
 * CKernel::get_kernel_matrix_helper().
 *
 * CKernel::get_kernel_matrix() fills one instance per worker: rows
 * [start,end) of the m x n matrix are computed and written to result.
 *
 * total_start/total_end are element counts over the whole matrix (up to
 * m*n) and are therefore 64 bit wide; a 32 bit field would truncate the
 * values assigned by get_kernel_matrix() for matrices with 2^31 or more
 * entries.
 */
template <class T> struct K_THREAD_PARAM
{
    /** kernel whose entries are evaluated */
    CKernel* kernel;
    /** first row to compute (inclusive) */
    int32_t start;
    /** row at which to stop (exclusive) */
    int32_t end;
    /** progress counter value this worker starts from */
    int64_t total_start;
    /** progress counter value passed as upper bound to the progress display */
    int64_t total_end;
    /** number of rows of the matrix; also the distance between two
     * consecutive columns in result */
    int32_t m;
    /** number of columns of the matrix */
    int32_t n;
    /** output buffer holding the complete m x n matrix */
    T* result;
    /** when true only entries with column >= row are evaluated and each
     * value is mirrored to the transposed position */
    bool symmetric;
    /** when true the worker reports progress and polls for cancellation */
    bool verbose;
};
00119 
00120 class CSVM;
00121 
00147 class CKernel : public CSGObject
00148 {
00149     friend class CVarianceKernelNormalizer;
00150     friend class CSqrtDiagKernelNormalizer;
00151     friend class CAvgDiagKernelNormalizer;
00152     friend class CRidgeKernelNormalizer;
00153     friend class CFirstElementKernelNormalizer;
00154     friend class CMultitaskKernelNormalizer;
00155     friend class CMultitaskKernelMklNormalizer;
00156     friend class CMultitaskKernelMaskNormalizer;
00157     friend class CMultitaskKernelMaskPairNormalizer;
00158     friend class CTanimotoKernelNormalizer;
00159     friend class CDiceKernelNormalizer;
00160 
00161     public:
00162 
00166         CKernel();
00167 
00168 
00173         CKernel(int32_t size);
00174 
00181         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00182 
00183         virtual ~CKernel();
00184 
00192         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00193         {
00194             if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00195             {
00196                 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00197                         idx_a,num_lhs, idx_b,num_rhs);
00198             }
00199 
00200             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00201         }
00202 
00209         void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00210 
00218         template <class T>
00219         T* get_kernel_matrix(int32_t &m, int32_t &n, T* target)
00220         {
00221             T* result = NULL;
00222 
00223             if (!has_features())
00224                 SG_ERROR( "no features assigned to kernel\n");
00225 
00226             if (target && (m!=get_num_vec_lhs() ||
00227                         n!=get_num_vec_rhs()) )
00228             {
00229                 SG_ERROR( "kernel matrix size mismatch\n");
00230             }
00231 
00232             m=get_num_vec_lhs();
00233             n=get_num_vec_rhs();
00234 
00235             int64_t total_num = int64_t(m)*n;
00236 
00237             // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
00238             bool symmetric= (lhs && lhs==rhs && m==n);
00239 
00240             SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00241 
00242             if (target)
00243                 result=target;
00244             else
00245                 result=new T[total_num];
00246 
00247             int32_t num_threads=parallel->get_num_threads();
00248             if (num_threads < 2)
00249             {
00250                 K_THREAD_PARAM<T> params;
00251                 params.kernel=this;
00252                 params.result=result;
00253                 params.start=0;
00254                 params.end=m;
00255                 params.total_start=0;
00256                 params.total_end=total_num;
00257                 params.n=n;
00258                 params.m=m;
00259                 params.symmetric=symmetric;
00260                 params.verbose=true;
00261                 get_kernel_matrix_helper<T>((void*) &params);
00262             }
00263             else
00264             {
00265                 pthread_t* threads = new pthread_t[num_threads-1];
00266                 K_THREAD_PARAM<T>* params = new K_THREAD_PARAM<T>[num_threads];
00267                 int64_t step= total_num/num_threads;
00268 
00269                 int32_t t;
00270 
00271                 for (t=0; t<num_threads-1; t++)
00272                 {
00273                     params[t].kernel = this;
00274                     params[t].result = result;
00275                     params[t].start = compute_row_start(t*step, n, symmetric);
00276                     params[t].end = compute_row_start((t+1)*step, n, symmetric);
00277                     params[t].total_start=t*step;
00278                     params[t].total_end=(t+1)*step;
00279                     params[t].n=n;
00280                     params[t].m=m;
00281                     params[t].symmetric=symmetric;
00282                     params[t].verbose=false;
00283                     pthread_create(&threads[t], NULL,
00284                             CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
00285                 }
00286 
00287                 params[t].kernel = this;
00288                 params[t].result = result;
00289                 params[t].start = compute_row_start(t*step, n, symmetric);
00290                 params[t].end = m;
00291                 params[t].total_start=t*step;
00292                 params[t].total_end=total_num;
00293                 params[t].n=n;
00294                 params[t].m=m;
00295                 params[t].symmetric=symmetric;
00296                 params[t].verbose=true;
00297                 get_kernel_matrix_helper<T>(&params[t]);
00298 
00299                 for (t=0; t<num_threads-1; t++)
00300                     pthread_join(threads[t], NULL);
00301 
00302                 delete[] params;
00303                 delete[] threads;
00304             }
00305 
00306             SG_DONE();
00307 
00308             return result;
00309         }
00310 
00311 
00322         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00323 
00328         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00329 
00334         virtual CKernelNormalizer* get_normalizer();
00335 
00339         virtual bool init_normalizer();
00340 
00347         virtual void cleanup();
00348 
00353         void load(CFile* loader);
00354 
00359         void save(CFile* writer);
00360 
00365         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00366 
00371         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00372 
00377         virtual inline int32_t get_num_vec_lhs()
00378         {
00379             return num_lhs;
00380         }
00381 
00386         virtual inline int32_t get_num_vec_rhs()
00387         {
00388             return num_rhs;
00389         }
00390 
00395         virtual inline bool has_features()
00396         {
00397             return lhs && rhs;
00398         }
00399 
00404         inline bool lhs_equals_rhs()
00405         {
00406             return lhs==rhs;
00407         }
00408 
00410         virtual void remove_lhs_and_rhs();
00411 
00413         virtual void remove_lhs();
00414 
00416         virtual void remove_rhs();
00417 
00425         virtual EKernelType get_kernel_type()=0 ;
00426 
00433         virtual EFeatureType get_feature_type()=0;
00434 
00441         virtual EFeatureClass get_feature_class()=0;
00442 
00447         inline void set_cache_size(int32_t size)
00448         {
00449             cache_size = size;
00450 
00451         }
00452 
00457         inline int32_t get_cache_size() { return cache_size; }
00458 
00459 
00460 
00462         void list_kernel();
00463 
00469         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00470 
00474         virtual void clear_normal();
00475 
00481         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00482 
00487         inline EOptimizationType get_optimization_type() { return opt_type; }
00488 
00493         virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00494 
00499         inline bool get_is_initialized() { return optimization_initialized; }
00500 
00508         virtual bool init_optimization(
00509             int32_t count, int32_t *IDX, float64_t *weights);
00510 
00515         virtual bool delete_optimization();
00516 
00522         bool init_optimization_svm(CSVM * svm) ;
00523 
00529         virtual float64_t compute_optimized(int32_t vector_idx);
00530 
00539         virtual void compute_batch(
00540             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00541             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00542             float64_t factor=1.0);
00543 
00548         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00549 
00554         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00555 
00560         virtual int32_t get_num_subkernels();
00561 
00567         virtual void compute_by_subkernel(
00568             int32_t vector_idx, float64_t * subkernel_contrib);
00569 
00575         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00576 
00582         virtual void set_subkernel_weights(
00583             float64_t* weights, int32_t num_weights);
00584 
00585     protected:
00590         inline void set_property(EKernelProperty p)
00591         {
00592             properties |= p;
00593         }
00594 
00599         inline void unset_property(EKernelProperty p)
00600         {
00601             properties &= (properties | p) ^ p;
00602         }
00603 
00608         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00609 
00620         virtual float64_t compute(int32_t x, int32_t y)=0;
00621 
00628         int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00629         {
00630             int32_t i_start;
00631 
00632             if (symmetric)
00633                 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00634             else
00635                 i_start=(int32_t) (offs/int64_t(n));
00636 
00637             return i_start;
00638         }
00639 
00644         template <class T>
00645         static void* get_kernel_matrix_helper(void* p)
00646         {
00647             K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00648             int32_t i_start=params->start;
00649             int32_t i_end=params->end;
00650             CKernel* k=params->kernel;
00651             T* result=params->result;
00652             bool symmetric=params->symmetric;
00653             int32_t n=params->n;
00654             int32_t m=params->m;
00655             bool verbose=params->verbose;
00656             int64_t total_start=params->total_start;
00657             int64_t total_end=params->total_end;
00658             int64_t total=total_start;
00659 
00660             for (int32_t i=i_start; i<i_end; i++)
00661             {
00662                 int32_t j_start=0;
00663 
00664                 if (symmetric)
00665                     j_start=i;
00666 
00667                 for (int32_t j=j_start; j<n; j++)
00668                 {
00669                     float64_t v=k->kernel(i,j);
00670                     result[i+j*m]=v;
00671 
00672                     if (symmetric && i!=j)
00673                         result[j+i*m]=v;
00674 
00675                     if (verbose)
00676                     {
00677                         total++;
00678 
00679                         if (symmetric && i!=j)
00680                             total++;
00681 
00682                         if (total%100 == 0)
00683                             k->SG_PROGRESS(total, total_start, total_end);
00684 
00685                         if (CSignal::cancel_computations())
00686                             break;
00687                     }
00688                 }
00689 
00690             }
00691 
00692             return NULL;
00693         }
00694 
00695 
00697 
00698 
00699 #ifdef HAVE_BOOST_SERIALIZATION
00700     private:
00701 
00702         friend class ::boost::serialization::access;
00703         template<class Archive>
00704             void serialize(Archive & ar, const unsigned int archive_version)
00705             {
00706 
00707                 SG_DEBUG("archiving CKernel\n");
00708 
00709                 ar & ::boost::serialization::base_object<CSGObject>(*this);
00710 
00711                 ar & cache_size;
00712 
00713 
00714 
00715                 //TODO
00716                 //KERNELCACHE_ELEM* kernel_matrix;
00717 
00718                 //TODO
00719                 //SHORTREAL * precomputed_matrix ;
00720                 //ar & precompute_subkernel_matrix ;
00721                 //ar & precompute_matrix ;
00722 
00723                 ar & rhs;
00724                 ar & lhs;
00725 
00726                 ar & combined_kernel_weight;
00727 
00728                 ar & optimization_initialized;
00729 
00730                 ar & opt_type;
00731 
00732                 ar & properties;
00733 
00734                 SG_DEBUG("done with CKernel\n");
00735 
00736             }
00737 
00738 #endif //HAVE_BOOST_SERIALIZATION
00739 
00740 
00741 
00742     protected:
00744         int32_t cache_size;
00745 
00746 
00747 
00750         KERNELCACHE_ELEM* kernel_matrix;
00751 
00753         CFeatures* lhs;
00755         CFeatures* rhs;
00756 
00758         int32_t num_lhs;
00760         int32_t num_rhs;
00761 
00763         float64_t combined_kernel_weight;
00764 
00766         bool optimization_initialized;
00770         EOptimizationType opt_type;
00771 
00773         uint64_t  properties;
00774 
00777         CKernelNormalizer* normalizer;
00778 };
00779 
00780 }
00781 #endif /* _KERNEL_H___ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation