Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "lib/Signal.h"
00018 
00019 #include "base/Parallel.h"
00020 
00021 #include "kernel/Kernel.h"
00022 #include "kernel/IdentityKernelNormalizer.h"
00023 #include "features/Features.h"
00024 
00025 #include "classifier/svm/SVM.h"
00026 
00027 #include <string.h>
00028 #include <unistd.h>
00029 #include <math.h>
00030 
00031 #ifndef WIN32
00032 #include <pthread.h>
00033 #endif
00034 
00035 
00036 #ifdef HAVE_BOOST_SERIALIZATION
00037 #include <boost/serialization/export.hpp>
00038 BOOST_CLASS_EXPORT(shogun::CKernel);
00039 #endif //HAVE_BOOST_SERIALIZATION
00040 
00041 
00042 using namespace shogun;
00043 
00044 CKernel::CKernel()
00045 : CSGObject(), cache_size(10), kernel_matrix(NULL), lhs(NULL),
00046     rhs(NULL), num_lhs(0), num_rhs(0), combined_kernel_weight(1),
00047     optimization_initialized(false), opt_type(FASTBUTMEMHUNGRY),
00048     properties(KP_NONE), normalizer(NULL)
00049 {
00050 
00051 
00052 
00053     set_normalizer(new CIdentityKernelNormalizer());
00054 }
00055 
00056 CKernel::CKernel(int32_t size)
00057 : CSGObject(), kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0),
00058     num_rhs(0), combined_kernel_weight(1), optimization_initialized(false),
00059     opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00060 {
00061     if (size<10)
00062         size=10;
00063 
00064     cache_size=size;
00065 
00066 
00067     if (get_is_initialized())
00068         SG_ERROR( "COptimizableKernel still initialized on destruction");
00069 
00070     set_normalizer(new CIdentityKernelNormalizer());
00071 }
00072 
00073 
00074 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject(),
00075     kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0), num_rhs(0),
00076     combined_kernel_weight(1), optimization_initialized(false),
00077     opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00078 {
00079     if (size<10)
00080         size=10;
00081 
00082     cache_size=size;
00083 
00084     if (get_is_initialized())
00085         SG_ERROR("Kernel initialized on construction.\n");
00086 
00087     set_normalizer(new CIdentityKernelNormalizer());
00088     init(p_lhs, p_rhs);
00089 }
00090 
00091 CKernel::~CKernel()
00092 {
00093     if (get_is_initialized())
00094         SG_ERROR("Kernel still initialized on destruction.\n");
00095 
00096     remove_lhs_and_rhs();
00097     SG_UNREF(normalizer);
00098 
00099     SG_INFO("Kernel deleted (%p).\n", this);
00100 }
00101 
00102 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
00103 {
00104     ASSERT(dst && m && n);
00105 
00106     float64_t* result = NULL;
00107 
00108     if (has_features())
00109     {
00110         int32_t num_vec1=get_num_vec_lhs();
00111         int32_t num_vec2=get_num_vec_rhs();
00112         *m=num_vec1;
00113         *n=num_vec2;
00114 
00115         int64_t total_num = ((int64_t) num_vec1) * num_vec2;
00116         SG_DEBUG( "allocating memory for a kernel matrix"
00117                 " of size %dx%d\n", num_vec1, num_vec2);
00118 
00119         result=(float64_t*) malloc(sizeof(float64_t)*total_num);
00120         ASSERT(result);
00121         get_kernel_matrix<float64_t>(num_vec1,num_vec2, result);
00122     }
00123     else
00124         SG_ERROR( "no features assigned to kernel\n");
00125 
00126     *dst=result;
00127 }
00128 
00129 
00130 
00131 bool CKernel::init(CFeatures* l, CFeatures* r)
00132 {
00133     //make sure features were indeed supplied
00134     ASSERT(l);
00135     ASSERT(r);
00136 
00137     //make sure features are compatible
00138     ASSERT(l->get_feature_class()==r->get_feature_class());
00139     ASSERT(l->get_feature_type()==r->get_feature_type());
00140 
00141     //remove references to previous features
00142     remove_lhs_and_rhs();
00143 
00144     //increase reference counts
00145     SG_REF(l);
00146     if (l!=r)
00147         SG_REF(r);
00148 
00149     lhs=l;
00150     rhs=r;
00151 
00152     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00153     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00154 
00155     num_lhs=l->get_num_vectors();
00156     num_rhs=r->get_num_vectors();
00157 
00158     return true;
00159 }
00160 
00161 bool CKernel::set_normalizer(CKernelNormalizer* n)
00162 {
00163     SG_REF(n);
00164     if (lhs && rhs)
00165         n->init(this);
00166 
00167     SG_UNREF(normalizer);
00168     normalizer=n;
00169 
00170     return (normalizer!=NULL);
00171 }
00172 
00173 CKernelNormalizer* CKernel::get_normalizer()
00174 {
00175     SG_REF(normalizer)
00176     return normalizer;
00177 }
00178 
00179 bool CKernel::init_normalizer()
00180 {
00181     return normalizer->init(this);
00182 }
00183 
00184 void CKernel::cleanup()
00185 {
00186     remove_lhs_and_rhs();
00187 }
00188 
00189 
00190 
00191 void CKernel::load(CFile* loader)
00192 {
00193 }
00194 
00195 void CKernel::save(CFile* writer)
00196 {
00197     int32_t m,n;
00198     float64_t* km=get_kernel_matrix<float64_t>(m,n, NULL);
00199     writer->set_real_matrix(km, m,n);
00200     delete[] km;
00201 }
00202 
00203 void CKernel::remove_lhs_and_rhs()
00204 {
00205     if (rhs!=lhs)
00206         SG_UNREF(rhs);
00207     rhs = NULL;
00208     num_rhs=0;
00209 
00210     SG_UNREF(lhs);
00211     lhs = NULL;
00212     num_lhs=0;
00213 
00214 
00215 }
00216 
00217 void CKernel::remove_lhs()
00218 {
00219     if (rhs==lhs)
00220         rhs=NULL;
00221     SG_UNREF(lhs);
00222     lhs = NULL;
00223     num_lhs=NULL;
00224 
00225 
00226 }
00227 
00229 void CKernel::remove_rhs()
00230 {
00231     if (rhs!=lhs)
00232         SG_UNREF(rhs);
00233     rhs = NULL;
00234     num_rhs=NULL;
00235 
00236 
00237 }
00238 
00239 
00240 void CKernel::list_kernel()
00241 {
00242     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00243             get_combined_kernel_weight(),
00244             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00245             "SLOWBUTMEMEFFICIENT");
00246 
00247     switch (get_kernel_type())
00248     {
00249         case K_UNKNOWN:
00250             SG_INFO( "K_UNKNOWN ");
00251             break;
00252         case K_LINEAR:
00253             SG_INFO( "K_LINEAR ");
00254             break;
00255         case K_SPARSELINEAR:
00256             SG_INFO( "K_SPARSELINEAR ");
00257             break;
00258         case K_POLY:
00259             SG_INFO( "K_POLY ");
00260             break;
00261         case K_GAUSSIAN:
00262             SG_INFO( "K_GAUSSIAN ");
00263             break;
00264         case K_SPARSEGAUSSIAN:
00265             SG_INFO( "K_SPARSEGAUSSIAN ");
00266             break;
00267         case K_GAUSSIANSHIFT:
00268             SG_INFO( "K_GAUSSIANSHIFT ");
00269             break;
00270         case K_HISTOGRAM:
00271             SG_INFO( "K_HISTOGRAM ");
00272             break;
00273         case K_SALZBERG:
00274             SG_INFO( "K_SALZBERG ");
00275             break;
00276         case K_LOCALITYIMPROVED:
00277             SG_INFO( "K_LOCALITYIMPROVED ");
00278             break;
00279         case K_SIMPLELOCALITYIMPROVED:
00280             SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
00281             break;
00282         case K_FIXEDDEGREE:
00283             SG_INFO( "K_FIXEDDEGREE ");
00284             break;
00285         case K_WEIGHTEDDEGREE:
00286             SG_INFO( "K_WEIGHTEDDEGREE ");
00287             break;
00288         case K_WEIGHTEDDEGREEPOS:
00289             SG_INFO( "K_WEIGHTEDDEGREEPOS ");
00290             break;
00291         case K_WEIGHTEDDEGREERBF:
00292             SG_INFO( "K_WEIGHTEDDEGREERBF ");
00293             break;
00294         case K_WEIGHTEDCOMMWORDSTRING:
00295             SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
00296             break;
00297         case K_POLYMATCH:
00298             SG_INFO( "K_POLYMATCH ");
00299             break;
00300         case K_ALIGNMENT:
00301             SG_INFO( "K_ALIGNMENT ");
00302             break;
00303         case K_COMMWORDSTRING:
00304             SG_INFO( "K_COMMWORDSTRING ");
00305             break;
00306         case K_COMMULONGSTRING:
00307             SG_INFO( "K_COMMULONGSTRING ");
00308             break;
00309         case K_SPECTRUMMISMATCHRBF:
00310             SG_INFO( "K_SPECTRUMMISMATCHRBF ");
00311             break;
00312         case K_COMBINED:
00313             SG_INFO( "K_COMBINED ");
00314             break;
00315         case K_AUC:
00316             SG_INFO( "K_AUC ");
00317             break;
00318         case K_CUSTOM:
00319             SG_INFO( "K_CUSTOM ");
00320             break;
00321         case K_SIGMOID:
00322             SG_INFO( "K_SIGMOID ");
00323             break;
00324         case K_CHI2:
00325             SG_INFO( "K_CHI2 ");
00326             break;
00327         case K_DIAG:
00328             SG_INFO( "K_DIAG ");
00329             break;
00330         case K_CONST:
00331             SG_INFO( "K_CONST ");
00332             break;
00333         case K_DISTANCE:
00334             SG_INFO( "K_DISTANCE ");
00335             break;
00336         case K_LOCALALIGNMENT:
00337             SG_INFO( "K_LOCALALIGNMENT ");
00338             break;
00339         case K_TPPK:
00340             SG_INFO( "K_TPPK ");
00341             break;
00342         default:
00343          SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
00344             break;
00345     }
00346 
00347     switch (get_feature_class())
00348     {
00349         case C_UNKNOWN:
00350             SG_INFO( "C_UNKNOWN ");
00351             break;
00352         case C_SIMPLE:
00353             SG_INFO( "C_SIMPLE ");
00354             break;
00355         case C_SPARSE:
00356             SG_INFO( "C_SPARSE ");
00357             break;
00358         case C_STRING:
00359             SG_INFO( "C_STRING ");
00360             break;
00361         case C_COMBINED:
00362             SG_INFO( "C_COMBINED ");
00363             break;
00364         case C_ANY:
00365             SG_INFO( "C_ANY ");
00366             break;
00367         default:
00368          SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00369     }
00370 
00371     switch (get_feature_type())
00372     {
00373         case F_UNKNOWN:
00374             SG_INFO( "F_UNKNOWN ");
00375             break;
00376         case F_DREAL:
00377             SG_INFO( "F_REAL ");
00378             break;
00379         case F_SHORT:
00380             SG_INFO( "F_SHORT ");
00381             break;
00382         case F_CHAR:
00383             SG_INFO( "F_CHAR ");
00384             break;
00385         case F_INT:
00386             SG_INFO( "F_INT ");
00387             break;
00388         case F_BYTE:
00389             SG_INFO( "F_BYTE ");
00390             break;
00391         case F_WORD:
00392             SG_INFO( "F_WORD ");
00393             break;
00394         case F_ULONG:
00395             SG_INFO( "F_ULONG ");
00396             break;
00397         case F_ANY:
00398             SG_INFO( "F_ANY ");
00399             break;
00400         default:
00401          SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
00402             break;
00403     }
00404     SG_INFO( "\n");
00405 }
00406 
00407 bool CKernel::init_optimization(
00408     int32_t count, int32_t *IDX, float64_t * weights)
00409 {
00410    SG_ERROR( "kernel does not support linadd optimization\n");
00411     return false ;
00412 }
00413 
00414 bool CKernel::delete_optimization()
00415 {
00416    SG_ERROR( "kernel does not support linadd optimization\n");
00417     return false;
00418 }
00419 
00420 float64_t CKernel::compute_optimized(int32_t vector_idx)
00421 {
00422    SG_ERROR( "kernel does not support linadd optimization\n");
00423     return 0;
00424 }
00425 
00426 void CKernel::compute_batch(
00427     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00428     int32_t* IDX, float64_t* weights, float64_t factor)
00429 {
00430    SG_ERROR( "kernel does not support batch computation\n");
00431 }
00432 
00433 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00434 {
00435    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00436 }
00437 
00438 void CKernel::clear_normal()
00439 {
00440    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00441 }
00442 
00443 int32_t CKernel::get_num_subkernels()
00444 {
00445     return 1;
00446 }
00447 
00448 void CKernel::compute_by_subkernel(
00449     int32_t vector_idx, float64_t * subkernel_contrib)
00450 {
00451    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00452 }
00453 
00454 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00455 {
00456     num_weights=1 ;
00457     return &combined_kernel_weight ;
00458 }
00459 
00460 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00461 {
00462     combined_kernel_weight = weights[0] ;
00463     if (num_weights!=1)
00464       SG_ERROR( "number of subkernel weights should be one ...\n");
00465 }
00466 
00467 bool CKernel::init_optimization_svm(CSVM * svm)
00468 {
00469     int32_t num_suppvec=svm->get_num_support_vectors();
00470     int32_t* sv_idx=new int32_t[num_suppvec];
00471     float64_t* sv_weight=new float64_t[num_suppvec];
00472 
00473     for (int32_t i=0; i<num_suppvec; i++)
00474     {
00475         sv_idx[i]    = svm->get_support_vector(i);
00476         sv_weight[i] = svm->get_alpha(i);
00477     }
00478     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00479 
00480     delete[] sv_idx;
00481     delete[] sv_weight;
00482     return ret;
00483 }
00484 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation