Kernel.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 1999-2008 Gunnar Raetsch
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include "lib/config.h"

#include "lib/common.h"
#include "lib/io.h"
#include "lib/File.h"
#include "lib/Time.h"
#include "base/Parallel.h"

#include "kernel/Kernel.h"
#include "kernel/IdentityKernelNormalizer.h"
#include "features/Features.h"

#include "classifier/svm/SVM.h"

#include <string.h>
#include <unistd.h>
#include <math.h>

#ifndef WIN32
#include <pthread.h>
#endif

CKernel::CKernel(int32_t size)
: CSGObject(), kernel_matrix(NULL), lhs(NULL),
    rhs(NULL), combined_kernel_weight(1), optimization_initialized(false),
    opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
{
    if (size<10)
        size=10;

    cache_size=size;

    if (get_is_initialized())
        SG_ERROR("Kernel initialized on construction.\n");

    set_normalizer(new CIdentityKernelNormalizer());
}

CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject(),
    kernel_matrix(NULL), lhs(NULL), rhs(NULL), combined_kernel_weight(1),
    optimization_initialized(false), opt_type(FASTBUTMEMHUNGRY),
    properties(KP_NONE), normalizer(NULL)
{
    if (size<10)
        size=10;

    cache_size=size;

    if (get_is_initialized())
        SG_ERROR("Kernel initialized on construction.\n");

    set_normalizer(new CIdentityKernelNormalizer());
    init(p_lhs, p_rhs);
}

CKernel::~CKernel()
{
    if (get_is_initialized())
        SG_ERROR("Kernel still initialized on destruction.\n");

    remove_lhs_and_rhs();
    SG_UNREF(normalizer);

    SG_INFO("Kernel deleted (%p).\n", this);
}

void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
{
    ASSERT(dst && m && n);

    float64_t* result = NULL;

    if (has_features())
    {
        int32_t num_vec1=get_num_vec_lhs();
        int32_t num_vec2=get_num_vec_rhs();
        *m=num_vec1;
        *n=num_vec2;

        int64_t total_num = ((int64_t) num_vec1) * num_vec2;
        int32_t num_done = 0;
        SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);

        result=(float64_t*) malloc(sizeof(float64_t)*total_num);
        ASSERT(result);

        if ( lhs && lhs==rhs && num_vec1==num_vec2 )
        {
            // symmetric case: compute the upper triangle only and mirror it
            for (int32_t i=0; i<num_vec1; i++)
            {
                for (int32_t j=i; j<num_vec1; j++)
                {
                    float64_t v=kernel(i,j);

                    result[i+j*num_vec1]=v;
                    result[j+i*num_vec1]=v;

                    // report progress every 100000 computed entries
                    if (!(num_done%100000))
                        SG_PROGRESS(num_done, 0, total_num-1);

                    if (i!=j)
                        num_done+=2;
                    else
                        num_done+=1;
                }
            }
        }
        else
        {
            for (int32_t i=0; i<num_vec1; i++)
            {
                for (int32_t j=0; j<num_vec2; j++)
                {
                    result[i+j*num_vec1]=kernel(i,j);

                    // report progress every 100000 computed entries
                    if (!(num_done%100000))
                        SG_PROGRESS(num_done, 0, total_num-1);

                    num_done++;
                }
            }
        }

        SG_DONE();
    }
    else
        SG_ERROR( "no features assigned to kernel\n");

    *dst=result;
}

float32_t* CKernel::get_kernel_matrix_shortreal(
    int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
{
    float32_t* result = NULL;

    if (has_features())
    {
        if (target && (num_vec1!=get_num_vec_lhs() ||
                    num_vec2!=get_num_vec_rhs()) )
            SG_ERROR( "kernel matrix size mismatch\n");

        num_vec1=get_num_vec_lhs();
        num_vec2=get_num_vec_rhs();

        int64_t total_num = ((int64_t) num_vec1) * num_vec2;
        int32_t num_done = 0;

        SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);

        if (target)
            result=target;
        else
            result=new float32_t[total_num];

        if (lhs && lhs==rhs && num_vec1==num_vec2)
        {
            // symmetric case: compute the upper triangle only and mirror it
            for (int32_t i=0; i<num_vec1; i++)
            {
                for (int32_t j=i; j<num_vec1; j++)
                {
                    float64_t v=kernel(i,j);

                    result[i+j*num_vec1]=v;
                    result[j+i*num_vec1]=v;

                    // report progress every 100000 computed entries
                    if (!(num_done%100000))
                        SG_PROGRESS(num_done, 0, total_num-1);

                    if (i!=j)
                        num_done+=2;
                    else
                        num_done+=1;
                }
            }
        }
        else
        {
            for (int32_t i=0; i<num_vec1; i++)
            {
                for (int32_t j=0; j<num_vec2; j++)
                {
                    result[i+j*num_vec1]=kernel(i,j);

                    // report progress every 100000 computed entries
                    if (!(num_done%100000))
                        SG_PROGRESS(num_done, 0, total_num-1);

                    num_done++;
                }
            }
        }

        SG_DONE();
    }
    else
        SG_ERROR( "no features assigned to kernel\n");

    return result;
}

float64_t* CKernel::get_kernel_matrix_real(
    int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
{
    float64_t* result = NULL;

    if (has_features())
    {
        if (target && (num_vec1!=get_num_vec_lhs() ||
                    num_vec2!=get_num_vec_rhs()) )
            SG_ERROR( "kernel matrix size mismatch\n");

        num_vec1=get_num_vec_lhs();
        num_vec2=get_num_vec_rhs();

        int64_t total_num = ((int64_t) num_vec1) * num_vec2;
        int32_t num_done = 0;

        SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);

        if (target)
            result=target;
        else
            result=new float64_t[total_num];

        if (lhs && lhs==rhs && num_vec1==num_vec2)
        {
            // symmetric case: compute the upper triangle only and mirror it
            for (int32_t i=0; i<num_vec1; i++)
            {
                for (int32_t j=i; j<num_vec1; j++)
                {
                    float64_t v=kernel(i,j);

                    result[i+j*num_vec1]=v;
                    result[j+i*num_vec1]=v;

                    // report progress every 100000 computed entries
                    if (!(num_done%100000))
                        SG_PROGRESS(num_done, 0, total_num-1);

                    if (i!=j)
                        num_done+=2;
                    else
                        num_done+=1;
                }
            }
        }
        else
        {
            for (int32_t i=0; i<num_vec1; i++)
            {
                for (int32_t j=0; j<num_vec2; j++)
                {
                    result[i+j*num_vec1]=kernel(i,j);

                    // report progress every 100000 computed entries
                    if (!(num_done%100000))
                        SG_PROGRESS(num_done, 0, total_num-1);

                    num_done++;
                }
            }
        }

        SG_DONE();
    }
    else
        SG_ERROR( "no features assigned to kernel\n");

    return result;
}

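/* Note on the three kernel matrix getters above: get_kernel_matrix() fills a
 * buffer allocated with malloc() (the caller free()s it), while the
 * shortreal/real variants allocate with new[] when no target buffer is
 * supplied (the caller delete[]s it). All variants store the matrix in
 * column-major order, i.e. entry (i,j) ends up at result[i+j*num_vec1], with
 * i indexing lhs vectors and j indexing rhs vectors. */
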
bool CKernel::init(CFeatures* l, CFeatures* r)
{
    //make sure features were indeed supplied
    ASSERT(l);
    ASSERT(r);

    //make sure features are compatible
    ASSERT(l->get_feature_class()==r->get_feature_class());
    ASSERT(l->get_feature_type()==r->get_feature_type());

    //remove references to previous features
    remove_lhs_and_rhs();

    //increase reference counts
    SG_REF(l);
    if (l!=r)
        SG_REF(r);

    lhs=l;
    rhs=r;

    return true;
}

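/* init() takes shared ownership of both feature objects via SG_REF; the
 * references are released by remove_lhs_and_rhs(), which is invoked from
 * cleanup() and from the destructor. */
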
bool CKernel::set_normalizer(CKernelNormalizer* n)
{
    SG_REF(n);
    SG_UNREF(normalizer);
    normalizer=n;

    return (normalizer!=NULL);
}

CKernelNormalizer* CKernel::get_normalizer()
{
    SG_REF(normalizer);
    return normalizer;
}

bool CKernel::init_normalizer()
{
    return normalizer->init(this);
}

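/* get_normalizer() returns the normalizer with its reference count increased
 * (SG_REF), so callers are expected to SG_UNREF it when they are done. */
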
void CKernel::cleanup()
{
    remove_lhs_and_rhs();
}

bool CKernel::load(char* fname)
{
    return false;
}

bool CKernel::save(char* fname)
{
    int32_t i=0;
    int32_t num_left=get_num_vec_lhs();
    int32_t num_right=rhs->get_num_vectors();
    // compute the product in the wide cache index type to avoid int32 overflow
    KERNELCACHE_IDX num_total=((KERNELCACHE_IDX) num_left)*num_right;

    CFile f(fname, 'w', F_DREAL);

    for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
    {
        for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
        {
            if (!(i % (num_total/10+1)))
                SG_PRINT("%02d%%.", (int32_t) (100.0*i/num_total));
            else if (!(i % (num_total/200+1)))
                SG_PRINT(".");

            float64_t k=kernel(l,r);
            f.save_real_data(&k, 1);

            i++;
        }
    }

    if (f.is_ok())
        SG_INFO( "kernel matrix of size %d x %d written (filesize: %ld)\n", num_left, num_right, num_total*sizeof(KERNELCACHE_ELEM));

    return (f.is_ok());
}

void CKernel::remove_lhs_and_rhs()
{
    if (rhs!=lhs)
        SG_UNREF(rhs);
    rhs = NULL;

    SG_UNREF(lhs);
    lhs = NULL;
}

void CKernel::remove_lhs()
{
    if (rhs==lhs)
        rhs=NULL;
    SG_UNREF(lhs);
    lhs = NULL;
}

void CKernel::remove_rhs()
{
    if (rhs!=lhs)
        SG_UNREF(rhs);
    rhs = NULL;
}

void CKernel::list_kernel()
{
    SG_INFO( "0x%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
            get_combined_kernel_weight(),
            get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
            "SLOWBUTMEMEFFICIENT");

    switch (get_kernel_type())
    {
        case K_UNKNOWN:
            SG_INFO( "K_UNKNOWN ");
            break;
        case K_LINEAR:
            SG_INFO( "K_LINEAR ");
            break;
        case K_SPARSELINEAR:
            SG_INFO( "K_SPARSELINEAR ");
            break;
        case K_POLY:
            SG_INFO( "K_POLY ");
            break;
        case K_GAUSSIAN:
            SG_INFO( "K_GAUSSIAN ");
            break;
        case K_SPARSEGAUSSIAN:
            SG_INFO( "K_SPARSEGAUSSIAN ");
            break;
        case K_GAUSSIANSHIFT:
            SG_INFO( "K_GAUSSIANSHIFT ");
            break;
        case K_HISTOGRAM:
            SG_INFO( "K_HISTOGRAM ");
            break;
        case K_SALZBERG:
            SG_INFO( "K_SALZBERG ");
            break;
        case K_LOCALITYIMPROVED:
            SG_INFO( "K_LOCALITYIMPROVED ");
            break;
        case K_SIMPLELOCALITYIMPROVED:
            SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
            break;
        case K_FIXEDDEGREE:
            SG_INFO( "K_FIXEDDEGREE ");
            break;
        case K_WEIGHTEDDEGREE:
            SG_INFO( "K_WEIGHTEDDEGREE ");
            break;
        case K_WEIGHTEDDEGREEPOS:
            SG_INFO( "K_WEIGHTEDDEGREEPOS ");
            break;
        case K_WEIGHTEDCOMMWORDSTRING:
            SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
            break;
        case K_POLYMATCH:
            SG_INFO( "K_POLYMATCH ");
            break;
        case K_ALIGNMENT:
            SG_INFO( "K_ALIGNMENT ");
            break;
        case K_COMMWORDSTRING:
            SG_INFO( "K_COMMWORDSTRING ");
            break;
        case K_COMMULONGSTRING:
            SG_INFO( "K_COMMULONGSTRING ");
            break;
        case K_COMBINED:
            SG_INFO( "K_COMBINED ");
            break;
        case K_AUC:
            SG_INFO( "K_AUC ");
            break;
        case K_CUSTOM:
            SG_INFO( "K_CUSTOM ");
            break;
        case K_SIGMOID:
            SG_INFO( "K_SIGMOID ");
            break;
        case K_CHI2:
            SG_INFO( "K_CHI2 ");
            break;
        case K_DIAG:
            SG_INFO( "K_DIAG ");
            break;
        case K_CONST:
            SG_INFO( "K_CONST ");
            break;
        case K_MINDYGRAM:
            SG_INFO( "K_MINDYGRAM ");
            break;
        case K_DISTANCE:
            SG_INFO( "K_DISTANCE ");
            break;
        case K_LOCALALIGNMENT:
            SG_INFO( "K_LOCALALIGNMENT ");
            break;
        case K_TPPK:
            SG_INFO( "K_TPPK ");
            break;
        default:
            SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
            break;
    }

    switch (get_feature_class())
    {
        case C_UNKNOWN:
            SG_INFO( "C_UNKNOWN ");
            break;
        case C_SIMPLE:
            SG_INFO( "C_SIMPLE ");
            break;
        case C_SPARSE:
            SG_INFO( "C_SPARSE ");
            break;
        case C_STRING:
            SG_INFO( "C_STRING ");
            break;
        case C_COMBINED:
            SG_INFO( "C_COMBINED ");
            break;
        case C_ANY:
            SG_INFO( "C_ANY ");
            break;
        default:
            SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
            break;
    }

    switch (get_feature_type())
    {
        case F_UNKNOWN:
            SG_INFO( "F_UNKNOWN ");
            break;
        case F_DREAL:
            SG_INFO( "F_REAL ");
            break;
        case F_SHORT:
            SG_INFO( "F_SHORT ");
            break;
        case F_CHAR:
            SG_INFO( "F_CHAR ");
            break;
        case F_INT:
            SG_INFO( "F_INT ");
            break;
        case F_BYTE:
            SG_INFO( "F_BYTE ");
            break;
        case F_WORD:
            SG_INFO( "F_WORD ");
            break;
        case F_ULONG:
            SG_INFO( "F_ULONG ");
            break;
        case F_ANY:
            SG_INFO( "F_ANY ");
            break;
        default:
            SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
            break;
    }
    SG_INFO( "\n");
}

bool CKernel::init_optimization(
    int32_t count, int32_t *IDX, float64_t * weights)
{
    SG_ERROR( "kernel does not support linadd optimization\n");
    return false;
}

bool CKernel::delete_optimization()
{
    SG_ERROR( "kernel does not support linadd optimization\n");
    return false;
}

float64_t CKernel::compute_optimized(int32_t vector_idx)
{
    SG_ERROR( "kernel does not support linadd optimization\n");
    return 0;
}

void CKernel::compute_batch(
    int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
    int32_t* IDX, float64_t* weights, float64_t factor)
{
    SG_ERROR( "kernel does not support batch computation\n");
}

void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
{
    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
}

void CKernel::clear_normal()
{
    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
}

int32_t CKernel::get_num_subkernels()
{
    return 1;
}

void CKernel::compute_by_subkernel(
    int32_t vector_idx, float64_t * subkernel_contrib)
{
    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
}

const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
{
    num_weights=1;
    return &combined_kernel_weight;
}

void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
{
    if (num_weights!=1)
        SG_ERROR( "number of subkernel weights should be one\n");

    combined_kernel_weight=weights[0];
}

bool CKernel::init_optimization_svm(CSVM * svm)
{
    int32_t num_suppvec=svm->get_num_support_vectors();
    int32_t* sv_idx=new int32_t[num_suppvec];
    float64_t* sv_weight=new float64_t[num_suppvec];

    for (int32_t i=0; i<num_suppvec; i++)
    {
        sv_idx[i]    = svm->get_support_vector(i);
        sv_weight[i] = svm->get_alpha(i);
    }
    bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);

    delete[] sv_idx;
    delete[] sv_weight;
    return ret;
}

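For orientation, here is a minimal usage sketch of the CKernel interface defined in this file. CMyKernel is a hypothetical concrete subclass (any CKernel implementation behaves the same way), and feat_lhs/feat_rhs stand for two already constructed CFeatures objects of compatible feature class and type, as required by CKernel::init().

// Hypothetical concrete kernel; the cache size argument is clamped to a minimum of 10 by the constructor.
CMyKernel* k = new CMyKernel(10);

// Attach the left-hand and right-hand side features and initialize the attached normalizer.
k->init(feat_lhs, feat_rhs);
k->init_normalizer();

// Compute the full kernel matrix; with no target buffer, the result is
// allocated with new[] and laid out column-major (entry (i,j) at km[i+j*num_lhs]).
int32_t num_lhs=0, num_rhs=0;
float64_t* km=k->get_kernel_matrix_real(num_lhs, num_rhs, NULL);

// ... use km ...

delete[] km;
SG_UNREF(k);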
