00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/common.h"
00014 #include "lib/io.h"
00015 #include "lib/File.h"
00016 #include "lib/Time.h"
00017 #include "lib/Signal.h"
00018
00019 #include "base/Parallel.h"
00020
00021 #include "kernel/Kernel.h"
00022 #include "kernel/IdentityKernelNormalizer.h"
00023 #include "features/Features.h"
00024
00025 #include "classifier/svm/SVM.h"
00026
00027 #include <string.h>
00028 #include <unistd.h>
00029 #include <math.h>
00030
00031 #ifndef WIN32
00032 #include <pthread.h>
00033 #endif
00034
00035
00036 #ifdef HAVE_BOOST_SERIALIZATION
00037 #include <boost/serialization/export.hpp>
00038 BOOST_CLASS_EXPORT(shogun::CKernel);
00039 #endif //HAVE_BOOST_SERIALIZATION
00040
00041
00042 using namespace shogun;
00043
00044 CKernel::CKernel()
00045 : CSGObject(), cache_size(10), kernel_matrix(NULL), lhs(NULL),
00046 rhs(NULL), num_lhs(0), num_rhs(0), combined_kernel_weight(1),
00047 optimization_initialized(false), opt_type(FASTBUTMEMHUNGRY),
00048 properties(KP_NONE), normalizer(NULL)
00049 {
00050
00051
00052
00053 set_normalizer(new CIdentityKernelNormalizer());
00054 }
00055
00056 CKernel::CKernel(int32_t size)
00057 : CSGObject(), kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0),
00058 num_rhs(0), combined_kernel_weight(1), optimization_initialized(false),
00059 opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00060 {
00061 if (size<10)
00062 size=10;
00063
00064 cache_size=size;
00065
00066
00067 if (get_is_initialized())
00068 SG_ERROR( "COptimizableKernel still initialized on destruction");
00069
00070 set_normalizer(new CIdentityKernelNormalizer());
00071 }
00072
00073
00074 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject(),
00075 kernel_matrix(NULL), lhs(NULL), rhs(NULL), num_lhs(0), num_rhs(0),
00076 combined_kernel_weight(1), optimization_initialized(false),
00077 opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE), normalizer(NULL)
00078 {
00079 if (size<10)
00080 size=10;
00081
00082 cache_size=size;
00083
00084 if (get_is_initialized())
00085 SG_ERROR("Kernel initialized on construction.\n");
00086
00087 set_normalizer(new CIdentityKernelNormalizer());
00088 init(p_lhs, p_rhs);
00089 }
00090
00091 CKernel::~CKernel()
00092 {
00093 if (get_is_initialized())
00094 SG_ERROR("Kernel still initialized on destruction.\n");
00095
00096 remove_lhs_and_rhs();
00097 SG_UNREF(normalizer);
00098
00099 SG_INFO("Kernel deleted (%p).\n", this);
00100 }
00101
00102 void CKernel::get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n)
00103 {
00104 ASSERT(dst && m && n);
00105
00106 float64_t* result = NULL;
00107
00108 if (has_features())
00109 {
00110 int32_t num_vec1=get_num_vec_lhs();
00111 int32_t num_vec2=get_num_vec_rhs();
00112 *m=num_vec1;
00113 *n=num_vec2;
00114
00115 int64_t total_num = ((int64_t) num_vec1) * num_vec2;
00116 SG_DEBUG( "allocating memory for a kernel matrix"
00117 " of size %dx%d\n", num_vec1, num_vec2);
00118
00119 result=(float64_t*) malloc(sizeof(float64_t)*total_num);
00120 ASSERT(result);
00121 get_kernel_matrix<float64_t>(num_vec1,num_vec2, result);
00122 }
00123 else
00124 SG_ERROR( "no features assigned to kernel\n");
00125
00126 *dst=result;
00127 }
00128
00129
00130
00131 bool CKernel::init(CFeatures* l, CFeatures* r)
00132 {
00133
00134 ASSERT(l);
00135 ASSERT(r);
00136
00137
00138 ASSERT(l->get_feature_class()==r->get_feature_class());
00139 ASSERT(l->get_feature_type()==r->get_feature_type());
00140
00141
00142 remove_lhs_and_rhs();
00143
00144
00145 SG_REF(l);
00146 if (l!=r)
00147 SG_REF(r);
00148
00149 lhs=l;
00150 rhs=r;
00151
00152 ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00153 ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00154
00155 num_lhs=l->get_num_vectors();
00156 num_rhs=r->get_num_vectors();
00157
00158 return true;
00159 }
00160
00161 bool CKernel::set_normalizer(CKernelNormalizer* n)
00162 {
00163 SG_REF(n);
00164 if (lhs && rhs)
00165 n->init(this);
00166
00167 SG_UNREF(normalizer);
00168 normalizer=n;
00169
00170 return (normalizer!=NULL);
00171 }
00172
00173 CKernelNormalizer* CKernel::get_normalizer()
00174 {
00175 SG_REF(normalizer)
00176 return normalizer;
00177 }
00178
00179 bool CKernel::init_normalizer()
00180 {
00181 return normalizer->init(this);
00182 }
00183
00184 void CKernel::cleanup()
00185 {
00186 remove_lhs_and_rhs();
00187 }
00188
00189
00190
00191 void CKernel::load(CFile* loader)
00192 {
00193 }
00194
00195 void CKernel::save(CFile* writer)
00196 {
00197 int32_t m,n;
00198 float64_t* km=get_kernel_matrix<float64_t>(m,n, NULL);
00199 writer->set_real_matrix(km, m,n);
00200 delete[] km;
00201 }
00202
00203 void CKernel::remove_lhs_and_rhs()
00204 {
00205 if (rhs!=lhs)
00206 SG_UNREF(rhs);
00207 rhs = NULL;
00208 num_rhs=0;
00209
00210 SG_UNREF(lhs);
00211 lhs = NULL;
00212 num_lhs=0;
00213
00214
00215 }
00216
00217 void CKernel::remove_lhs()
00218 {
00219 if (rhs==lhs)
00220 rhs=NULL;
00221 SG_UNREF(lhs);
00222 lhs = NULL;
00223 num_lhs=NULL;
00224
00225
00226 }
00227
00229 void CKernel::remove_rhs()
00230 {
00231 if (rhs!=lhs)
00232 SG_UNREF(rhs);
00233 rhs = NULL;
00234 num_rhs=NULL;
00235
00236
00237 }
00238
00239
00240 void CKernel::list_kernel()
00241 {
00242 SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00243 get_combined_kernel_weight(),
00244 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00245 "SLOWBUTMEMEFFICIENT");
00246
00247 switch (get_kernel_type())
00248 {
00249 case K_UNKNOWN:
00250 SG_INFO( "K_UNKNOWN ");
00251 break;
00252 case K_LINEAR:
00253 SG_INFO( "K_LINEAR ");
00254 break;
00255 case K_SPARSELINEAR:
00256 SG_INFO( "K_SPARSELINEAR ");
00257 break;
00258 case K_POLY:
00259 SG_INFO( "K_POLY ");
00260 break;
00261 case K_GAUSSIAN:
00262 SG_INFO( "K_GAUSSIAN ");
00263 break;
00264 case K_SPARSEGAUSSIAN:
00265 SG_INFO( "K_SPARSEGAUSSIAN ");
00266 break;
00267 case K_GAUSSIANSHIFT:
00268 SG_INFO( "K_GAUSSIANSHIFT ");
00269 break;
00270 case K_HISTOGRAM:
00271 SG_INFO( "K_HISTOGRAM ");
00272 break;
00273 case K_SALZBERG:
00274 SG_INFO( "K_SALZBERG ");
00275 break;
00276 case K_LOCALITYIMPROVED:
00277 SG_INFO( "K_LOCALITYIMPROVED ");
00278 break;
00279 case K_SIMPLELOCALITYIMPROVED:
00280 SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
00281 break;
00282 case K_FIXEDDEGREE:
00283 SG_INFO( "K_FIXEDDEGREE ");
00284 break;
00285 case K_WEIGHTEDDEGREE:
00286 SG_INFO( "K_WEIGHTEDDEGREE ");
00287 break;
00288 case K_WEIGHTEDDEGREEPOS:
00289 SG_INFO( "K_WEIGHTEDDEGREEPOS ");
00290 break;
00291 case K_WEIGHTEDDEGREERBF:
00292 SG_INFO( "K_WEIGHTEDDEGREERBF ");
00293 break;
00294 case K_WEIGHTEDCOMMWORDSTRING:
00295 SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
00296 break;
00297 case K_POLYMATCH:
00298 SG_INFO( "K_POLYMATCH ");
00299 break;
00300 case K_ALIGNMENT:
00301 SG_INFO( "K_ALIGNMENT ");
00302 break;
00303 case K_COMMWORDSTRING:
00304 SG_INFO( "K_COMMWORDSTRING ");
00305 break;
00306 case K_COMMULONGSTRING:
00307 SG_INFO( "K_COMMULONGSTRING ");
00308 break;
00309 case K_SPECTRUMMISMATCHRBF:
00310 SG_INFO( "K_SPECTRUMMISMATCHRBF ");
00311 break;
00312 case K_COMBINED:
00313 SG_INFO( "K_COMBINED ");
00314 break;
00315 case K_AUC:
00316 SG_INFO( "K_AUC ");
00317 break;
00318 case K_CUSTOM:
00319 SG_INFO( "K_CUSTOM ");
00320 break;
00321 case K_SIGMOID:
00322 SG_INFO( "K_SIGMOID ");
00323 break;
00324 case K_CHI2:
00325 SG_INFO( "K_CHI2 ");
00326 break;
00327 case K_DIAG:
00328 SG_INFO( "K_DIAG ");
00329 break;
00330 case K_CONST:
00331 SG_INFO( "K_CONST ");
00332 break;
00333 case K_DISTANCE:
00334 SG_INFO( "K_DISTANCE ");
00335 break;
00336 case K_LOCALALIGNMENT:
00337 SG_INFO( "K_LOCALALIGNMENT ");
00338 break;
00339 case K_TPPK:
00340 SG_INFO( "K_TPPK ");
00341 break;
00342 default:
00343 SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
00344 break;
00345 }
00346
00347 switch (get_feature_class())
00348 {
00349 case C_UNKNOWN:
00350 SG_INFO( "C_UNKNOWN ");
00351 break;
00352 case C_SIMPLE:
00353 SG_INFO( "C_SIMPLE ");
00354 break;
00355 case C_SPARSE:
00356 SG_INFO( "C_SPARSE ");
00357 break;
00358 case C_STRING:
00359 SG_INFO( "C_STRING ");
00360 break;
00361 case C_COMBINED:
00362 SG_INFO( "C_COMBINED ");
00363 break;
00364 case C_ANY:
00365 SG_INFO( "C_ANY ");
00366 break;
00367 default:
00368 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00369 }
00370
00371 switch (get_feature_type())
00372 {
00373 case F_UNKNOWN:
00374 SG_INFO( "F_UNKNOWN ");
00375 break;
00376 case F_DREAL:
00377 SG_INFO( "F_REAL ");
00378 break;
00379 case F_SHORT:
00380 SG_INFO( "F_SHORT ");
00381 break;
00382 case F_CHAR:
00383 SG_INFO( "F_CHAR ");
00384 break;
00385 case F_INT:
00386 SG_INFO( "F_INT ");
00387 break;
00388 case F_BYTE:
00389 SG_INFO( "F_BYTE ");
00390 break;
00391 case F_WORD:
00392 SG_INFO( "F_WORD ");
00393 break;
00394 case F_ULONG:
00395 SG_INFO( "F_ULONG ");
00396 break;
00397 case F_ANY:
00398 SG_INFO( "F_ANY ");
00399 break;
00400 default:
00401 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
00402 break;
00403 }
00404 SG_INFO( "\n");
00405 }
00406
00407 bool CKernel::init_optimization(
00408 int32_t count, int32_t *IDX, float64_t * weights)
00409 {
00410 SG_ERROR( "kernel does not support linadd optimization\n");
00411 return false ;
00412 }
00413
00414 bool CKernel::delete_optimization()
00415 {
00416 SG_ERROR( "kernel does not support linadd optimization\n");
00417 return false;
00418 }
00419
00420 float64_t CKernel::compute_optimized(int32_t vector_idx)
00421 {
00422 SG_ERROR( "kernel does not support linadd optimization\n");
00423 return 0;
00424 }
00425
00426 void CKernel::compute_batch(
00427 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00428 int32_t* IDX, float64_t* weights, float64_t factor)
00429 {
00430 SG_ERROR( "kernel does not support batch computation\n");
00431 }
00432
00433 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00434 {
00435 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00436 }
00437
00438 void CKernel::clear_normal()
00439 {
00440 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00441 }
00442
00443 int32_t CKernel::get_num_subkernels()
00444 {
00445 return 1;
00446 }
00447
00448 void CKernel::compute_by_subkernel(
00449 int32_t vector_idx, float64_t * subkernel_contrib)
00450 {
00451 SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00452 }
00453
00454 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00455 {
00456 num_weights=1 ;
00457 return &combined_kernel_weight ;
00458 }
00459
00460 void CKernel::set_subkernel_weights(float64_t* weights, int32_t num_weights)
00461 {
00462 combined_kernel_weight = weights[0] ;
00463 if (num_weights!=1)
00464 SG_ERROR( "number of subkernel weights should be one ...\n");
00465 }
00466
00467 bool CKernel::init_optimization_svm(CSVM * svm)
00468 {
00469 int32_t num_suppvec=svm->get_num_support_vectors();
00470 int32_t* sv_idx=new int32_t[num_suppvec];
00471 float64_t* sv_weight=new float64_t[num_suppvec];
00472
00473 for (int32_t i=0; i<num_suppvec; i++)
00474 {
00475 sv_idx[i] = svm->get_support_vector(i);
00476 sv_weight[i] = svm->get_alpha(i);
00477 }
00478 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00479
00480 delete[] sv_idx;
00481 delete[] sv_weight;
00482 return ret;
00483 }
00484