SimpleLocalityImprovedStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/SimpleLocalityImprovedStringKernel.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 
00017 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00018     int32_t size, int32_t l, int32_t id, int32_t od)
00019 : CStringKernel<char>(size), length(l), inner_degree(id), outer_degree(od),
00020     pyramid_weights(NULL)
00021 {
00022 }
00023 
00024 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00025     CStringFeatures<char>* l, CStringFeatures<char>* r,
00026     int32_t len, int32_t id, int32_t od)
00027 : CStringKernel<char>(10), length(len), inner_degree(id), outer_degree(od),
00028     pyramid_weights(NULL)
00029 {
00030     init(l, r);
00031 }
00032 
00033 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00034 {
00035     cleanup();
00036 }
00037 
00038 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00039 {
00040     bool result = CStringKernel<char>::init(l,r);
00041 
00042     if (!result)
00043         return false;
00044     int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00045     delete[] pyramid_weights;
00046     pyramid_weights = new float64_t[num_features];
00047 
00048     SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00049         num_features, length);
00050 
00051     const int32_t PYRAL = 2 * length - 1; // total window length
00052     float64_t PYRAL_pot;
00053     int32_t DEGREE1_1  = (inner_degree & 0x1)==0;
00054     int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00055     int32_t DEGREE1_2  = (inner_degree & 0x2)!=0;
00056     int32_t DEGREE1_3  = (inner_degree & ~0x3)!=0;
00057     int32_t DEGREE1_4  = (inner_degree & 0x4)!=0;
00058     {
00059     float64_t PYRAL_ = PYRAL;
00060     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00061     if (DEGREE1_1n)
00062     {
00063         PYRAL_ *= PYRAL_;
00064         if (DEGREE1_2)
00065             PYRAL_pot *= PYRAL_;
00066         if (DEGREE1_3)
00067         {
00068             PYRAL_ *= PYRAL_;
00069             if (DEGREE1_4)
00070                 PYRAL_pot *= PYRAL_;
00071         }
00072     }
00073     }
00074 
00075     int32_t pyra_len  = num_features-PYRAL+1;
00076     int32_t pyra_len2 = (int32_t) pyra_len/2;
00077     {
00078     int32_t j;
00079     for (j = 0; j < pyra_len; j++)
00080         pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00081     for (j = 0; j < pyra_len; j++)
00082         pyramid_weights[j] /= PYRAL_pot;
00083     }
00084 
00085     return init_normalizer();
00086 }
00087 
00088 void CSimpleLocalityImprovedStringKernel::cleanup()
00089 {
00090     delete[] pyramid_weights;
00091     pyramid_weights = NULL;
00092 
00093     CKernel::cleanup();
00094 }
00095 
00096 bool CSimpleLocalityImprovedStringKernel::load_init(FILE* src)
00097 {
00098     return false;
00099 }
00100 
00101 bool CSimpleLocalityImprovedStringKernel::save_init(FILE* dest)
00102 {
00103     return false;
00104 }
00105 
00106 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00107          const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00108          const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00109 {
00110     const int32_t PYRAL = 2*NTWIDTH-1; // total window length
00111     int32_t pyra_len, pyra_len2;
00112     float64_t pot, PYRAL_pot;
00113     float64_t sum;
00114     int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00115     int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00116     int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00117     int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00118     int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00119     {
00120     float64_t PYRAL_ = PYRAL;
00121     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00122     if (DEGREE1_1n)
00123     {
00124         PYRAL_ *= PYRAL_;
00125         if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00126         if (DEGREE1_3)
00127         {
00128             PYRAL_ *= PYRAL_;
00129             if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00130         }
00131     }
00132     }
00133 
00134     ASSERT((DEGREE1 & ~0x7) == 0);
00135     ASSERT((DEGREE2 & ~0x7) == 0);
00136 
00137     pyra_len = NOF_NTS-PYRAL+1;
00138     pyra_len2 = (int32_t) pyra_len/2;
00139     {
00140     int32_t j;
00141     for (j = 0; j < pyra_len; j++)
00142         pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00143     for (j = 0; j < pyra_len; j++)
00144         pyra[j] /= PYRAL_pot;
00145     }
00146 
00147     register int32_t conv;
00148     register int32_t i;
00149     register int32_t j;
00150 
00151     sum = 0.0;
00152     conv = 0;
00153     for (j = 0; j < PYRAL; j++)
00154         conv += (x1[j] == x2[j]) ? 1 : 0;
00155 
00156     for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00157     {
00158         register float64_t pot2;
00159         if (i>0)
00160             conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) - 
00161                 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00162         { /* potencing of conv -- float64_t is faster*/
00163         register float64_t conv2 = conv;
00164         pot2 = (DEGREE1_1) ? 1.0 : conv2;
00165             if (DEGREE1_1n)
00166             {
00167                 conv2 *= conv2;
00168                 if (DEGREE1_2)
00169                     pot2 *= conv2;
00170                 if (DEGREE1_3 && DEGREE1_4)
00171                     pot2 *= conv2*conv2;
00172             }
00173         }
00174         sum += pot2*pyra[i];
00175     }
00176 
00177     pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00178     if ((DEGREE2 & ~0x1) != 0)
00179     {
00180         sum *= sum;
00181         if ((DEGREE2 & 0x2) != 0)
00182             pot *= sum;
00183         if ((DEGREE2 & ~0x3) != 0)
00184         {
00185             sum *= sum;
00186             if ((DEGREE2 & 0x4) != 0)
00187                 pot *= sum;
00188         }
00189     }
00190     return pot;
00191 }
00192 
00193 float64_t CSimpleLocalityImprovedStringKernel::compute(
00194     int32_t idx_a, int32_t idx_b)
00195 {
00196     int32_t alen, blen;
00197 
00198     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen);
00199     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen);
00200 
00201     // can only deal with strings of same length
00202     ASSERT(alen==blen);
00203 
00204     float64_t dpt;
00205 
00206     dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00207     dpt = dpt / pow((float64_t)alen, (float64_t)outer_degree);
00208     return (float64_t) dpt;
00209 }

SHOGUN Machine Learning Toolbox - Documentation