SimpleLocalityImprovedStringKernel.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/SimpleLocalityImprovedStringKernel.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016
00017 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00018 int32_t size, int32_t l, int32_t id, int32_t od)
00019 : CStringKernel<char>(size), length(l), inner_degree(id), outer_degree(od),
00020 pyramid_weights(NULL)
00021 {
00022 }
00023
00024 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00025 CStringFeatures<char>* l, CStringFeatures<char>* r,
00026 int32_t len, int32_t id, int32_t od)
00027 : CStringKernel<char>(10), length(len), inner_degree(id), outer_degree(od),
00028 pyramid_weights(NULL)
00029 {
00030 init(l, r);
00031 }
00032
00033 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00034 {
00035 cleanup();
00036 }
00037
00038 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00039 {
00040 bool result = CStringKernel<char>::init(l,r);
00041
00042 if (!result)
00043 return false;
00044 int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00045 delete[] pyramid_weights;
00046 pyramid_weights = new float64_t[num_features];
00047
00048 SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00049 num_features, length);
00050
00051 const int32_t PYRAL = 2 * length - 1;
00052 float64_t PYRAL_pot;
00053 int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
00054 int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00055 int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
00056 int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
00057 int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
00058 {
00059 float64_t PYRAL_ = PYRAL;
00060 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00061 if (DEGREE1_1n)
00062 {
00063 PYRAL_ *= PYRAL_;
00064 if (DEGREE1_2)
00065 PYRAL_pot *= PYRAL_;
00066 if (DEGREE1_3)
00067 {
00068 PYRAL_ *= PYRAL_;
00069 if (DEGREE1_4)
00070 PYRAL_pot *= PYRAL_;
00071 }
00072 }
00073 }
00074
00075 int32_t pyra_len = num_features-PYRAL+1;
00076 int32_t pyra_len2 = (int32_t) pyra_len/2;
00077 {
00078 int32_t j;
00079 for (j = 0; j < pyra_len; j++)
00080 pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00081 for (j = 0; j < pyra_len; j++)
00082 pyramid_weights[j] /= PYRAL_pot;
00083 }
00084
00085 return init_normalizer();
00086 }
00087
00088 void CSimpleLocalityImprovedStringKernel::cleanup()
00089 {
00090 delete[] pyramid_weights;
00091 pyramid_weights = NULL;
00092
00093 CKernel::cleanup();
00094 }
00095
00096 bool CSimpleLocalityImprovedStringKernel::load_init(FILE* src)
00097 {
00098 return false;
00099 }
00100
00101 bool CSimpleLocalityImprovedStringKernel::save_init(FILE* dest)
00102 {
00103 return false;
00104 }
00105
00106 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00107 const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00108 const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00109 {
00110 const int32_t PYRAL = 2*NTWIDTH-1;
00111 int32_t pyra_len, pyra_len2;
00112 float64_t pot, PYRAL_pot;
00113 float64_t sum;
00114 int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00115 int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00116 int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00117 int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00118 int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00119 {
00120 float64_t PYRAL_ = PYRAL;
00121 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00122 if (DEGREE1_1n)
00123 {
00124 PYRAL_ *= PYRAL_;
00125 if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00126 if (DEGREE1_3)
00127 {
00128 PYRAL_ *= PYRAL_;
00129 if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00130 }
00131 }
00132 }
00133
00134 ASSERT((DEGREE1 & ~0x7) == 0);
00135 ASSERT((DEGREE2 & ~0x7) == 0);
00136
00137 pyra_len = NOF_NTS-PYRAL+1;
00138 pyra_len2 = (int32_t) pyra_len/2;
00139 {
00140 int32_t j;
00141 for (j = 0; j < pyra_len; j++)
00142 pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00143 for (j = 0; j < pyra_len; j++)
00144 pyra[j] /= PYRAL_pot;
00145 }
00146
00147 register int32_t conv;
00148 register int32_t i;
00149 register int32_t j;
00150
00151 sum = 0.0;
00152 conv = 0;
00153 for (j = 0; j < PYRAL; j++)
00154 conv += (x1[j] == x2[j]) ? 1 : 0;
00155
00156 for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00157 {
00158 register float64_t pot2;
00159 if (i>0)
00160 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00161 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00162 {
00163 register float64_t conv2 = conv;
00164 pot2 = (DEGREE1_1) ? 1.0 : conv2;
00165 if (DEGREE1_1n)
00166 {
00167 conv2 *= conv2;
00168 if (DEGREE1_2)
00169 pot2 *= conv2;
00170 if (DEGREE1_3 && DEGREE1_4)
00171 pot2 *= conv2*conv2;
00172 }
00173 }
00174 sum += pot2*pyra[i];
00175 }
00176
00177 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00178 if ((DEGREE2 & ~0x1) != 0)
00179 {
00180 sum *= sum;
00181 if ((DEGREE2 & 0x2) != 0)
00182 pot *= sum;
00183 if ((DEGREE2 & ~0x3) != 0)
00184 {
00185 sum *= sum;
00186 if ((DEGREE2 & 0x4) != 0)
00187 pot *= sum;
00188 }
00189 }
00190 return pot;
00191 }
00192
00193 float64_t CSimpleLocalityImprovedStringKernel::compute(
00194 int32_t idx_a, int32_t idx_b)
00195 {
00196 int32_t alen, blen;
00197
00198 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen);
00199 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen);
00200
00201
00202 ASSERT(alen==blen);
00203
00204 float64_t dpt;
00205
00206 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00207 dpt = dpt / pow((float64_t)alen, (float64_t)outer_degree);
00208 return (float64_t) dpt;
00209 }