FixedDegreeStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "kernel/FixedDegreeStringKernel.h"
00013 #include "kernel/SqrtDiagKernelNormalizer.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CFixedDegreeStringKernel::CFixedDegreeStringKernel(int32_t size, int32_t d)
00019 : CStringKernel<char>(size), degree(d)
00020 {
00021     set_normalizer(new CSqrtDiagKernelNormalizer());
00022 }
00023 
00024 CFixedDegreeStringKernel::CFixedDegreeStringKernel(
00025     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00026 : CStringKernel<char>(10), degree(d)
00027 {
00028     set_normalizer(new CSqrtDiagKernelNormalizer());
00029     init(l, r);
00030 }
00031 
00032 CFixedDegreeStringKernel::~CFixedDegreeStringKernel()
00033 {
00034     cleanup();
00035 }
00036 
00037 bool CFixedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
00038 {
00039     CStringKernel<char>::init(l, r);
00040     return init_normalizer();
00041 }
00042 
00043 void CFixedDegreeStringKernel::cleanup()
00044 {
00045     CKernel::cleanup();
00046 }
00047 
00048 bool CFixedDegreeStringKernel::load_init(FILE* src)
00049 {
00050     return false;
00051 }
00052 
00053 bool CFixedDegreeStringKernel::save_init(FILE* dest)
00054 {
00055     return false;
00056 }
00057 
00058 float64_t CFixedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b)
00059 {
00060     int32_t alen, blen;
00061 
00062     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen);
00063     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen);
00064 
00065     // can only deal with strings of same length
00066     ASSERT(alen==blen);
00067 
00068     int64_t sum = 0;
00069     for (int32_t i = 0; i<alen-degree+1; i++)
00070     {
00071         bool match = true;
00072 
00073         for (int32_t j = i; j<i+degree && match; j++)
00074             match = avec[j]==bvec[j];
00075         if (match)
00076             sum++;
00077     }
00078     return sum;
00079 }

SHOGUN Machine Learning Toolbox - Documentation