CanberraWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) Christian Gehl
00008  * Written (W) 1999-2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/CanberraWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 using namespace shogun;
00019 
00020 CCanberraWordDistance::CCanberraWordDistance()
00021 : CStringDistance<uint16_t>()
00022 {
00023     SG_DEBUG("CCanberraWordDistance created");
00024     dictionary_size= 1<<(sizeof(uint16_t)*8);
00025     dictionary_weights = new float64_t[dictionary_size];
00026     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00027 }
00028 
00029 CCanberraWordDistance::CCanberraWordDistance(
00030     CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00031 : CStringDistance<uint16_t>()
00032 {
00033     SG_DEBUG("CCanberraWordDistance created");
00034     dictionary_size= 1<<(sizeof(uint16_t)*8);
00035     dictionary_weights = new float64_t[dictionary_size];
00036     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00037 
00038     init(l, r);
00039 }
00040 
00041 CCanberraWordDistance::~CCanberraWordDistance()
00042 {
00043     cleanup();
00044 
00045     delete[] dictionary_weights;
00046 }
00047 
00048 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00049 {
00050     return CStringDistance<uint16_t>::init(l,r);
00051 }
00052 
00053 void CCanberraWordDistance::cleanup()
00054 {
00055 }
00056 
00057 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
00058 {
00059     int32_t alen, blen;
00060     bool free_avec, free_bvec;
00061 
00062     uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00063         get_feature_vector(idx_a, alen, free_avec);
00064     uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00065         get_feature_vector(idx_b, blen, free_bvec);
00066 
00067     float64_t result=0;
00068 
00069     int32_t left_idx=0;
00070     int32_t right_idx=0;
00071 
00072     while (left_idx < alen && right_idx < blen)
00073     {
00074         uint16_t sym=avec[left_idx];
00075         if (avec[left_idx]==bvec[right_idx])
00076         {
00077             int32_t old_left_idx=left_idx;
00078             int32_t old_right_idx=right_idx;
00079 
00080             while (left_idx< alen && avec[left_idx]==sym)
00081                 left_idx++;
00082 
00083             while (right_idx< blen && bvec[right_idx]==sym)
00084                 right_idx++;
00085 
00086             result +=
00087                 CMath::abs((float64_t)
00088                     ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
00089                 ((float64_t)
00090                     ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
00091         }
00092         else if (avec[left_idx]<bvec[right_idx])
00093         {
00094             result++;
00095 
00096             while (left_idx< alen && avec[left_idx]==sym)
00097                 left_idx++;
00098         }
00099         else
00100         {
00101             sym=bvec[right_idx];
00102             result++;
00103 
00104             while (right_idx< blen && bvec[right_idx]==sym)
00105                 right_idx++;
00106         }
00107     }
00108     
00109     while (left_idx < alen)
00110     {
00111         uint16_t sym=avec[left_idx];
00112         result++;
00113 
00114         while (left_idx< alen && avec[left_idx]==sym)
00115             left_idx++;
00116     }
00117 
00118     while (right_idx < blen)
00119     {
00120         uint16_t sym=bvec[right_idx];
00121         result++;
00122 
00123         while (right_idx< blen && bvec[right_idx]==sym)
00124             right_idx++;
00125     }
00126     ((CStringFeatures<uint16_t>*) lhs)->
00127         free_feature_vector(avec, idx_a, free_avec);
00128     ((CStringFeatures<uint16_t>*) rhs)->
00129         free_feature_vector(bvec, idx_b, free_bvec);
00130 
00131     return result;
00132 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation