Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "distance/CanberraWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017
00018 using namespace shogun;
00019
00020 CCanberraWordDistance::CCanberraWordDistance()
00021 : CStringDistance<uint16_t>()
00022 {
00023 SG_DEBUG("CCanberraWordDistance created");
00024 dictionary_size= 1<<(sizeof(uint16_t)*8);
00025 dictionary_weights = new float64_t[dictionary_size];
00026 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00027 }
00028
00029 CCanberraWordDistance::CCanberraWordDistance(
00030 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00031 : CStringDistance<uint16_t>()
00032 {
00033 SG_DEBUG("CCanberraWordDistance created");
00034 dictionary_size= 1<<(sizeof(uint16_t)*8);
00035 dictionary_weights = new float64_t[dictionary_size];
00036 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00037
00038 init(l, r);
00039 }
00040
00041 CCanberraWordDistance::~CCanberraWordDistance()
00042 {
00043 cleanup();
00044
00045 delete[] dictionary_weights;
00046 }
00047
00048 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00049 {
00050 return CStringDistance<uint16_t>::init(l,r);
00051 }
00052
00053 void CCanberraWordDistance::cleanup()
00054 {
00055 }
00056
00057 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
00058 {
00059 int32_t alen, blen;
00060 bool free_avec, free_bvec;
00061
00062 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00063 get_feature_vector(idx_a, alen, free_avec);
00064 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00065 get_feature_vector(idx_b, blen, free_bvec);
00066
00067 float64_t result=0;
00068
00069 int32_t left_idx=0;
00070 int32_t right_idx=0;
00071
00072 while (left_idx < alen && right_idx < blen)
00073 {
00074 uint16_t sym=avec[left_idx];
00075 if (avec[left_idx]==bvec[right_idx])
00076 {
00077 int32_t old_left_idx=left_idx;
00078 int32_t old_right_idx=right_idx;
00079
00080 while (left_idx< alen && avec[left_idx]==sym)
00081 left_idx++;
00082
00083 while (right_idx< blen && bvec[right_idx]==sym)
00084 right_idx++;
00085
00086 result +=
00087 CMath::abs((float64_t)
00088 ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
00089 ((float64_t)
00090 ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
00091 }
00092 else if (avec[left_idx]<bvec[right_idx])
00093 {
00094 result++;
00095
00096 while (left_idx< alen && avec[left_idx]==sym)
00097 left_idx++;
00098 }
00099 else
00100 {
00101 sym=bvec[right_idx];
00102 result++;
00103
00104 while (right_idx< blen && bvec[right_idx]==sym)
00105 right_idx++;
00106 }
00107 }
00108
00109 while (left_idx < alen)
00110 {
00111 uint16_t sym=avec[left_idx];
00112 result++;
00113
00114 while (left_idx< alen && avec[left_idx]==sym)
00115 left_idx++;
00116 }
00117
00118 while (right_idx < blen)
00119 {
00120 uint16_t sym=bvec[right_idx];
00121 result++;
00122
00123 while (right_idx< blen && bvec[right_idx]==sym)
00124 right_idx++;
00125 }
00126 ((CStringFeatures<uint16_t>*) lhs)->
00127 free_feature_vector(avec, idx_a, free_avec);
00128 ((CStringFeatures<uint16_t>*) rhs)->
00129 free_feature_vector(bvec, idx_b, free_bvec);
00130
00131 return result;
00132 }