Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "distance/HammingWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017
00018 using namespace shogun;
00019
00020 CHammingWordDistance::CHammingWordDistance(bool sign)
00021 : CStringDistance<uint16_t>(), use_sign(sign)
00022 {
00023 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00024 dictionary_size= 1<<(sizeof(uint16_t)*8);
00025 dictionary_weights = new float64_t[dictionary_size];
00026 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00027 }
00028
00029 CHammingWordDistance::CHammingWordDistance(
00030 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r, bool sign)
00031 : CStringDistance<uint16_t>(), use_sign(sign)
00032 {
00033 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00034 dictionary_size= 1<<(sizeof(uint16_t)*8);
00035 dictionary_weights = new float64_t[dictionary_size];
00036 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00037
00038 init(l, r);
00039 }
00040
00041 CHammingWordDistance::~CHammingWordDistance()
00042 {
00043 cleanup();
00044
00045 delete[] dictionary_weights;
00046 }
00047
00048 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
00049 {
00050 bool result=CStringDistance<uint16_t>::init(l,r);
00051 return result;
00052 }
00053
00054 void CHammingWordDistance::cleanup()
00055 {
00056 }
00057
00058 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b)
00059 {
00060 int32_t alen, blen;
00061 bool free_avec, free_bvec;
00062
00063 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00064 get_feature_vector(idx_a, alen, free_avec);
00065 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00066 get_feature_vector(idx_b, blen, free_bvec);
00067
00068 int32_t result=0;
00069
00070 int32_t left_idx=0;
00071 int32_t right_idx=0;
00072
00073 if (use_sign)
00074 {
00075
00076 while (left_idx < alen && right_idx < blen)
00077 {
00078 uint16_t sym=avec[left_idx];
00079 if (avec[left_idx]==bvec[right_idx])
00080 {
00081 while (left_idx< alen && avec[left_idx]==sym)
00082 left_idx++;
00083
00084 while (right_idx< blen && bvec[right_idx]==sym)
00085 right_idx++;
00086 }
00087 else if (avec[left_idx]<bvec[right_idx])
00088 {
00089 result++;
00090
00091 while (left_idx< alen && avec[left_idx]==sym)
00092 left_idx++;
00093 }
00094 else
00095 {
00096 sym=bvec[right_idx];
00097 result++;
00098
00099 while (right_idx< blen && bvec[right_idx]==sym)
00100 right_idx++;
00101 }
00102 }
00103 }
00104 else
00105 {
00106
00107 while (left_idx < alen && right_idx < blen)
00108 {
00109 uint16_t sym=avec[left_idx];
00110 if (avec[left_idx]==bvec[right_idx])
00111 {
00112 int32_t old_left_idx=left_idx;
00113 int32_t old_right_idx=right_idx;
00114
00115 while (left_idx< alen && avec[left_idx]==sym)
00116 left_idx++;
00117
00118 while (right_idx< blen && bvec[right_idx]==sym)
00119 right_idx++;
00120
00121 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
00122 result++;
00123 }
00124 else if (avec[left_idx]<bvec[right_idx])
00125 {
00126 result++;
00127
00128 while (left_idx< alen && avec[left_idx]==sym)
00129 left_idx++;
00130 }
00131 else
00132 {
00133 sym=bvec[right_idx];
00134 result++;
00135
00136 while (right_idx< blen && bvec[right_idx]==sym)
00137 right_idx++;
00138 }
00139 }
00140 }
00141
00142 while (left_idx < alen)
00143 {
00144 uint16_t sym=avec[left_idx];
00145 result++;
00146
00147 while (left_idx< alen && avec[left_idx]==sym)
00148 left_idx++;
00149 }
00150
00151 while (right_idx < blen)
00152 {
00153 uint16_t sym=bvec[right_idx];
00154 result++;
00155
00156 while (right_idx< blen && bvec[right_idx]==sym)
00157 right_idx++;
00158 }
00159
00160 ((CStringFeatures<uint16_t>*) lhs)->
00161 free_feature_vector(avec, idx_a, free_avec);
00162 ((CStringFeatures<uint16_t>*) rhs)->
00163 free_feature_vector(bvec, idx_b, free_bvec);
00164
00165 return result;
00166 }