HammingWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2009 Christian Gehl
00008  * Written (W) 1999-2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/HammingWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 using namespace shogun;
00019 
00020 CHammingWordDistance::CHammingWordDistance(bool sign)
00021 : CStringDistance<uint16_t>(), use_sign(sign)
00022 {
00023     SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00024     dictionary_size= 1<<(sizeof(uint16_t)*8);
00025     dictionary_weights = new float64_t[dictionary_size];
00026     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00027 }
00028 
00029 CHammingWordDistance::CHammingWordDistance(
00030     CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r, bool sign)
00031 : CStringDistance<uint16_t>(), use_sign(sign)
00032 {
00033     SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00034     dictionary_size= 1<<(sizeof(uint16_t)*8);
00035     dictionary_weights = new float64_t[dictionary_size];
00036     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00037 
00038     init(l, r);
00039 }
00040 
00041 CHammingWordDistance::~CHammingWordDistance()
00042 {
00043     cleanup();
00044 
00045     delete[] dictionary_weights;
00046 }
00047   
00048 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
00049 {
00050     bool result=CStringDistance<uint16_t>::init(l,r);
00051     return result;
00052 }
00053 
00054 void CHammingWordDistance::cleanup()
00055 {
00056 }
00057 
00058 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b)
00059 {
00060     int32_t alen, blen;
00061     bool free_avec, free_bvec;
00062 
00063     uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00064         get_feature_vector(idx_a, alen, free_avec);
00065     uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00066         get_feature_vector(idx_b, blen, free_bvec);
00067 
00068     int32_t result=0;
00069 
00070     int32_t left_idx=0;
00071     int32_t right_idx=0;
00072 
00073     if (use_sign)
00074     {
00075         // hamming of: if words appear in both vectors 
00076         while (left_idx < alen && right_idx < blen)
00077         {
00078             uint16_t sym=avec[left_idx];
00079             if (avec[left_idx]==bvec[right_idx])
00080             {
00081                 while (left_idx< alen && avec[left_idx]==sym)
00082                     left_idx++;
00083 
00084                 while (right_idx< blen && bvec[right_idx]==sym)
00085                     right_idx++;
00086             }
00087             else if (avec[left_idx]<bvec[right_idx])
00088             {
00089                 result++;
00090 
00091                 while (left_idx< alen && avec[left_idx]==sym)
00092                     left_idx++;
00093             }
00094             else
00095             {
00096                 sym=bvec[right_idx];
00097                 result++;
00098 
00099                 while (right_idx< blen && bvec[right_idx]==sym)
00100                     right_idx++;
00101             }
00102         }
00103     }
00104     else
00105     {
00106         //hamming of: if words appear in both vectors _the same number_ of times
00107         while (left_idx < alen && right_idx < blen)
00108         {
00109             uint16_t sym=avec[left_idx];
00110             if (avec[left_idx]==bvec[right_idx])
00111             {
00112                 int32_t old_left_idx=left_idx;
00113                 int32_t old_right_idx=right_idx;
00114 
00115                 while (left_idx< alen && avec[left_idx]==sym)
00116                     left_idx++;
00117 
00118                 while (right_idx< blen && bvec[right_idx]==sym)
00119                     right_idx++;
00120 
00121                 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
00122                     result++;
00123             }
00124             else if (avec[left_idx]<bvec[right_idx])
00125             {
00126                 result++;
00127 
00128                 while (left_idx< alen && avec[left_idx]==sym)
00129                     left_idx++;
00130             }
00131             else
00132             {
00133                 sym=bvec[right_idx];
00134                 result++;
00135 
00136                 while (right_idx< blen && bvec[right_idx]==sym)
00137                     right_idx++;
00138             }
00139         }
00140     }
00141 
00142     while (left_idx < alen)
00143     {
00144         uint16_t sym=avec[left_idx];
00145         result++;
00146 
00147         while (left_idx< alen && avec[left_idx]==sym)
00148             left_idx++;
00149     }
00150 
00151     while (right_idx < blen)
00152     {
00153         uint16_t sym=bvec[right_idx];
00154         result++;
00155 
00156         while (right_idx< blen && bvec[right_idx]==sym)
00157             right_idx++;
00158     }
00159 
00160     ((CStringFeatures<uint16_t>*) lhs)->
00161         free_feature_vector(avec, idx_a, free_avec);
00162     ((CStringFeatures<uint16_t>*) rhs)->
00163         free_feature_vector(bvec, idx_b, free_bvec);
00164 
00165     return result;
00166 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation