ImplicitWeightedSpecFeatures.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/ImplicitWeightedSpecFeatures.h"
00012 #include "lib/io.h"
00013
/** Builds implicit weighted-spectrum features on top of a string feature object.
 *
 * A reference is taken on str (released again in the destructor). The number
 * of strings, the original alphabet size and the order are read from str,
 * and the default weights are installed through set_wd_weights().
 *
 * @param str string features to wrap; must not be NULL
 * @param normalize when true, compute_normalization_const() is run so that
 *                  later dot products get scaled by per-string factors
 */
CImplicitWeightedSpecFeatures::CImplicitWeightedSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
{
	ASSERT(str);

	// Both arrays have to be NULL before set_wd_weights() runs, because it
	// delete[]s spec_weights, and dot() checks normalization_factors.
	spec_weights=NULL;
	normalization_factors=NULL;

	strings=str;
	SG_REF(strings);

	num_strings=strings->get_num_vectors();
	alphabet_size=strings->get_original_num_symbols();
	degree=strings->get_order();

	// fills spec_weights and spec_size
	set_wd_weights();

	SG_DEBUG("WEIGHTED SPEC alphasz=%d, size=%d, num_str=%d\n", alphabet_size,
			spec_size, num_strings);

	if (normalize)
		compute_normalization_const();
}
00032
00033 void CImplicitWeightedSpecFeatures::compute_normalization_const()
00034 {
00035 float64_t* factors=new float64_t[num_strings];
00036
00037 for (int32_t i=0; i<num_strings; i++)
00038 factors[i]=1.0/CMath::sqrt(dot(i,i));
00039
00040 normalization_factors=factors;
00041
00042 }
00043
00044 bool CImplicitWeightedSpecFeatures::set_wd_weights()
00045 {
00046 delete[] spec_weights;
00047 spec_weights=new float64_t[degree];
00048
00049 int32_t i;
00050 float64_t sum=0;
00051 spec_size=0;
00052
00053 for (i=0; i<degree; i++)
00054 {
00055 spec_size+=CMath::pow(alphabet_size, i+1);
00056 spec_weights[i]=degree-i;
00057 sum+=spec_weights[i];
00058 }
00059 for (i=0; i<degree; i++)
00060 spec_weights[i]=CMath::sqrt(spec_weights[i]/sum);
00061
00062 return spec_weights!=NULL;
00063 }
00064
/** Replaces the per-order weights with user supplied ones.
 *
 * The square root of each entry of w is stored, matching what
 * set_wd_weights() stores for the default weights.
 *
 * @param w array of d weights; must not be NULL unless degree is 0
 * @param d number of entries in w; has to equal degree
 * @return always true
 */
bool CImplicitWeightedSpecFeatures::set_weights(float64_t* w, int32_t d)
{
	ASSERT(d==degree);
	// w is dereferenced below for every order, so reject NULL up front
	ASSERT(w || degree==0);

	// Build the replacement first; releasing the old array before a failing
	// allocation would leave spec_weights dangling.
	float64_t* weights=new float64_t[degree];
	for (int32_t i=0; i<degree; i++)
		weights[i]=CMath::sqrt(w[i]);

	delete[] spec_weights;
	spec_weights=weights;

	return true;
}
00075
/** Copy constructor.
 *
 * Shares the underlying string features with orig (taking an additional
 * reference) and deep-copies the weight and normalization arrays, so that
 * both objects can be destroyed independently.
 *
 * @param orig object to copy from
 */
CImplicitWeightedSpecFeatures::CImplicitWeightedSpecFeatures(const CImplicitWeightedSpecFeatures& orig) : CDotFeatures(orig),
	num_strings(orig.num_strings),
	alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
{
	// Start from a well-defined state: the destructor delete[]s both arrays
	// and unrefs strings, so none of them may be left uninitialised.
	spec_weights=NULL;
	normalization_factors=NULL;

	strings=orig.strings;
	SG_REF(strings);

	degree=orig.degree;

	// one weight per order
	if (orig.spec_weights)
	{
		spec_weights=new float64_t[degree];
		for (int32_t i=0; i<degree; i++)
			spec_weights[i]=orig.spec_weights[i];
	}

	// one normalization factor per string (only present if normalization
	// was requested on orig)
	if (orig.normalization_factors)
	{
		normalization_factors=new float64_t[num_strings];
		for (int32_t i=0; i<num_strings; i++)
			normalization_factors[i]=orig.normalization_factors[i];
	}
}
00083
/** Destructor: frees the normalization and weight arrays and drops the
 * reference held on the wrapped string features.
 */
CImplicitWeightedSpecFeatures::~CImplicitWeightedSpecFeatures()
{
	// arrays owned by this object
	delete[] normalization_factors;
	delete[] spec_weights;

	// reference taken in the constructor
	SG_UNREF(strings);
}
00090
/** Weighted spectrum dot product between two of the wrapped strings.
 *
 * For every order d the symbols of both strings are masked, then both
 * arrays are walked in parallel: whenever the masked symbols agree, the
 * lengths of the two runs of that symbol are multiplied and added to the
 * result, scaled by spec_weights[d]^2.
 * NOTE(review): the parallel walk presumably relies on each string's symbols
 * being sorted in ascending order - confirm against the caller/preprocessing.
 *
 * @param vec_idx1 index of the first string, has to be below num_strings
 * @param vec_idx2 index of the second string, has to be below num_strings
 * @return the dot product, multiplied by both strings' normalization factors
 *         if normalization_factors is set
 */
float64_t CImplicitWeightedSpecFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
{
	ASSERT(vec_idx1 < num_strings);
	ASSERT(vec_idx2 < num_strings);

	int32_t len1=-1;
	int32_t len2=-1;
	uint16_t* vec1=strings->get_feature_vector(vec_idx1, len1);
	uint16_t* vec2=strings->get_feature_vector(vec_idx2, len2);

	float64_t result=0;
	uint8_t mask=0;

	for (int32_t d=0; d<degree; d++)
	{
		// one more mask bit is switched on per order, highest bit first
		mask |= (1 << (degree-d-1));
		const uint16_t masked=strings->get_masked_symbols(0xffff, mask);
		const float64_t weight=spec_weights[d]*spec_weights[d];

		int32_t pos1=0;
		int32_t pos2=0;

		while (pos1<len1 && pos2<len2)
		{
			const uint16_t sym1=vec1[pos1] & masked;
			const uint16_t sym2=vec2[pos2] & masked;

			// advance whichever side currently holds the smaller symbol
			if (sym1<sym2)
			{
				pos1++;
				continue;
			}
			if (sym2<sym1)
			{
				pos2++;
				continue;
			}

			// equal symbols: measure the run of this symbol on both sides
			int32_t run1=0;
			for (; pos1<len1 && (vec1[pos1] & masked)==sym1; pos1++)
				run1++;

			int32_t run2=0;
			for (; pos2<len2 && (vec2[pos2] & masked)==sym1; pos2++)
				run2++;

			result+=weight*run1*run2;
		}
	}

	if (!normalization_factors)
		return result;

	return result*normalization_factors[vec_idx1]*normalization_factors[vec_idx2];
}
00143
/** Dot product between one wrapped string and a dense vector.
 *
 * For every symbol of the string and every order d, an index is derived via
 * get_masked_symbols()/shift_symbol() and the entry vec2[offs+idx] is added
 * to the result, scaled by spec_weights[d]. offs grows by
 * strings->shift_offset(1,d+1) after each order.
 * NOTE(review): offs presumably skips the block of entries belonging to the
 * previous order - confirm against CStringFeatures::shift_offset.
 *
 * @param vec_idx1 index of the string, has to be below num_strings
 * @param vec2 dense vector
 * @param vec2_len length of vec2, has to equal spec_size
 * @return the dot product, scaled by the string's normalization factor if
 *         normalization_factors is set; an error is raised via SG_ERROR if
 *         the string is missing or empty
 */
float64_t CImplicitWeightedSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
{
	ASSERT(vec2_len == spec_size);
	ASSERT(vec_idx1 < num_strings);

	float64_t result=0;
	int32_t len1=-1;
	uint16_t* vec1=strings->get_feature_vector(vec_idx1, len1);

	if (!vec1 || len1<=0)
	{
		SG_ERROR("huh?\n");
		return result;
	}

	for (int32_t j=0; j<len1; j++)
	{
		const uint16_t v=vec1[j];
		uint8_t mask=0;
		int32_t offs=0;

		for (int32_t d=0; d<degree; d++)
		{
			const int32_t shift=degree-d-1;

			mask |= (1 << shift);
			int32_t idx=strings->get_masked_symbols(v, mask);
			idx=strings->shift_symbol(idx, shift);

			result += vec2[offs + idx]*spec_weights[d];
			offs+=strings->shift_offset(1,d+1);
		}
	}

	if (normalization_factors)
		result*=normalization_factors[vec_idx1];

	return result;
}
00179
/** Adds alpha times the implicit feature vector of one string to a dense
 * vector.
 *
 * For every symbol of the string and every order d, spec_weights[d]*alpha
 * (or its absolute value) is added to vec2[offs+idx], using the same index
 * computation as dense_dot(). If normalization_factors is set, alpha is
 * first multiplied by the string's factor. Nothing is added when the string
 * is missing or empty.
 *
 * The arguments are validated the same way dense_dot() does, since this
 * function writes into the caller's buffer.
 *
 * @param alpha scalar multiplier
 * @param vec_idx1 index of the string, has to be below num_strings
 * @param vec2 dense vector that is updated in place
 * @param vec2_len length of vec2, has to equal spec_size
 * @param abs_val when true, the absolute value of each increment is added
 */
void CImplicitWeightedSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
{
	ASSERT(vec2_len == spec_size);
	ASSERT(vec_idx1 < num_strings);

	int32_t len1=-1;
	uint16_t* vec=strings->get_feature_vector(vec_idx1, len1);

	if (normalization_factors)
		alpha*=normalization_factors[vec_idx1];

	if (vec && len1>0)
	{
		for (int32_t j=0; j<len1; j++)
		{
			uint8_t mask=0;
			int32_t offs=0;

			for (int32_t d=0; d<degree; d++)
			{
				// one more mask bit is switched on per order, highest bit first
				mask = mask | (1 << (degree-d-1));
				int32_t idx=strings->get_masked_symbols(vec[j], mask);
				idx=strings->shift_symbol(idx, degree-d-1);

				float64_t increment=alpha*spec_weights[d];
				if (abs_val)
					increment=CMath::abs(increment);

				vec2[offs + idx] += increment;
				offs+=strings->shift_offset(1,d+1);
			}
		}
	}
}
00208
/** Creates a new object from this one through the copy constructor.
 *
 * @return pointer to the newly allocated feature object
 */
CFeatures* CImplicitWeightedSpecFeatures::duplicate() const
{
	CImplicitWeightedSpecFeatures* copy=new CImplicitWeightedSpecFeatures(*this);
	return copy;
}