LinearHMM.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "distributions/hmm/LinearHMM.h"
00013 #include "lib/common.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016
00017 CLinearHMM::CLinearHMM(CStringFeatures<uint16_t>* f)
00018 : CDistribution(), transition_probs(NULL), log_transition_probs(NULL)
00019 {
00020 features=f;
00021 sequence_length = f->get_vector_length(0);
00022 num_symbols = (int32_t) f->get_num_symbols();
00023 num_params = sequence_length*num_symbols;
00024 }
00025
00026 CLinearHMM::CLinearHMM(int32_t p_num_features, int32_t p_num_symbols)
00027 : CDistribution(), transition_probs(NULL), log_transition_probs(NULL)
00028 {
00029 sequence_length = p_num_features;
00030 num_symbols = p_num_symbols;
00031 num_params = sequence_length*num_symbols;
00032 }
00033
00034 CLinearHMM::~CLinearHMM()
00035 {
00036 delete[] transition_probs;
00037 delete[] log_transition_probs;
00038 }
00039
00040 bool CLinearHMM::train()
00041 {
00042 delete[] transition_probs;
00043 delete[] log_transition_probs;
00044 int32_t* int_transition_probs=new int32_t[num_params];
00045
00046 int32_t vec;
00047 int32_t i;
00048
00049 for (i=0; i< num_params; i++)
00050 int_transition_probs[i]=0;
00051
00052 for (vec=0; vec<features->get_num_vectors(); vec++)
00053 {
00054 int32_t len;
00055
00056 uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
00057 get_feature_vector(vec, len);
00058
00059
00060 for (int32_t feat=0; feat<len ; feat++)
00061 int_transition_probs[feat*num_symbols+vector[feat]]++;
00062 }
00063
00064
00065 transition_probs=new float64_t[num_params];
00066 log_transition_probs=new float64_t[num_params];
00067
00068 for (i=0;i<sequence_length;i++)
00069 {
00070 for (int32_t j=0; j<num_symbols; j++)
00071 {
00072 float64_t sum=0;
00073 int32_t offs=i*num_symbols+
00074 ((CStringFeatures<uint16_t> *) features)->
00075 get_masked_symbols((uint16_t)j,(uint8_t) 254);
00076 int32_t original_num_symbols=(int32_t)
00077 ((CStringFeatures<uint16_t> *) features)->
00078 get_original_num_symbols();
00079
00080 for (int32_t k=0; k<original_num_symbols; k++)
00081 sum+=int_transition_probs[offs+k];
00082
00083 transition_probs[i*num_symbols+j]=
00084 (int_transition_probs[i*num_symbols+j]+pseudo_count)/
00085 (sum+((CStringFeatures<uint16_t> *) features)->
00086 get_original_num_symbols()*pseudo_count);
00087 log_transition_probs[i*num_symbols+j]=
00088 log(transition_probs[i*num_symbols+j]);
00089 }
00090 }
00091
00092 delete[] int_transition_probs;
00093 return true;
00094 }
00095
00096 bool CLinearHMM::train(
00097 const int32_t* indizes, int32_t num_indizes, float64_t pseudo)
00098 {
00099 delete[] transition_probs;
00100 delete[] log_transition_probs;
00101 int32_t* int_transition_probs=new int32_t[num_params];
00102 int32_t vec;
00103 int32_t i;
00104
00105 for (i=0; i< num_params; i++)
00106 int_transition_probs[i]=0;
00107
00108 for (vec=0; vec<num_indizes; vec++)
00109 {
00110 int32_t len;
00111
00112 ASSERT(indizes[vec]>=0 &&
00113 indizes[vec]<((CStringFeatures<uint16_t>*) features)->
00114 get_num_vectors());
00115 uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
00116 get_feature_vector(indizes[vec], len);
00117
00118
00119
00120 for (int32_t feat=0; feat<len ; feat++)
00121 int_transition_probs[feat*num_symbols+vector[feat]]++;
00122 }
00123
00124
00125 transition_probs=new float64_t[num_params];
00126 log_transition_probs=new float64_t[num_params];
00127
00128 for (i=0;i<sequence_length;i++)
00129 {
00130 for (int32_t j=0; j<num_symbols; j++)
00131 {
00132 float64_t sum=0;
00133 int32_t original_num_symbols=(int32_t)
00134 ((CStringFeatures<uint16_t> *) features)->
00135 get_original_num_symbols();
00136 for (int32_t k=0; k<original_num_symbols; k++)
00137 {
00138 sum+=int_transition_probs[i*num_symbols+
00139 ((CStringFeatures<uint16_t>*) features)->
00140 get_masked_symbols((uint16_t)j,(uint8_t) 254)+k];
00141 }
00142
00143 transition_probs[i*num_symbols+j]=
00144 (int_transition_probs[i*num_symbols+j]+pseudo)/
00145 (sum+((CStringFeatures<uint16_t>*) features)->
00146 get_original_num_symbols()*pseudo);
00147 log_transition_probs[i*num_symbols+j]=
00148 log(transition_probs[i*num_symbols+j]);
00149 }
00150 }
00151
00152 delete[] int_transition_probs;
00153 return true;
00154 }
00155
00156 float64_t CLinearHMM::get_log_likelihood_example(uint16_t* vector, int32_t len)
00157 {
00158 float64_t result=log_transition_probs[vector[0]];
00159
00160 for (int32_t i=1; i<len; i++)
00161 result+=log_transition_probs[i*num_symbols+vector[i]];
00162
00163 return result;
00164 }
00165
00166 float64_t CLinearHMM::get_log_likelihood_example(int32_t num_example)
00167 {
00168 int32_t len;
00169 uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
00170 get_feature_vector(num_example, len);
00171 float64_t result=log_transition_probs[vector[0]];
00172
00173 for (int32_t i=1; i<len; i++)
00174 result+=log_transition_probs[i*num_symbols+vector[i]];
00175
00176 return result;
00177 }
00178
00179 float64_t CLinearHMM::get_likelihood_example(uint16_t* vector, int32_t len)
00180 {
00181 float64_t result=transition_probs[vector[0]];
00182
00183 for (int32_t i=1; i<len; i++)
00184 result*=transition_probs[i*num_symbols+vector[i]];
00185
00186 return result;
00187 }
00188
00189 float64_t CLinearHMM::get_log_derivative(int32_t num_param, int32_t num_example)
00190 {
00191 int32_t len;
00192 uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
00193 get_feature_vector(num_example, len);
00194 float64_t result=0;
00195 int32_t position=num_param/num_symbols;
00196 ASSERT(position>=0 && position<len);
00197 uint16_t sym=(uint16_t) (num_param-position*num_symbols);
00198
00199 if (vector[position]==sym && transition_probs[num_param]!=0)
00200 result=1.0/transition_probs[num_param];
00201
00202 return result;
00203 }
00204
00205 void CLinearHMM::get_transition_probs(float64_t** dst, int32_t* num)
00206 {
00207 *num=num_params;
00208 size_t sz=sizeof(*transition_probs)*(*num);
00209 *dst=(float64_t*) malloc(sz);
00210 ASSERT(dst);
00211
00212 memcpy(*dst, transition_probs, sz);
00213 }
00214
00215 bool CLinearHMM::set_transition_probs(const float64_t* src, int32_t num)
00216 {
00217 if (num!=-1)
00218 ASSERT(num==num_params);
00219
00220 if (!log_transition_probs)
00221 log_transition_probs=new float64_t[num_params];
00222
00223 if (!transition_probs)
00224 transition_probs=new float64_t[num_params];
00225
00226 for (int32_t i=0; i<num_params; i++)
00227 {
00228 transition_probs[i]=src[i];
00229 log_transition_probs[i]=log(transition_probs[i]);
00230 }
00231
00232 return true;
00233 }
00234
00235 void CLinearHMM::get_log_transition_probs(float64_t** dst, int32_t* num)
00236 {
00237 *num=num_params;
00238 size_t sz=sizeof(*log_transition_probs)*(*num);
00239 *dst=(float64_t*) malloc(sz);
00240 ASSERT(dst);
00241
00242 memcpy(*dst, log_transition_probs, sz);
00243 }
00244
00245 bool CLinearHMM::set_log_transition_probs(const float64_t* src, int32_t num)
00246 {
00247 if (num!=-1)
00248 ASSERT(num==num_params);
00249
00250 if (!log_transition_probs)
00251 log_transition_probs=new float64_t[num_params];
00252
00253 if (!transition_probs)
00254 transition_probs=new float64_t[num_params];
00255
00256 for (int32_t i=0; i< num_params; i++)
00257 {
00258 log_transition_probs[i]=src[i];
00259 transition_probs[i]=exp(log_transition_probs[i]);
00260 }
00261
00262 return true;
00263 }
00264
00265
00266
00267