00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "features/TOPFeatures.h"
00013 #include "lib/io.h"
00014 #include "lib/Mathematics.h"
00015
00016 CTOPFeatures::CTOPFeatures(
00017 int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00018 : CSimpleFeatures<float64_t>(size), neglinear(neglin), poslinear(poslin)
00019 {
00020 memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00021 memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00022 set_models(p,n);
00023 }
00024
00025 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00026 : CSimpleFeatures<float64_t>(orig), pos(orig.pos), neg(orig.neg), neglinear(orig.neglinear),
00027 poslinear(orig.poslinear)
00028 {
00029 }
00030
00031 CTOPFeatures::~CTOPFeatures()
00032 {
00033 delete[] pos_relevant_indizes.idx_p;
00034 delete[] pos_relevant_indizes.idx_q;
00035 delete[] pos_relevant_indizes.idx_a_cols;
00036 delete[] pos_relevant_indizes.idx_a_rows;
00037 delete[] pos_relevant_indizes.idx_b_cols;
00038 delete[] pos_relevant_indizes.idx_b_rows;
00039
00040 delete[] neg_relevant_indizes.idx_p;
00041 delete[] neg_relevant_indizes.idx_q;
00042 delete[] neg_relevant_indizes.idx_a_cols;
00043 delete[] neg_relevant_indizes.idx_a_rows;
00044 delete[] neg_relevant_indizes.idx_b_cols;
00045 delete[] neg_relevant_indizes.idx_b_rows;
00046
00047 SG_UNREF(pos);
00048 SG_UNREF(neg);
00049 }
00050
00051 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00052 {
00053 ASSERT(p && n);
00054 SG_REF(p);
00055 SG_REF(n);
00056
00057 pos=p;
00058 neg=n;
00059 set_num_vectors(0);
00060
00061 delete[] feature_matrix ;
00062 feature_matrix=NULL ;
00063
00064
00065 if (pos && pos->get_observations())
00066 set_num_vectors(pos->get_observations()->get_num_vectors());
00067
00068 compute_relevant_indizes(p, &pos_relevant_indizes);
00069 compute_relevant_indizes(n, &neg_relevant_indizes);
00070 num_features=compute_num_features();
00071
00072 SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00073 }
00074
00075 float64_t* CTOPFeatures::compute_feature_vector(
00076 int32_t num, int32_t &len, float64_t* target)
00077 {
00078 float64_t* featurevector=target;
00079
00080 if (!featurevector)
00081 featurevector=new float64_t[get_num_features()];
00082
00083 if (!featurevector)
00084 return NULL;
00085
00086 compute_feature_vector(featurevector, num, len);
00087
00088 return featurevector;
00089 }
00090
00091 void CTOPFeatures::compute_feature_vector(
00092 float64_t* featurevector, int32_t num, int32_t& len)
00093 {
00094 int32_t i,j,p=0,x=num;
00095 int32_t idx=0;
00096
00097 float64_t posx=(poslinear) ?
00098 (pos->linear_model_probability(x)) : (pos->model_probability(x));
00099 float64_t negx=(neglinear) ?
00100 (neg->linear_model_probability(x)) : (neg->model_probability(x));
00101
00102 len=get_num_features();
00103
00104 featurevector[p++]=(posx-negx);
00105
00106
00107 if (poslinear)
00108 {
00109 for (i=0; i<pos->get_N(); i++)
00110 {
00111 for (j=0; j<pos->get_M(); j++)
00112 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00113 }
00114 }
00115 else
00116 {
00117 for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00118 featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00119
00120 for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00121 featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00122
00123 for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00124 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00125
00126 for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00127 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 }
00142
00143
00144 if (neglinear)
00145 {
00146 for (i=0; i<neg->get_N(); i++)
00147 {
00148 for (j=0; j<neg->get_M(); j++)
00149 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00150 }
00151 }
00152 else
00153 {
00154 for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00155 featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00156
00157 for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00158 featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00159
00160 for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00161 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00162
00163 for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00164 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177 }
00178 }
00179
00180 float64_t* CTOPFeatures::set_feature_matrix()
00181 {
00182 int32_t len=0;
00183
00184 num_features=get_num_features();
00185 ASSERT(num_features);
00186 ASSERT(pos);
00187 ASSERT(pos->get_observations());
00188
00189 num_vectors=pos->get_observations()->get_num_vectors();
00190 SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00191 delete[] feature_matrix;
00192 feature_matrix=new float64_t[num_features*num_vectors];
00193 if (!feature_matrix)
00194 {
00195 SG_ERROR( "allocation not successful!");
00196 return NULL ;
00197 } ;
00198
00199 SG_INFO( "calculating top feature matrix\n");
00200
00201 for (int32_t x=0; x<num_vectors; x++)
00202 {
00203 if (!(x % (num_vectors/10+1)))
00204 SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00205 else if (!(x % (num_vectors/200+1)))
00206 SG_DEBUG( ".");
00207
00208 compute_feature_vector(&feature_matrix[x*num_features], x, len);
00209 }
00210
00211 SG_DONE();
00212
00213 num_vectors=get_num_vectors() ;
00214 num_features=get_num_features() ;
00215
00216 return feature_matrix;
00217 }
00218
00219 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00220 {
00221 int32_t i=0;
00222 int32_t j=0;
00223
00224 hmm_idx->num_p=0;
00225 hmm_idx->num_q=0;
00226 hmm_idx->num_a=0;
00227 hmm_idx->num_b=0;
00228
00229 for (i=0; i<hmm->get_N(); i++)
00230 {
00231 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00232 hmm_idx->num_p++;
00233
00234 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00235 hmm_idx->num_q++;
00236
00237 for (j=0; j<hmm->get_N(); j++)
00238 {
00239 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00240 hmm_idx->num_a++;
00241 }
00242
00243 for (j=0; j<pos->get_M(); j++)
00244 {
00245 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00246 hmm_idx->num_b++;
00247 }
00248 }
00249
00250 if (hmm_idx->num_p > 0)
00251 {
00252 hmm_idx->idx_p=new int32_t[hmm_idx->num_p];
00253 ASSERT(hmm_idx->idx_p);
00254 }
00255
00256 if (hmm_idx->num_q > 0)
00257 {
00258 hmm_idx->idx_q=new int32_t[hmm_idx->num_q];
00259 ASSERT(hmm_idx->idx_q);
00260 }
00261
00262 if (hmm_idx->num_a > 0)
00263 {
00264 hmm_idx->idx_a_rows=new int32_t[hmm_idx->num_a];
00265 hmm_idx->idx_a_cols=new int32_t[hmm_idx->num_a];
00266 ASSERT(hmm_idx->idx_a_rows);
00267 ASSERT(hmm_idx->idx_a_cols);
00268 }
00269
00270 if (hmm_idx->num_b > 0)
00271 {
00272 hmm_idx->idx_b_rows=new int32_t[hmm_idx->num_b];
00273 hmm_idx->idx_b_cols=new int32_t[hmm_idx->num_b];
00274 ASSERT(hmm_idx->idx_b_rows);
00275 ASSERT(hmm_idx->idx_b_cols);
00276 }
00277
00278
00279 int32_t idx_p=0;
00280 int32_t idx_q=0;
00281 int32_t idx_a=0;
00282 int32_t idx_b=0;
00283
00284 for (i=0; i<hmm->get_N(); i++)
00285 {
00286 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00287 {
00288 ASSERT(idx_p < hmm_idx->num_p);
00289 hmm_idx->idx_p[idx_p++]=i;
00290 }
00291
00292 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00293 {
00294 ASSERT(idx_q < hmm_idx->num_q);
00295 hmm_idx->idx_q[idx_q++]=i;
00296 }
00297
00298 for (j=0; j<hmm->get_N(); j++)
00299 {
00300 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00301 {
00302 ASSERT(idx_a < hmm_idx->num_a);
00303 hmm_idx->idx_a_rows[idx_a]=i;
00304 hmm_idx->idx_a_cols[idx_a++]=j;
00305 }
00306 }
00307
00308 for (j=0; j<pos->get_M(); j++)
00309 {
00310 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00311 {
00312 ASSERT(idx_b < hmm_idx->num_b);
00313 hmm_idx->idx_b_rows[idx_b]=i;
00314 hmm_idx->idx_b_cols[idx_b++]=j;
00315 }
00316 }
00317 }
00318
00319 return true;
00320 }
00321
00322 int32_t CTOPFeatures::compute_num_features()
00323 {
00324 int32_t num=0;
00325
00326 if (pos && neg)
00327 {
00328 num+=1;
00329
00330 if (poslinear)
00331 num+=pos->get_N()*pos->get_M();
00332 else
00333 {
00334 num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00335 }
00336
00337 if (neglinear)
00338 num+=neg->get_N()*neg->get_M();
00339 else
00340 {
00341 num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00342 }
00343
00344
00345
00346
00347 }
00348 return num;
00349 }