00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "features/TOPFeatures.h"
00013 #include "lib/io.h"
00014 #include "lib/Mathematics.h"
00015
00016 using namespace shogun;
00017
00018 CTOPFeatures::CTOPFeatures(
00019 int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00020 : CSimpleFeatures<float64_t>(size), neglinear(neglin), poslinear(poslin)
00021 {
00022 memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00023 memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00024 set_models(p,n);
00025 }
00026
00027 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00028 : CSimpleFeatures<float64_t>(orig), pos(orig.pos), neg(orig.neg), neglinear(orig.neglinear),
00029 poslinear(orig.poslinear)
00030 {
00031 }
00032
00033 CTOPFeatures::~CTOPFeatures()
00034 {
00035 delete[] pos_relevant_indizes.idx_p;
00036 delete[] pos_relevant_indizes.idx_q;
00037 delete[] pos_relevant_indizes.idx_a_cols;
00038 delete[] pos_relevant_indizes.idx_a_rows;
00039 delete[] pos_relevant_indizes.idx_b_cols;
00040 delete[] pos_relevant_indizes.idx_b_rows;
00041
00042 delete[] neg_relevant_indizes.idx_p;
00043 delete[] neg_relevant_indizes.idx_q;
00044 delete[] neg_relevant_indizes.idx_a_cols;
00045 delete[] neg_relevant_indizes.idx_a_rows;
00046 delete[] neg_relevant_indizes.idx_b_cols;
00047 delete[] neg_relevant_indizes.idx_b_rows;
00048
00049 SG_UNREF(pos);
00050 SG_UNREF(neg);
00051 }
00052
00053 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00054 {
00055 ASSERT(p && n);
00056 SG_REF(p);
00057 SG_REF(n);
00058
00059 pos=p;
00060 neg=n;
00061 set_num_vectors(0);
00062
00063 delete[] feature_matrix ;
00064 feature_matrix=NULL ;
00065
00066
00067 if (pos && pos->get_observations())
00068 set_num_vectors(pos->get_observations()->get_num_vectors());
00069
00070 compute_relevant_indizes(p, &pos_relevant_indizes);
00071 compute_relevant_indizes(n, &neg_relevant_indizes);
00072 num_features=compute_num_features();
00073
00074 SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00075 }
00076
00077 float64_t* CTOPFeatures::compute_feature_vector(
00078 int32_t num, int32_t &len, float64_t* target)
00079 {
00080 float64_t* featurevector=target;
00081
00082 if (!featurevector)
00083 featurevector=new float64_t[get_num_features()];
00084
00085 if (!featurevector)
00086 return NULL;
00087
00088 compute_feature_vector(featurevector, num, len);
00089
00090 return featurevector;
00091 }
00092
00093 void CTOPFeatures::compute_feature_vector(
00094 float64_t* featurevector, int32_t num, int32_t& len)
00095 {
00096 int32_t i,j,p=0,x=num;
00097 int32_t idx=0;
00098
00099 float64_t posx=(poslinear) ?
00100 (pos->linear_model_probability(x)) : (pos->model_probability(x));
00101 float64_t negx=(neglinear) ?
00102 (neg->linear_model_probability(x)) : (neg->model_probability(x));
00103
00104 len=get_num_features();
00105
00106 featurevector[p++]=(posx-negx);
00107
00108
00109 if (poslinear)
00110 {
00111 for (i=0; i<pos->get_N(); i++)
00112 {
00113 for (j=0; j<pos->get_M(); j++)
00114 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00115 }
00116 }
00117 else
00118 {
00119 for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00120 featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00121
00122 for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00123 featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00124
00125 for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00126 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00127
00128 for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00129 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143 }
00144
00145
00146 if (neglinear)
00147 {
00148 for (i=0; i<neg->get_N(); i++)
00149 {
00150 for (j=0; j<neg->get_M(); j++)
00151 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00152 }
00153 }
00154 else
00155 {
00156 for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00157 featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00158
00159 for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00160 featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00161
00162 for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00163 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00164
00165 for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00166 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179 }
00180 }
00181
00182 float64_t* CTOPFeatures::set_feature_matrix()
00183 {
00184 int32_t len=0;
00185
00186 num_features=get_num_features();
00187 ASSERT(num_features);
00188 ASSERT(pos);
00189 ASSERT(pos->get_observations());
00190
00191 num_vectors=pos->get_observations()->get_num_vectors();
00192 SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00193 delete[] feature_matrix;
00194 feature_matrix=new float64_t[num_features*num_vectors];
00195 if (!feature_matrix)
00196 {
00197 SG_ERROR( "allocation not successful!");
00198 return NULL ;
00199 } ;
00200
00201 SG_INFO( "calculating top feature matrix\n");
00202
00203 for (int32_t x=0; x<num_vectors; x++)
00204 {
00205 if (!(x % (num_vectors/10+1)))
00206 SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00207 else if (!(x % (num_vectors/200+1)))
00208 SG_DEBUG( ".");
00209
00210 compute_feature_vector(&feature_matrix[x*num_features], x, len);
00211 }
00212
00213 SG_DONE();
00214
00215 num_vectors=get_num_vectors() ;
00216 num_features=get_num_features() ;
00217
00218 return feature_matrix;
00219 }
00220
00221 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00222 {
00223 int32_t i=0;
00224 int32_t j=0;
00225
00226 hmm_idx->num_p=0;
00227 hmm_idx->num_q=0;
00228 hmm_idx->num_a=0;
00229 hmm_idx->num_b=0;
00230
00231 for (i=0; i<hmm->get_N(); i++)
00232 {
00233 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00234 hmm_idx->num_p++;
00235
00236 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00237 hmm_idx->num_q++;
00238
00239 for (j=0; j<hmm->get_N(); j++)
00240 {
00241 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00242 hmm_idx->num_a++;
00243 }
00244
00245 for (j=0; j<pos->get_M(); j++)
00246 {
00247 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00248 hmm_idx->num_b++;
00249 }
00250 }
00251
00252 if (hmm_idx->num_p > 0)
00253 {
00254 hmm_idx->idx_p=new int32_t[hmm_idx->num_p];
00255 ASSERT(hmm_idx->idx_p);
00256 }
00257
00258 if (hmm_idx->num_q > 0)
00259 {
00260 hmm_idx->idx_q=new int32_t[hmm_idx->num_q];
00261 ASSERT(hmm_idx->idx_q);
00262 }
00263
00264 if (hmm_idx->num_a > 0)
00265 {
00266 hmm_idx->idx_a_rows=new int32_t[hmm_idx->num_a];
00267 hmm_idx->idx_a_cols=new int32_t[hmm_idx->num_a];
00268 ASSERT(hmm_idx->idx_a_rows);
00269 ASSERT(hmm_idx->idx_a_cols);
00270 }
00271
00272 if (hmm_idx->num_b > 0)
00273 {
00274 hmm_idx->idx_b_rows=new int32_t[hmm_idx->num_b];
00275 hmm_idx->idx_b_cols=new int32_t[hmm_idx->num_b];
00276 ASSERT(hmm_idx->idx_b_rows);
00277 ASSERT(hmm_idx->idx_b_cols);
00278 }
00279
00280
00281 int32_t idx_p=0;
00282 int32_t idx_q=0;
00283 int32_t idx_a=0;
00284 int32_t idx_b=0;
00285
00286 for (i=0; i<hmm->get_N(); i++)
00287 {
00288 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00289 {
00290 ASSERT(idx_p < hmm_idx->num_p);
00291 hmm_idx->idx_p[idx_p++]=i;
00292 }
00293
00294 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00295 {
00296 ASSERT(idx_q < hmm_idx->num_q);
00297 hmm_idx->idx_q[idx_q++]=i;
00298 }
00299
00300 for (j=0; j<hmm->get_N(); j++)
00301 {
00302 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00303 {
00304 ASSERT(idx_a < hmm_idx->num_a);
00305 hmm_idx->idx_a_rows[idx_a]=i;
00306 hmm_idx->idx_a_cols[idx_a++]=j;
00307 }
00308 }
00309
00310 for (j=0; j<pos->get_M(); j++)
00311 {
00312 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00313 {
00314 ASSERT(idx_b < hmm_idx->num_b);
00315 hmm_idx->idx_b_rows[idx_b]=i;
00316 hmm_idx->idx_b_cols[idx_b++]=j;
00317 }
00318 }
00319 }
00320
00321 return true;
00322 }
00323
00324 int32_t CTOPFeatures::compute_num_features()
00325 {
00326 int32_t num=0;
00327
00328 if (pos && neg)
00329 {
00330 num+=1;
00331
00332 if (poslinear)
00333 num+=pos->get_N()*pos->get_M();
00334 else
00335 {
00336 num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00337 }
00338
00339 if (neglinear)
00340 num+=neg->get_N()*neg->get_M();
00341 else
00342 {
00343 num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00344 }
00345
00346
00347
00348
00349 }
00350 return num;
00351 }