00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "distributions/hmm/HMM.h"
00012 #include "lib/Mathematics.h"
00013 #include "lib/io.h"
00014 #include "lib/config.h"
00015 #include "lib/Signal.h"
00016 #include "base/Parallel.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019
00020 #include <stdlib.h>
00021 #include <stdio.h>
00022 #include <time.h>
00023 #include <ctype.h>
00024
00025 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
00026 #define ARRAY_SIZE 65336
00027
00029
00031
// Bit flags recording which sections of a plain HMM model file have been
// successfully parsed (sizes N/M, alphabet size O, transition table a,
// emission table b, start distribution p, end distribution q).
const int32_t CHMM::GOTN= (1<<1);
const int32_t CHMM::GOTM= (1<<2);
const int32_t CHMM::GOTO= (1<<3);
const int32_t CHMM::GOTa= (1<<4);
const int32_t CHMM::GOTb= (1<<5);
const int32_t CHMM::GOTp= (1<<6);
const int32_t CHMM::GOTq= (1<<7);

// Corresponding flags for the "definitions" file format: which learnable
// and which constant parameter lists have been read.
const int32_t CHMM::GOTlearn_a= (1<<1);
const int32_t CHMM::GOTlearn_b= (1<<2);
const int32_t CHMM::GOTlearn_p= (1<<3);
const int32_t CHMM::GOTlearn_q= (1<<4);
const int32_t CHMM::GOTconst_a= (1<<5);
const int32_t CHMM::GOTconst_b= (1<<6);
const int32_t CHMM::GOTconst_p= (1<<7);
const int32_t CHMM::GOTconst_q= (1<<8);
00048
// States of the model-file parser; one GET_* state per file section.
// NOTE(review): inferred from the GOT* flags above — the load routines that
// drive this state machine are not in this chunk; confirm against them.
enum E_STATE
{
	INITIAL,
	ARRAYs,
	GET_N,
	GET_M,
	GET_a,
	GET_b,
	GET_p,
	GET_q,
	GET_learn_a,
	GET_learn_b,
	GET_learn_p,
	GET_learn_q,
	GET_const_a,
	GET_const_b,
	GET_const_p,
	GET_const_q,
	COMMENT,
	END
};
00070
00071
#ifdef FIX_POS
// Per-position state fixing: marker values stored in CModel::fix_pos_state
// (see the CModel constructor) and the log-space penalty added to disallowed
// states during Viterbi decoding (see CHMM::best_path).
const char CModel::FIX_DISALLOWED=0 ;
const char CModel::FIX_ALLOWED=1 ;
const char CModel::FIX_DEFAULT=-1 ;
const float64_t CModel::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
#endif
00078
00079 CModel::CModel()
00080 {
00081 const_a=new int[ARRAY_SIZE];
00082 const_b=new int[ARRAY_SIZE];
00083 const_p=new int[ARRAY_SIZE];
00084 const_q=new int[ARRAY_SIZE];
00085 const_a_val=new float64_t[ARRAY_SIZE];
00086 const_b_val=new float64_t[ARRAY_SIZE];
00087 const_p_val=new float64_t[ARRAY_SIZE];
00088 const_q_val=new float64_t[ARRAY_SIZE];
00089
00090
00091 learn_a=new int[ARRAY_SIZE];
00092 learn_b=new int[ARRAY_SIZE];
00093 learn_p=new int[ARRAY_SIZE];
00094 learn_q=new int[ARRAY_SIZE];
00095
00096 #ifdef FIX_POS
00097 fix_pos_state = new char[ARRAY_SIZE];
00098 #endif
00099 for (int32_t i=0; i<ARRAY_SIZE; i++)
00100 {
00101 const_a[i]=-1 ;
00102 const_b[i]=-1 ;
00103 const_p[i]=-1 ;
00104 const_q[i]=-1 ;
00105 const_a_val[i]=1.0 ;
00106 const_b_val[i]=1.0 ;
00107 const_p_val[i]=1.0 ;
00108 const_q_val[i]=1.0 ;
00109 learn_a[i]=-1 ;
00110 learn_b[i]=-1 ;
00111 learn_p[i]=-1 ;
00112 learn_q[i]=-1 ;
00113 #ifdef FIX_POS
00114 fix_pos_state[i] = FIX_DEFAULT ;
00115 #endif
00116 } ;
00117 }
00118
00119 CModel::~CModel()
00120 {
00121 delete[] const_a;
00122 delete[] const_b;
00123 delete[] const_p;
00124 delete[] const_q;
00125 delete[] const_a_val;
00126 delete[] const_b_val;
00127 delete[] const_p_val;
00128 delete[] const_q_val;
00129
00130 delete[] learn_a;
00131 delete[] learn_b;
00132 delete[] learn_p;
00133 delete[] learn_q;
00134
00135 #ifdef FIX_POS
00136 delete[] fix_pos_state;
00137 #endif
00138
00139 }
00140
00141 CHMM::CHMM(CHMM* h)
00142 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00143 {
00144 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00145
00146 this->N=h->get_N();
00147 this->M=h->get_M();
00148 status=initialize(NULL, h->get_pseudo());
00149 this->copy_model(h);
00150 set_observations(h->p_observations);
00151 }
00152
00153 CHMM::CHMM(int32_t p_N, int32_t p_M, CModel* p_model, float64_t p_PSEUDO)
00154 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00155 {
00156 this->N=p_N;
00157 this->M=p_M;
00158 model=NULL ;
00159
00160 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00161
00162 status=initialize(p_model, p_PSEUDO);
00163 }
00164
00165 CHMM::CHMM(
00166 CStringFeatures<uint16_t>* obs, int32_t p_N, int32_t p_M,
00167 float64_t p_PSEUDO)
00168 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00169 {
00170 this->N=p_N;
00171 this->M=p_M;
00172 model=NULL ;
00173
00174 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00175
00176 initialize(model, p_PSEUDO);
00177 set_observations(obs);
00178 }
00179
// Construct a bare HMM from caller-owned dense parameter arrays: p (start
// distribution), q (end distribution) and a (N x N transition matrix).
// No emission table is set (M=0, observation_matrix_b stays NULL), so
// free_state_dependend_arrays() will not delete the caller's arrays.
// initialize() is deliberately NOT called here.
CHMM::CHMM(int32_t p_N, float64_t* p, float64_t* q, float64_t* a)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	this->N=p_N;
	this->M=0;
	model=NULL ;

	// no sparse transition lists in this variant
	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — PSEUDO is read before ever being set in
	// this constructor, so it stays indeterminate. Presumably a default
	// pseudo count was intended; confirm before relying on PSEUDO here.
	this->PSEUDO= PSEUDO;
	// NOTE(review): also a no-op — `model` was already assigned NULL above.
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// adopt the caller's arrays directly (not copied, not owned)
	transition_matrix_a=a ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;


}
00231
// Construct an HMM with a sparse transition structure.
// a_trans is laid out as a (num_trans x 3) matrix in column-major order:
//   a_trans[i]             = target state of transition i
//   a_trans[i+num_trans]   = source state of transition i
//   a_trans[i+2*num_trans] = transition value
// Rows must be sorted by source state (the ASSERTs below spot-check this
// ordering). The constructor builds, per source state, the list of reachable
// target states and their values (trans_list_forward / _val / _cnt).
// p and q are adopted directly (caller-owned, not copied).
CHMM::CHMM(
	int32_t p_N, float64_t* p, float64_t* q, int32_t num_trans,
	float64_t* a_trans)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	model=NULL ;

	this->N=p_N;
	this->M=0;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — PSEUDO stays indeterminate here, same
	// quirk as the dense-array constructor above; confirm intended default.
	this->PSEUDO= PSEUDO;
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	trans_list_forward_cnt=NULL ;
	trans_list_len = N ;
	trans_list_forward = new T_STATES*[N] ;
	trans_list_forward_val = new float64_t*[N] ;
	trans_list_forward_cnt = new T_STATES[N] ;

	// First pass: for each source state j, count its consecutive rows in
	// a_trans (relies on the sorted-by-source layout) and allocate its lists.
	int32_t start_idx=0;
	for (int32_t j=0; j<N; j++)
	{
		int32_t old_start_idx=start_idx;

		while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
		{
			start_idx++;

			// sanity check: source column must be non-decreasing
			if (start_idx>1 && start_idx<num_trans)
				ASSERT(a_trans[start_idx+num_trans-1]<=
					a_trans[start_idx+num_trans]);
		}

		// NOTE(review): duplicates the check done inside the loop — appears
		// redundant but is harmless; kept as-is.
		if (start_idx>1 && start_idx<num_trans)
			ASSERT(a_trans[start_idx+num_trans-1]<=
				a_trans[start_idx+num_trans]);

		int32_t len=start_idx-old_start_idx;
		ASSERT(len>=0);

		trans_list_forward_cnt[j] = 0 ;

		if (len>0)
		{
			trans_list_forward[j] = new T_STATES[len] ;
			trans_list_forward_val[j] = new float64_t[len] ;
		}
		else
		{
			trans_list_forward[j] = NULL;
			trans_list_forward_val[j] = NULL;
		}
	}

	// Second pass: fill the per-source lists with targets and values.
	for (int32_t i=0; i<num_trans; i++)
	{
		int32_t from = (int32_t)a_trans[i+num_trans] ;
		int32_t to = (int32_t)a_trans[i] ;
		float64_t val = a_trans[i+num_trans*2] ;

		ASSERT(from>=0 && from<N);
		ASSERT(to>=0 && to<N);

		trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
		trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
		trans_list_forward_cnt[from]++ ;

	} ;

	// dense matrices unused in the sparse representation; p/q adopted as-is
	transition_matrix_a=NULL ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;


}
00342
00343
// Construct an HMM by loading its definition from an already opened model
// file; success is recorded in the status member.
// @param model_file open file containing the model definition
// @param p_PSEUDO pseudo count
CHMM::CHMM(FILE* model_file, float64_t p_PSEUDO)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;

	status=initialize(NULL, p_PSEUDO, model_file);
}
00351
// Destructor: release the sparse transition lists, the state-dependent
// parameter arrays, and (unless the caches are shared with another HMM via
// reused_caches) the alpha/beta caches, psi tables and Viterbi paths.
CHMM::~CHMM()
{
	SG_UNREF(p_observations);

	// sparse transition lists (allocated by the num_trans constructor)
	if (trans_list_forward_cnt)
		delete[] trans_list_forward_cnt ;
	if (trans_list_backward_cnt)
		delete[] trans_list_backward_cnt ;
	if (trans_list_forward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward[i])
				delete[] trans_list_forward[i] ;
		delete[] trans_list_forward ;
	}
	if (trans_list_forward_val)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward_val[i])
				delete[] trans_list_forward_val[i] ;
		delete[] trans_list_forward_val ;
	}
	if (trans_list_backward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_backward[i])
				delete[] trans_list_backward[i] ;
		delete[] trans_list_backward ;
	} ;

	free_state_dependend_arrays();

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one alpha/beta cache per thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
		}

		delete[] alpha_cache;
		delete[] beta_cache;
		alpha_cache=NULL;
		beta_cache=NULL;
#else // USE_HMMPARALLEL_STRUCTURES
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		alpha_cache.table=NULL;
		beta_cache.table=NULL;
#endif // USE_HMMPARALLEL_STRUCTURES

		delete[] states_per_observation_psi;
		states_per_observation_psi=NULL;
	}

	// NOTE(review): this teardown is guarded by USE_LOGSUMARRAY, while the
	// matching allocations in initialize()/alloc_state_dependend_arrays()
	// are guarded by LOG_SUMARRAY — if only one of the two macros is defined
	// this either leaks or deletes an unallocated pointer. Confirm which
	// spelling the build actually defines.
#ifdef USE_LOGSUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			delete[] arrayS[i];
		delete[] arrayS ;
	} ;
#else //USE_HMMPARALLEL_STRUCTURES
	delete[] arrayS;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_LOGSUMARRAY

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		delete[] path_prob_updated ;
		delete[] path_prob_dimension ;
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			delete[] path[i] ;
#endif //USE_HMMPARALLEL_STRUCTURES
		delete[] path;
	}
}
00433
// Allocate all arrays whose size depends on N and M: the model parameters
// (randomly initialized in log space if none were supplied), the per-thread
// scratch arrays arrayN1/arrayN2, the estimate tables A/B, and — if
// observations are already attached — the observation-dependent caches.
// @return true if all parameter arrays are allocated
bool CHMM::alloc_state_dependend_arrays()
{

	// only allocate fresh parameters if none were handed in externally
	if (!transition_matrix_a && !observation_matrix_b &&
		!initial_state_distribution_p && !end_state_distribution_q)
	{
		transition_matrix_a=new float64_t[N*N];
		observation_matrix_b=new float64_t[N*M];
		initial_state_distribution_p=new float64_t[N];
		end_state_distribution_q=new float64_t[N];
		init_model_random();
		convert_to_log();
	}

#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		arrayN1[i]=new float64_t[N];
		arrayN2[i]=new float64_t[N];
	}
#else //USE_HMMPARALLEL_STRUCTURES
	arrayN1=new float64_t[N];
	arrayN2=new float64_t[N];
#endif //USE_HMMPARALLEL_STRUCTURES

	// NOTE(review): guarded by LOG_SUMARRAY here, but the destructor frees
	// arrayS under USE_LOGSUMARRAY — inconsistent macro spellings; confirm
	// which one the build defines before touching either side.
#ifdef LOG_SUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
		arrayS[i]=new float64_t[(int32_t)(this->N/2+1)];
#else //USE_HMMPARALLEL_STRUCTURES
	arrayS=new float64_t[(int32_t)(this->N/2+1)];
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //LOG_SUMARRAY
	transition_matrix_A=new float64_t[this->N*this->N];
	observation_matrix_B=new float64_t[this->N*this->M];

	if (p_observations)
	{
		// re-attach observations so observation-sized caches get rebuilt
#ifdef USE_HMMPARALLEL_STRUCTURES
		if (alpha_cache[0].table!=NULL)
#else //USE_HMMPARALLEL_STRUCTURES
		if (alpha_cache.table!=NULL)
#endif //USE_HMMPARALLEL_STRUCTURES
			set_observations(p_observations);
		else
			set_observation_nocache(p_observations);
		SG_UNREF(p_observations);
	}

	this->invalidate_model();

	return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
			(transition_matrix_a != NULL) && (observation_matrix_b != NULL) &&
			(initial_state_distribution_p != NULL) &&
			(end_state_distribution_q != NULL));
}
00490
// Free all arrays whose size depends on N and M and reset the pointers.
// The six parameter arrays are only deleted when observation_matrix_b is
// non-NULL: constructors that adopt caller-owned p/q/a leave
// observation_matrix_b NULL, so caller memory is never freed here.
void CHMM::free_state_dependend_arrays()
{
#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		delete[] arrayN1[i];
		delete[] arrayN2[i];

		arrayN1[i]=NULL;
		arrayN2[i]=NULL;
	}
#else
	delete[] arrayN1;
	delete[] arrayN2;
	arrayN1=NULL;
	arrayN2=NULL;
#endif
	if (observation_matrix_b)
	{
		delete[] transition_matrix_A;
		delete[] observation_matrix_B;
		delete[] transition_matrix_a;
		delete[] observation_matrix_b;
		delete[] initial_state_distribution_p;
		delete[] end_state_distribution_q;
	} ;

	transition_matrix_A=NULL;
	observation_matrix_B=NULL;
	transition_matrix_a=NULL;
	observation_matrix_b=NULL;
	initial_state_distribution_p=NULL;
	end_state_distribution_q=NULL;
}
00525
// Shared constructor body: reset all members, allocate the per-thread cache
// and path structures, optionally load a model file, and allocate the
// state-dependent arrays.
// @param m optional model constraints (may be NULL)
// @param pseudo pseudo count value
// @param modelfile optional open model file to load from (may be NULL)
// @return true if loading (when requested) and all allocations succeeded
bool CHMM::initialize(CModel* m, float64_t pseudo, FILE* modelfile)
{

	bool files_ok=true;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	this->PSEUDO= pseudo;
	this->model= m;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	// one alpha/beta cache and one psi table per worker thread
	alpha_cache=new T_ALPHA_BETA[parallel->get_num_threads()] ;
	beta_cache=new T_ALPHA_BETA[parallel->get_num_threads()] ;
	states_per_observation_psi=new P_STATES[parallel->get_num_threads()] ;

	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		this->alpha_cache[i].table=NULL;
		this->beta_cache[i].table=NULL;
		this->alpha_cache[i].dimension=0;
		this->beta_cache[i].dimension=0;
		this->states_per_observation_psi[i]=NULL ;
	}

#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
	this->states_per_observation_psi=NULL ;
#endif //USE_HMMPARALLEL_STRUCTURES

	if (modelfile)
		files_ok= files_ok && load_model(modelfile);

#ifdef USE_HMMPARALLEL_STRUCTURES
	path_prob_updated=new bool[parallel->get_num_threads()];
	path_prob_dimension=new int[parallel->get_num_threads()];

	path=new P_STATES[parallel->get_num_threads()];

	for (int32_t i=0; i<parallel->get_num_threads(); i++)
		this->path[i]=NULL;

#else // USE_HMMPARALLEL_STRUCTURES
	this->path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES

#ifdef USE_HMMPARALLEL_STRUCTURES
	arrayN1=new float64_t*[parallel->get_num_threads()];
	arrayN2=new float64_t*[parallel->get_num_threads()];
#endif //USE_HMMPARALLEL_STRUCTURES

	// NOTE(review): LOG_SUMARRAY here vs USE_LOGSUMARRAY in the destructor —
	// see the matching note in alloc_state_dependend_arrays(); confirm the
	// intended macro spelling.
#ifdef LOG_SUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	arrayS=new float64_t*[parallel->get_num_threads()] ;
#endif // USE_HMMPARALLEL_STRUCTURES
#endif //LOG_SUMARRAY

	alloc_state_dependend_arrays();

	this->loglikelihood=false;
	mem_initialized = true ;
	this->invalidate_model();

	return	((files_ok) &&
			(transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
			(transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
			(end_state_distribution_q != NULL));
}
00609
00610
00611
00612
00613
00614
// Forward algorithm (log space) using the sparse transition lists.
// Computes log P(O_0..O_time, q_time==state | model) for the observation
// sequence `dimension`. With an alpha cache the full trellis is computed and
// stored (plus the total sequence probability in .sum); without one, two
// N-sized scratch rows from ARRAYN1/ARRAYN2 are ping-ponged.
// All quantities are log probabilities combined with logarithmic_sum.
float64_t CHMM::forward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;


	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cache available: fill the whole table, then answer from it
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// no cache: use the two scratch rows
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialization: alpha_0(i) = p(i) * b_i(O_0)
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// induction over time steps
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{

			for (int32_t j=0; j<N; j++)
			{
				// sum only over states with a transition into j
				register int32_t i, num = trans_list_forward_cnt[j] ;
				float64_t sum=-CMath::INFTY;
				for (i=0; i < num; i++)
				{
					int32_t ii = trans_list_forward[j][i] ;
					sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
				} ;

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap scratch rows
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance to the next row of the cached trellis
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		// termination
		if (time<p_observations->get_vector_length(dimension))
		{
			register int32_t i, num=trans_list_forward_cnt[state];
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<num; i++)
			{
				int32_t ii = trans_list_forward[state][i] ;
				sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
			} ;

			return sum + get_b(state, p_observations->get_feature(dimension,time));
		}
		else
		{
			// past the sequence end: total log probability via q
			register int32_t i ;
			float64_t sum ;
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// mark the cache valid and answer from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00714
00715
00716
00717
00718
// Dense variant of the forward algorithm: identical structure to
// forward_comp() but sums over all N predecessor states instead of the
// sparse transition lists. When USE_LOGSUMARRAY is defined, the log-sums are
// computed pairwise via logarithmic_sum_array (using the ARRAYS scratch
// buffer) instead of a sequential fold.
float64_t CHMM::forward_comp_old(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cache available: fill the whole trellis
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// no cache: ping-pong two scratch rows
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialization: alpha_0(i) = p(i) * b_i(O_0)
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// induction over time steps
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{

			for (int32_t j=0; j<N; j++)
			{
				register int32_t i ;
#ifdef USE_LOGSUMARRAY
				// pairwise reduction of alpha[i]+a(i,j) over all i
				for (i=0; i<(N>>1); i++)
					ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
							alpha[(i<<1)+1] + get_a((i<<1)+1,j));
				if (N%2==1)
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
						CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j),
								CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (i=0; i<N; i++)
					sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
#endif //USE_LOGSUMARRAY
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap scratch rows
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance to the next cached trellis row
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		// termination
		if (time<p_observations->get_vector_length(dimension))
		{
			register int32_t i;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
						alpha[(i<<1)+1] + get_a((i<<1)+1,state));
			if (N%2==1)
				return get_b(state, p_observations->get_feature(dimension,time))+
					CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state),
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));

			return sum + get_b(state, p_observations->get_feature(dimension,time));
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// past the sequence end: total log probability via q
			register int32_t i ;
			float64_t sum ;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
						alpha[(i<<1)+1] + get_q((i<<1)+1));
			if (N%2==1)
				sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1),
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
#endif //USE_LOGSUMARRAY

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// mark the cache valid and answer from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00846
00847
00848
00849
00850
// Backward algorithm (log space) using the sparse transition lists.
// Computes log P(O_{time+1}..O_T | q_time==state, model) for observation
// sequence `dimension`. With a beta cache the whole trellis is filled
// backwards (rows walked from the last toward the first); otherwise two
// N-sized scratch rows from ARRAYN1/ARRAYN2 are ping-ponged.
float64_t CHMM::backward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// cache available: start at the last row, walk backwards
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)

		// at (or past) the last position: beta_T(state) = q(state)
		return get_q(state);
	else
	{
		// initialization: beta_{T-1}(i) = q(i)
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// induction, running backwards through the sequence
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				// sum only over states reachable from i
				register int32_t j, num=trans_list_backward_cnt[i] ;
				float64_t sum=-CMath::INFTY;
				for (j=0; j<num; j++)
				{
					int32_t jj = trans_list_backward[i][j] ;
					sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
				} ;
				beta_new[i]=sum;
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap scratch rows
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// step back one row in the cached trellis
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// termination at the requested position
			register int32_t j, num=trans_list_backward_cnt[state] ;
			float64_t sum=-CMath::INFTY;
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[state][j] ;
				sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
			} ;
			return sum;
		}
		else
		{
			if (BETA_CACHE(dimension).table)
			{
				// full-sequence probability via p; mark the cache valid
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
00948
00949
// Dense variant of the backward algorithm: identical structure to
// backward_comp() but sums over all N successor states instead of the
// sparse lists. With USE_LOGSUMARRAY the log-sums are computed pairwise via
// logarithmic_sum_array (ARRAYS scratch buffer) instead of a sequential fold.
float64_t CHMM::backward_comp_old(
	int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// cache available: start at the last row, walk backwards
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)

		// at (or past) the last position: beta_T(state) = q(state)
		return get_q(state);
	else
	{
		// initialization: beta_{T-1}(i) = q(i)
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// induction, running backwards through the sequence
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j ;
#ifdef USE_LOGSUMARRAY
				// pairwise reduction over all successor states j
				for (j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(
							get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
							get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
				if (N%2==1)
					beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);

				beta_new[i]=sum;
#endif //USE_LOGSUMARRAY
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap scratch rows
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// step back one row in the cached trellis
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// termination at the requested position
			register int32_t j ;
#ifdef USE_LOGSUMARRAY
			for (j=0; j<(N>>1); j++)
				ARRAYS(dimension)[j]=CMath::logarithmic_sum(
						get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
						get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
			if (N%2==1)
				return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1],
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			float64_t sum=-CMath::INFTY;
			for (j=0; j<N; j++)
				sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);

			return sum;
#endif //USE_LOGSUMARRAY
		}
		else
		{
			if (BETA_CACHE(dimension).table)
			{
				// full-sequence probability via p; mark the cache valid
#ifdef USE_LOGSUMARRAY//AAA
				for (int32_t j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
							get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
				if (N%2==1)
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
#endif //USE_LOGSUMARRAY
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
01079
01080
01081
// Computes the Viterbi (most probable state) path for observation vector
// `dimension` and returns its log probability.
//
// dimension == -1 requests the average best-path log probability over ALL
// observation vectors (cached in all_pat_prob once computed).
// Returns -1 if no observations are attached, the psi table for this
// dimension is unavailable, or the feature vector cannot be fetched.
// Side effects: fills PATH(dimension) with the decoded state sequence and
// updates the per-dimension path caches.
float64_t CHMM::best_path(int32_t dimension)
{
	if (!p_observations)
		return -1;

	if (dimension==-1)
	{
		if (!all_path_prob_updated)
		{
			SG_INFO( "computing full viterbi likelihood\n") ;
			float64_t sum = 0 ;
			// average of the per-vector best-path log probabilities
			for (int32_t i=0; i<p_observations->get_num_vectors(); i++)
				sum+=best_path(i) ;
			sum /= p_observations->get_num_vectors() ;
			all_pat_prob=sum ;
			all_path_prob_updated=true ;
			return sum ;
		} else
			return all_pat_prob ;
	} ;

	if (!STATES_PER_OBSERVATION_PSI(dimension))
		return -1 ;

	int32_t len=0;
	if (!p_observations->get_feature_vector(dimension,len))
		return -1;

	if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
		return pat_prob;
	else
	{
		// delta holds the Viterbi scores of the previous time step,
		// delta_new the scores being computed for the current step;
		// the two buffers are swapped after each step.
		register float64_t* delta= ARRAYN2(dimension);
		register float64_t* delta_new= ARRAYN1(dimension);

		// initialization: delta_0(i) = log p(i) + log b_i(O_0)
		{
			for (register int32_t i=0; i<N; i++)
			{
				delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
				set_psi(0, i, 0, dimension);
			}
		}

#ifdef USE_PATHDEBUG
		float64_t worst=-CMath::INFTY/4 ;
#endif

		// recursion over time: delta_t(j) = max_i(delta_{t-1}(i) + log a_ij) + log b_j(O_t)
		for (register int32_t t=1; t<p_observations->get_vector_length(dimension); t++)
		{
			register float64_t* dummy;
			register int32_t NN=N ;
			for (register int32_t j=0; j<NN; j++)
			{
				// transition_matrix_a is laid out so that row j holds a(.,j)
				register float64_t * matrix_a=&transition_matrix_a[j*N] ;
				register float64_t maxj=delta[0] + matrix_a[0];
				register int32_t argmax=0;

				for (register int32_t i=1; i<NN; i++)
				{
					register float64_t temp = delta[i] + matrix_a[i];

					if (temp>maxj)
					{
						maxj=temp;
						argmax=i;
					}
				}
#ifdef FIX_POS
				// positions disallowed by the model receive an extra penalty
				if ((!model) || (model->get_fix_pos_state(t,j,NN)!=CModel::FIX_DISALLOWED))
#endif
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
#ifdef FIX_POS
				else
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + CModel::DISALLOWED_PENALTY;
#endif
				// remember the best predecessor for backtracking
				set_psi(t, j, argmax, dimension);
			}

#ifdef USE_PATHDEBUG
			float64_t best=log(0) ;
			for (int32_t jj=0; jj<N; jj++)
				if (delta_new[jj]>best)
					best=delta_new[jj] ;

			if (best<-CMath::INFTY/2)
			{
				SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
				worst=best ;
			} ;
#endif

			// swap score buffers for the next time step
			dummy=delta;
			delta=delta_new;
			delta_new=dummy;
		}

		// termination: add the end-state scores and pick the best final state
		{
			register float64_t maxj=delta[0]+get_q(0);
			register int32_t argmax=0;

			for (register int32_t i=1; i<N; i++)
			{
				register float64_t temp=delta[i]+get_q(i);

				if (temp>maxj)
				{
					maxj=temp;
					argmax=i;
				}
			}
			pat_prob=maxj;
			PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
		} ;

		// backtracking through the psi table reconstructs the state sequence
		{
			for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>0; t--)
			{
				PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
			}
		}
		PATH_PROB_UPDATED(dimension)=true;
		PATH_PROB_DIMENSION(dimension)=dimension;
		return pat_prob ;
	}
}
01209
01210 #ifndef USE_HMMPARALLEL
01211 float64_t CHMM::model_probability_comp()
01212 {
01213
01214 mod_prob=0 ;
01215 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01216 mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01217
01218 mod_prob_updated=true;
01219 return mod_prob;
01220 }
01221
01222 #else
01223
01224 float64_t CHMM::model_probability_comp()
01225 {
01226 pthread_t *threads=new pthread_t[parallel->get_num_threads()];
01227 S_BW_THREAD_PARAM *params=new S_BW_THREAD_PARAM[parallel->get_num_threads()];
01228
01229 SG_INFO( "computing full model probablity\n");
01230 mod_prob=0;
01231
01232 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01233 {
01234 params[cpu].hmm=this ;
01235 params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel->get_num_threads();
01236 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel->get_num_threads();
01237 params[cpu].p_buf=new float64_t[N];
01238 params[cpu].q_buf=new float64_t[N];
01239 params[cpu].a_buf=new float64_t[N*N];
01240 params[cpu].b_buf=new float64_t[N*M];
01241 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]);
01242 }
01243
01244 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01245 {
01246 pthread_join(threads[cpu], NULL);
01247 mod_prob+=params[cpu].ret;
01248 }
01249
01250 for (int32_t i=0; i<parallel->get_num_threads(); i++)
01251 {
01252 delete[] params[i].p_buf;
01253 delete[] params[i].q_buf;
01254 delete[] params[i].a_buf;
01255 delete[] params[i].b_buf;
01256 }
01257
01258 delete[] threads;
01259 delete[] params;
01260
01261 mod_prob_updated=true;
01262 return mod_prob;
01263 }
01264
01265 void* CHMM::bw_dim_prefetch(void* params)
01266 {
01267 CHMM* hmm=((S_BW_THREAD_PARAM*) params)->hmm;
01268 int32_t start=((S_BW_THREAD_PARAM*) params)->dim_start;
01269 int32_t stop=((S_BW_THREAD_PARAM*) params)->dim_stop;
01270 float64_t* p_buf=((S_BW_THREAD_PARAM*) params)->p_buf;
01271 float64_t* q_buf=((S_BW_THREAD_PARAM*) params)->q_buf;
01272 float64_t* a_buf=((S_BW_THREAD_PARAM*) params)->a_buf;
01273 float64_t* b_buf=((S_BW_THREAD_PARAM*) params)->b_buf;
01274 ((S_BW_THREAD_PARAM*)params)->ret=0;
01275
01276 for (int32_t dim=start; dim<stop; dim++)
01277 {
01278 hmm->forward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01279 hmm->backward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01280 float64_t modprob=hmm->model_probability(dim) ;
01281 hmm->ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01282 ((S_BW_THREAD_PARAM*)params)->ret+= modprob;
01283 }
01284 return NULL ;
01285 }
01286
01287 void* CHMM::bw_single_dim_prefetch(void * params)
01288 {
01289 CHMM* hmm=((S_BW_THREAD_PARAM*)params)->hmm ;
01290 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01291 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->model_probability(dim);
01292 return NULL ;
01293 }
01294
01295 void* CHMM::vit_dim_prefetch(void * params)
01296 {
01297 CHMM* hmm=((S_DIM_THREAD_PARAM*)params)->hmm ;
01298 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01299 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->best_path(dim);
01300 return NULL ;
01301 }
01302
01303 #endif //USE_HMMPARALLEL
01304
01305
01306 #ifdef USE_HMMPARALLEL
01307
// Computes the log-domain Baum-Welch accumulators for a single observation
// vector `dim` into caller-provided buffers:
//   p_buf[i]     - posterior log-probability of starting in state i
//   q_buf[i]     - posterior log-probability of ending in state i
//   a_buf[N*i+j] - expected (log) transition count i -> j
//   b_buf[M*i+j] - expected (log) count of emitting symbol j in state i
// All quantities are normalized by this vector's model probability
// (log-subtraction of dimmodprob). Buffers must be sized N, N, N*N and N*M.
void CHMM::ab_buf_comp(
	float64_t* p_buf, float64_t* q_buf, float64_t *a_buf, float64_t* b_buf,
	int32_t dim)
{
	int32_t i,j,t ;
	float64_t a_sum;
	float64_t b_sum;

	float64_t dimmodprob=model_probability(dim);

	for (i=0; i<N; i++)
	{
		// gamma_0(i) and gamma_{T-1}(i): start/end state posteriors
		p_buf[i]=get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob;
		q_buf[i]=forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob;

		// expected transition counts: sum over t of xi_t(i,j)
		for (j=0; j<N; j++)
		{
			a_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
			{
				a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
						get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
			}
			a_buf[N*i+j]=a_sum-dimmodprob ;
		}

		// expected emission counts: sum of gamma_t(i) over t where O_t == j
		for (j=0; j<M; j++)
		{
			b_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim); t++)
			{
				if (p_observations->get_feature(dim,t)==j)
					b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
			}

			b_buf[M*i+j]=b_sum-dimmodprob ;
		}
	}
}
01352
01353
01354 void CHMM::estimate_model_baum_welch(CHMM* hmm)
01355 {
01356 int32_t i,j,cpu;
01357 float64_t fullmodprob=0;
01358
01359
01360 for (i=0; i<N; i++)
01361 {
01362 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
01363 set_p(i,log(PSEUDO));
01364 else
01365 set_p(i,hmm->get_p(i));
01366 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
01367 set_q(i,log(PSEUDO));
01368 else
01369 set_q(i,hmm->get_q(i));
01370
01371 for (j=0; j<N; j++)
01372 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01373 set_a(i,j, log(PSEUDO));
01374 else
01375 set_a(i,j,hmm->get_a(i,j));
01376 for (j=0; j<M; j++)
01377 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01378 set_b(i,j, log(PSEUDO));
01379 else
01380 set_b(i,j,hmm->get_b(i,j));
01381 }
01382 invalidate_model();
01383
01384 int32_t num_threads = parallel->get_num_threads();
01385
01386 pthread_t *threads=new pthread_t[num_threads] ;
01387 S_BW_THREAD_PARAM *params=new S_BW_THREAD_PARAM[num_threads] ;
01388
01389 if (p_observations->get_num_vectors()<num_threads)
01390 num_threads=p_observations->get_num_vectors();
01391
01392 for (cpu=0; cpu<num_threads; cpu++)
01393 {
01394 params[cpu].p_buf=new float64_t[N];
01395 params[cpu].q_buf=new float64_t[N];
01396 params[cpu].a_buf=new float64_t[N*N];
01397 params[cpu].b_buf=new float64_t[N*M];
01398
01399 params[cpu].hmm=hmm;
01400 int32_t start = p_observations->get_num_vectors()*cpu / num_threads;
01401 int32_t stop=p_observations->get_num_vectors()*(cpu+1) / num_threads;
01402
01403 if (cpu == parallel->get_num_threads()-1)
01404 stop=p_observations->get_num_vectors();
01405
01406 ASSERT(start<stop);
01407 params[cpu].dim_start=start;
01408 params[cpu].dim_stop=stop;
01409
01410 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, ¶ms[cpu]);
01411 }
01412
01413 for (cpu=0; cpu<num_threads; cpu++)
01414 {
01415 pthread_join(threads[cpu], NULL);
01416
01417 for (i=0; i<N; i++)
01418 {
01419
01420 set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
01421 set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));
01422
01423
01424 for (j=0; j<N; j++)
01425 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));
01426
01427
01428 for (j=0; j<M; j++)
01429 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
01430 }
01431
01432 fullmodprob+=params[cpu].ret;
01433
01434 }
01435
01436 for (cpu=0; cpu<num_threads; cpu++)
01437 {
01438 delete[] params[cpu].p_buf;
01439 delete[] params[cpu].q_buf;
01440 delete[] params[cpu].a_buf;
01441 delete[] params[cpu].b_buf;
01442 }
01443
01444 delete[] threads ;
01445 delete[] params ;
01446
01447
01448 hmm->mod_prob=fullmodprob;
01449 hmm->mod_prob_updated=true ;
01450
01451
01452 normalize();
01453 invalidate_model();
01454 }
01455
01456 #else // USE_HMMPARALLEL
01457
01458
// Serial Baum-Welch re-estimation: re-estimates this HMM's parameters from
// the current model `estimate` over all observation vectors. Uses the
// backward transition lists (trans_list_backward) so only transitions that
// actually exist are updated.
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;	//numerator
	float64_t dimmodprob=0;	//model probability for one observation vector
	float64_t fullmodprob=0;	//sum over all vectors

	// initialize the new parameters with PSEUDO counts (log domain);
	// parameters that are (almost) -infinity in the source model stay fixed
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// accumulate expected counts over every observation vector
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/end state posteriors (gamma_0(i) and gamma_{T-1}(i))
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			int32_t num = trans_list_backward_cnt[i] ;

			// expected transition counts, only for transitions listed in
			// the backward transition list of state i
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}

			// expected emission counts: gamma_t(i) summed where O_t == j
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache the total model probability on the source model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// renormalize so the new parameters satisfy stochasticity constraints
	normalize();
	invalidate_model();
}
01543
01544
// Legacy (dense) serial Baum-Welch re-estimation: identical to
// estimate_model_baum_welch except that it iterates over ALL N*N transitions
// instead of using the backward transition lists, so it is slower on sparse
// models but needs no transition-list preprocessing.
void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;	//numerator
	float64_t dimmodprob=0;	//model probability for one observation vector
	float64_t fullmodprob=0;	//sum over all vectors

	// initialize the new parameters with PSEUDO counts (log domain);
	// parameters that are (almost) -infinity in the source model stay fixed
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// accumulate expected counts over every observation vector
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/end state posteriors (gamma_0(i) and gamma_{T-1}(i))
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// expected transition counts over the full dense transition matrix
			for (j=0; j<N; j++)
			{
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
				}
				set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
			}

			// expected emission counts: gamma_t(i) summed where O_t == j
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache the total model probability on the source model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// renormalize so the new parameters satisfy stochasticity constraints
	normalize();
	invalidate_model();
}
01626 #endif // USE_HMMPARALLEL
01627
01628
01629
// Transition-only Baum-Welch re-estimation: re-estimates start/end
// probabilities and transition probabilities from `estimate`, while copying
// the emission probabilities b unchanged. Uses the backward transition lists
// to touch only existing transitions.
void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum;	//numerator
	float64_t dimmodprob=0;	//model probability for one observation vector
	float64_t fullmodprob=0;	//sum over all vectors

	// initialize p/q/a with PSEUDO counts (log domain); emissions are copied
	// verbatim since they are not re-estimated here
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// accumulate expected counts over every observation vector
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/end state posteriors (gamma_0(i) and gamma_{T-1}(i))
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			int32_t num = trans_list_backward_cnt[i] ;

			// expected transition counts, restricted to listed transitions
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}
		}
	}

	// cache the total model probability on the source model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// renormalize so the new parameters satisfy stochasticity constraints
	normalize();
	invalidate_model();
}
01696
01697
01698
01699
01700 void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
01701 {
01702 int32_t i,j,old_i,k,t,dim;
01703 float64_t a_sum_num, b_sum_num;
01704 float64_t a_sum_denom, b_sum_denom;
01705 float64_t dimmodprob=-CMath::INFTY;
01706 float64_t fullmodprob=0;
01707 float64_t* A=ARRAYN1(0);
01708 float64_t* B=ARRAYN2(0);
01709
01710
01711
01712 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01713 set_p(i,log(PSEUDO));
01714
01715 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01716 set_q(i,log(PSEUDO));
01717
01718 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01719 {
01720 j=model->get_learn_a(k,1);
01721 set_a(i,j, log(PSEUDO));
01722 }
01723
01724 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01725 {
01726 j=model->get_learn_b(k,1);
01727 set_b(i,j, log(PSEUDO));
01728 }
01729
01730 for (i=0; i<N; i++)
01731 {
01732 A[i]=log(PSEUDO);
01733 B[i]=log(PSEUDO);
01734 }
01735
01736 #ifdef USE_HMMPARALLEL
01737 int32_t num_threads = parallel->get_num_threads();
01738 pthread_t *threads=new pthread_t[num_threads] ;
01739 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
01740
01741 if (p_observations->get_num_vectors()<num_threads)
01742 num_threads=p_observations->get_num_vectors();
01743 #endif
01744
01745
01746 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01747 {
01748 #ifdef USE_HMMPARALLEL
01749 if (dim%num_threads==0)
01750 {
01751 for (i=0; i<num_threads; i++)
01752 {
01753 if (dim+i<p_observations->get_num_vectors())
01754 {
01755 params[i].hmm=estimate ;
01756 params[i].dim=dim+i ;
01757 pthread_create(&threads[i], NULL, bw_single_dim_prefetch, (void*)¶ms[i]) ;
01758 }
01759 }
01760 for (i=0; i<num_threads; i++)
01761 {
01762 if (dim+i<p_observations->get_num_vectors())
01763 {
01764 pthread_join(threads[i], NULL);
01765 dimmodprob = params[i].prob_sum;
01766 }
01767 }
01768 }
01769 #else
01770 dimmodprob=estimate->model_probability(dim);
01771 #endif // USE_HMMPARALLEL
01772
01773
01774 fullmodprob+= dimmodprob;
01775
01776
01777 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01778 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );
01779
01780 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01781 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
01782 estimate->backward(p_observations->get_vector_length(dim)-1, i, dim) - dimmodprob ) );
01783
01784
01785 old_i=-1;
01786 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01787 {
01788
01789
01790 if (old_i!=i)
01791 {
01792 old_i=i;
01793 a_sum_denom=-CMath::INFTY;
01794
01795 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01796 a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01797
01798 A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
01799 }
01800
01801 j=model->get_learn_a(k,1);
01802 a_sum_num=-CMath::INFTY;
01803 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01804 {
01805 a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
01806 estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
01807 }
01808
01809 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
01810 }
01811
01812
01813 old_i=-1;
01814 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01815 {
01816
01817
01818
01819 if (old_i!=i)
01820 {
01821 old_i=i;
01822 b_sum_denom=-CMath::INFTY;
01823
01824 for (t=0; t<p_observations->get_vector_length(dim); t++)
01825 b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01826
01827 B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
01828 }
01829
01830 j=model->get_learn_b(k,1);
01831 b_sum_num=-CMath::INFTY;
01832 for (t=0; t<p_observations->get_vector_length(dim); t++)
01833 {
01834 if (p_observations->get_feature(dim,t)==j)
01835 b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01836 }
01837
01838 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
01839 }
01840 }
01841 #ifdef USE_HMMPARALLEL
01842 delete[] threads ;
01843 delete[] params ;
01844 #endif
01845
01846
01847
01848 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01849 set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01850
01851 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01852 set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01853
01854 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01855 {
01856 j=model->get_learn_a(k,1);
01857 set_a(i,j, get_a(i,j) - A[i]);
01858 }
01859
01860 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01861 {
01862 j=model->get_learn_b(k,1);
01863 set_b(i,j, get_b(i,j) - B[i]);
01864 }
01865
01866
01867 estimate->mod_prob=fullmodprob;
01868 estimate->mod_prob_updated=true ;
01869
01870
01871 normalize();
01872 invalidate_model();
01873 }
01874
01875
01876 void CHMM::estimate_model_viterbi(CHMM* estimate)
01877 {
01878 int32_t i,j,t;
01879 float64_t sum;
01880 float64_t* P=ARRAYN1(0);
01881 float64_t* Q=ARRAYN2(0);
01882
01883 path_deriv_updated=false ;
01884
01885
01886 for (i=0; i<N; i++)
01887 {
01888 for (j=0; j<N; j++)
01889 set_A(i,j, PSEUDO);
01890
01891 for (j=0; j<M; j++)
01892 set_B(i,j, PSEUDO);
01893
01894 P[i]=PSEUDO;
01895 Q[i]=PSEUDO;
01896 }
01897
01898 float64_t allpatprob=0 ;
01899
01900 #ifdef USE_HMMPARALLEL
01901 int32_t num_threads = parallel->get_num_threads();
01902 pthread_t *threads=new pthread_t[num_threads] ;
01903 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
01904
01905 if (p_observations->get_num_vectors()<num_threads)
01906 num_threads=p_observations->get_num_vectors();
01907 #endif
01908
01909 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01910 {
01911
01912 #ifdef USE_HMMPARALLEL
01913 if (dim%num_threads==0)
01914 {
01915 for (i=0; i<num_threads; i++)
01916 {
01917 if (dim+i<p_observations->get_num_vectors())
01918 {
01919 params[i].hmm=estimate ;
01920 params[i].dim=dim+i ;
01921 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
01922 }
01923 }
01924 for (i=0; i<num_threads; i++)
01925 {
01926 if (dim+i<p_observations->get_num_vectors())
01927 {
01928 pthread_join(threads[i], NULL);
01929 allpatprob += params[i].prob_sum;
01930 }
01931 }
01932 }
01933 #else
01934
01935 allpatprob += estimate->best_path(dim);
01936 #endif // USE_HMMPARALLEL
01937
01938
01939 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01940 {
01941 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
01942 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
01943 }
01944
01945 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
01946
01947 P[estimate->PATH(dim)[0]]++;
01948 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
01949 }
01950
01951 #ifdef USE_HMMPARALLEL
01952 delete[] threads;
01953 delete[] params;
01954 #endif
01955
01956 allpatprob/=p_observations->get_num_vectors() ;
01957 estimate->all_pat_prob=allpatprob ;
01958 estimate->all_path_prob_updated=true ;
01959
01960
01961 for (i=0; i<N; i++)
01962 {
01963 sum=0;
01964 for (j=0; j<N; j++)
01965 sum+=get_A(i,j);
01966
01967 for (j=0; j<N; j++)
01968 set_a(i,j, log(get_A(i,j)/sum));
01969 }
01970
01971
01972 for (i=0; i<N; i++)
01973 {
01974 sum=0;
01975 for (j=0; j<M; j++)
01976 sum+=get_B(i,j);
01977
01978 for (j=0; j<M; j++)
01979 set_b(i,j, log(get_B(i, j)/sum));
01980 }
01981
01982
01983 sum=0;
01984 for (i=0; i<N; i++)
01985 sum+=P[i];
01986
01987 for (i=0; i<N; i++)
01988 set_p(i, log(P[i]/sum));
01989
01990
01991 sum=0;
01992 for (i=0; i<N; i++)
01993 sum+=Q[i];
01994
01995 for (i=0; i<N; i++)
01996 set_q(i, log(Q[i]/sum));
01997
01998
01999 invalidate_model();
02000 }
02001
02002
02003 void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
02004 {
02005 int32_t i,j,k,t;
02006 float64_t sum;
02007 float64_t* P=ARRAYN1(0);
02008 float64_t* Q=ARRAYN2(0);
02009
02010 path_deriv_updated=false ;
02011
02012
02013 for (i=0; i<N; i++)
02014 {
02015 for (j=0; j<N; j++)
02016 set_A(i,j, PSEUDO);
02017
02018 for (j=0; j<M; j++)
02019 set_B(i,j, PSEUDO);
02020
02021 P[i]=PSEUDO;
02022 Q[i]=PSEUDO;
02023 }
02024
02025 #ifdef USE_HMMPARALLEL
02026 int32_t num_threads = parallel->get_num_threads();
02027 pthread_t *threads=new pthread_t[num_threads] ;
02028 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
02029 #endif
02030
02031 float64_t allpatprob=0.0 ;
02032 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
02033 {
02034
02035 #ifdef USE_HMMPARALLEL
02036 if (dim%num_threads==0)
02037 {
02038 for (i=0; i<num_threads; i++)
02039 {
02040 if (dim+i<p_observations->get_num_vectors())
02041 {
02042 params[i].hmm=estimate ;
02043 params[i].dim=dim+i ;
02044 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
02045 }
02046 }
02047 for (i=0; i<num_threads; i++)
02048 {
02049 if (dim+i<p_observations->get_num_vectors())
02050 {
02051 pthread_join(threads[i], NULL);
02052 allpatprob += params[i].prob_sum;
02053 }
02054 }
02055 }
02056 #else // USE_HMMPARALLEL
02057
02058 allpatprob += estimate->best_path(dim);
02059 #endif // USE_HMMPARALLEL
02060
02061
02062
02063 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02064 {
02065 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02066 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02067 }
02068
02069 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02070
02071 P[estimate->PATH(dim)[0]]++;
02072 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02073 }
02074
02075 #ifdef USE_HMMPARALLEL
02076 delete[] threads ;
02077 delete[] params ;
02078 #endif
02079
02080
02081
02082
02083 allpatprob/=p_observations->get_num_vectors() ;
02084 estimate->all_pat_prob=allpatprob ;
02085 estimate->all_path_prob_updated=true ;
02086
02087
02088
02089 for (i=0; i<N; i++)
02090 {
02091 for (j=0; j<N; j++)
02092 set_a(i,j, estimate->get_a(i,j));
02093
02094 for (j=0; j<M; j++)
02095 set_b(i,j, estimate->get_b(i,j));
02096 }
02097
02098
02099 i=0;
02100 sum=0;
02101 j=model->get_learn_a(i,0);
02102 k=i;
02103 while (model->get_learn_a(i,0)!=-1 || k<i)
02104 {
02105 if (j==model->get_learn_a(i,0))
02106 {
02107 sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
02108 i++;
02109 }
02110 else
02111 {
02112 while (k<i)
02113 {
02114 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
02115 k++;
02116 }
02117
02118 sum=0;
02119 j=model->get_learn_a(i,0);
02120 k=i;
02121 }
02122 }
02123
02124
02125 i=0;
02126 sum=0;
02127 j=model->get_learn_b(i,0);
02128 k=i;
02129 while (model->get_learn_b(i,0)!=-1 || k<i)
02130 {
02131 if (j==model->get_learn_b(i,0))
02132 {
02133 sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
02134 i++;
02135 }
02136 else
02137 {
02138 while (k<i)
02139 {
02140 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
02141 k++;
02142 }
02143
02144 sum=0;
02145 j=model->get_learn_b(i,0);
02146 k=i;
02147 }
02148 }
02149
02150 i=0;
02151 sum=0;
02152 while (model->get_learn_p(i)!=-1)
02153 {
02154 sum+=P[model->get_learn_p(i)] ;
02155 i++ ;
02156 } ;
02157 i=0 ;
02158 while (model->get_learn_p(i)!=-1)
02159 {
02160 set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
02161 i++ ;
02162 } ;
02163
02164 i=0;
02165 sum=0;
02166 while (model->get_learn_q(i)!=-1)
02167 {
02168 sum+=Q[model->get_learn_q(i)] ;
02169 i++ ;
02170 } ;
02171 i=0 ;
02172 while (model->get_learn_q(i)!=-1)
02173 {
02174 set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
02175 i++ ;
02176 } ;
02177
02178
02179
02180 invalidate_model();
02181 }
02182
02183
02184
// Prints a human-readable summary of the model: overall log-likelihood and,
// when `verbose`, the full transition matrix, start/end state distributions
// and emission matrix (all printed in linear domain via exp). Each row also
// reports a log-domain checksum that should be ~0 for a properly normalized
// model; rows deviating by more than 1e-5 are flagged.
void CHMM::output_model(bool verbose)
{
	int32_t i,j;
	float64_t checksum;

	SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
			(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
			N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// transition matrix; each row's checksum starts at q(i) because
		// q(i) + sum_j a(i,j) should equal 1 in linear domain
		SG_INFO( "\ntransition matrix\n");
		for (i=0; i<N; i++)
		{
			checksum= get_q(i);
			for (j=0; j<N; j++)
			{
				checksum= CMath::logarithmic_sum(checksum, get_a(i,j));

				SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_a(i,j)));

				if (j % 4 == 3)
					SG_PRINT( "\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}

		// start-state distribution
		SG_INFO( "\ndistribution of start states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_p(i));
			SG_INFO( "p(%02i)=%1.4f ",i, (float32_t) exp(get_p(i)));
			if (i % 4 == 3)
				SG_PRINT( "\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// end-state distribution
		SG_INFO( "\ndistribution of terminal states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_q(i));
			SG_INFO("q(%02i)=%1.4f ",i, (float32_t) exp(get_q(i)));
			if (i % 4 == 3)
				SG_INFO("\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// emission matrix, one checksum per state
		SG_INFO("\ndistribution of observations given the state\n");
		for (i=0; i<N; i++)
		{
			checksum=-CMath::INFTY;
			for (j=0; j<M; j++)
			{
				checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
				SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_b(i,j)));
				if (j % 4 == 3)
					SG_PRINT("\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}
	}
	SG_PRINT("\n");
}
02267
02268
// Prints a summary of only the learnable parameters defined in the model
// (model->get_learn_a/b lists): overall log-likelihood and, when `verbose`,
// the learnable transition and emission probabilities (linear domain).
// Does nothing if no model definition is attached.
void CHMM::output_model_defined(bool verbose)
{
	int32_t i,j;
	if (!model)
		return ;

	SG_INFO("log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
			(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
			N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// learnable transitions; a newline is emitted whenever the source
		// state changes (the learn_a list is grouped by source state)
		SG_INFO("\ntransition matrix\n");

		i=0;
		j=model->get_learn_a(i,0);
		while (model->get_learn_a(i,0)!=-1)
		{
			if (j!=model->get_learn_a(i,0))
			{
				j=model->get_learn_a(i,0);
				SG_PRINT("\n");
			}

			SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float32_t) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
			i++;
		}

		// learnable emissions, grouped by state in the same manner
		SG_INFO("\n\ndistribution of observations given the state\n");
		i=0;
		j=model->get_learn_b(i,0);
		while (model->get_learn_b(i,0)!=-1)
		{
			if (j!=model->get_learn_b(i,0))
			{
				j=model->get_learn_b(i,0);
				SG_PRINT("\n");
			}

			SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float32_t) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
			i++;
		}

		SG_PRINT("\n");
	}
	SG_PRINT("\n");
}
02320
02321
02322
02323
02324 void CHMM::convert_to_log()
02325 {
02326 int32_t i,j;
02327
02328 for (i=0; i<N; i++)
02329 {
02330 if (get_p(i)!=0)
02331 set_p(i, log(get_p(i)));
02332 else
02333 set_p(i, -CMath::INFTY);;
02334 }
02335
02336 for (i=0; i<N; i++)
02337 {
02338 if (get_q(i)!=0)
02339 set_q(i, log(get_q(i)));
02340 else
02341 set_q(i, -CMath::INFTY);;
02342 }
02343
02344 for (i=0; i<N; i++)
02345 {
02346 for (j=0; j<N; j++)
02347 {
02348 if (get_a(i,j)!=0)
02349 set_a(i,j, log(get_a(i,j)));
02350 else
02351 set_a(i,j, -CMath::INFTY);
02352 }
02353 }
02354
02355 for (i=0; i<N; i++)
02356 {
02357 for (j=0; j<M; j++)
02358 {
02359 if (get_b(i,j)!=0)
02360 set_b(i,j, log(get_b(i,j)));
02361 else
02362 set_b(i,j, -CMath::INFTY);
02363 }
02364 }
02365 loglikelihood=true;
02366
02367 invalidate_model();
02368 }
02369
02370
02371 void CHMM::init_model_random()
02372 {
02373 const float64_t MIN_RAND=23e-3;
02374
02375 float64_t sum;
02376 int32_t i,j;
02377
02378
02379 for (i=0; i<N; i++)
02380 {
02381 sum=0;
02382 for (j=0; j<N; j++)
02383 {
02384 set_a(i,j, CMath::random(MIN_RAND, 1.0));
02385
02386 sum+=get_a(i,j);
02387 }
02388
02389 for (j=0; j<N; j++)
02390 set_a(i,j, get_a(i,j)/sum);
02391 }
02392
02393
02394 sum=0;
02395 for (i=0; i<N; i++)
02396 {
02397 set_p(i, CMath::random(MIN_RAND, 1.0));
02398
02399 sum+=get_p(i);
02400 }
02401
02402 for (i=0; i<N; i++)
02403 set_p(i, get_p(i)/sum);
02404
02405
02406 sum=0;
02407 for (i=0; i<N; i++)
02408 {
02409 set_q(i, CMath::random(MIN_RAND, 1.0));
02410
02411 sum+=get_q(i);
02412 }
02413
02414 for (i=0; i<N; i++)
02415 set_q(i, get_q(i)/sum);
02416
02417
02418 for (i=0; i<N; i++)
02419 {
02420 sum=0;
02421 for (j=0; j<M; j++)
02422 {
02423 set_b(i,j, CMath::random(MIN_RAND, 1.0));
02424
02425 sum+=get_b(i,j);
02426 }
02427
02428 for (j=0; j<M; j++)
02429 set_b(i,j, get_b(i,j)/sum);
02430 }
02431
02432
02433 invalidate_model();
02434 }
02435
02436
// Initializes the HMM according to the attached CModel definition: learnable
// parameters get random values normalized per group, constant parameters get
// their fixed values, everything else stays zero.
void CHMM::init_model_defined()
{
	int32_t i,j,k,r;
	float64_t sum;
	const float64_t MIN_RAND=23e-3;	// lower bound keeps random values strictly positive

	// zero all transitions
	for (i=0; i<N; i++)
		for (j=0; j<N; j++)
			set_a(i,j, 0);

	// zero start distribution
	for (i=0; i<N; i++)
		set_p(i, 0);

	// zero terminal distribution
	for (i=0; i<N; i++)
		set_q(i, 0);

	// zero emissions
	for (i=0; i<N; i++)
		for (j=0; j<M; j++)
			set_b(i,j, 0);

	// learnable transitions: entries in learn_a are (from,to) pairs grouped by
	// source state and terminated by -1. For each group, random values R are
	// accumulated into sum while i advances; when the source state changes the
	// pending entries k..i-1 are flushed as R/sum (i.e. normalized per group).
	// The "|| k<i" in the loop condition makes sure the final group is flushed
	// after the -1 sentinel is reached.
	float64_t *R=new float64_t[N] ;
	for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=i;
	j=model->get_learn_a(i,0);	// j = source state of the current group
	while (model->get_learn_a(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_a(i,0))
		{
			// still in the same group: accumulate the random mass
			sum+=R[model->get_learn_a(i,1)] ;
			i++;
		}
		else
		{
			// group finished: write out the normalized values for k..i-1
			while (k<i)
			{
				set_a(model->get_learn_a(k,0), model->get_learn_a(k,1),
						R[model->get_learn_a(k,1)]/sum);
				k++;
			}
			// start the next group with a fresh set of random values
			j=model->get_learn_a(i,0);
			k=i;
			sum=0;
			for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// learnable emissions: same grouped-normalization scheme over (state,symbol)
	// pairs in learn_b, with M random values per group
	R=new float64_t[M] ;
	for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=0 ;
	j=model->get_learn_b(i,0);
	while (model->get_learn_b(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_b(i,0))
		{
			sum+=R[model->get_learn_b(i,1)] ;
			i++;
		}
		else
		{
			while (k<i)
			{
				set_b(model->get_learn_b(k,0),model->get_learn_b(k,1),
						R[model->get_learn_b(k,1)]/sum);
				k++;
			}

			j=model->get_learn_b(i,0);
			k=i;
			sum=0;
			for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// constant transitions (fixed values from the definition, -1 terminated)
	i=0;
	while (model->get_const_a(i,0) != -1)
	{
		set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
		i++;
	}

	// constant emissions
	i=0;
	while (model->get_const_b(i,0) != -1)
	{
		set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
		i++;
	}

	// constant start probabilities
	i=0;
	while (model->get_const_p(i) != -1)
	{
		set_p(model->get_const_p(i), model->get_const_p_val(i));
		i++;
	}

	// reset terminal distribution before applying its constants
	for (i=0; i<N; i++)
		set_q(i, 0.0);

	// constant terminal probabilities
	i=0;
	while (model->get_const_q(i) != -1)
	{
		set_q(model->get_const_q(i), model->get_const_q_val(i));
		i++;
	}

	// learnable start probabilities: draw random values, then normalize over
	// the learnable subset only
	i=0;
	sum=0;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_p(model->get_learn_p(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
		i++ ;
	} ;

	// learnable terminal probabilities: same draw-then-normalize scheme
	i=0;
	sum=0;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_q(model->get_learn_q(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
		i++ ;
	} ;

	// all cached probabilities are now stale
	invalidate_model();
}
02590
02591 void CHMM::clear_model()
02592 {
02593 int32_t i,j;
02594 for (i=0; i<N; i++)
02595 {
02596 set_p(i, log(PSEUDO));
02597 set_q(i, log(PSEUDO));
02598
02599 for (j=0; j<N; j++)
02600 set_a(i,j, log(PSEUDO));
02601
02602 for (j=0; j<M; j++)
02603 set_b(i,j, log(PSEUDO));
02604 }
02605 }
02606
02607 void CHMM::clear_model_defined()
02608 {
02609 int32_t i,j,k;
02610
02611 for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02612 set_p(j, log(PSEUDO));
02613
02614 for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02615 set_q(j, log(PSEUDO));
02616
02617 for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02618 {
02619 k=model->get_learn_a(i,1);
02620 set_a(j,k, log(PSEUDO));
02621 }
02622
02623 for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02624 {
02625 k=model->get_learn_b(i,1);
02626 set_b(j,k, log(PSEUDO));
02627 }
02628 }
02629
02630 void CHMM::copy_model(CHMM* l)
02631 {
02632 int32_t i,j;
02633 for (i=0; i<N; i++)
02634 {
02635 set_p(i, l->get_p(i));
02636 set_q(i, l->get_q(i));
02637
02638 for (j=0; j<N; j++)
02639 set_a(i,j, l->get_a(i,j));
02640
02641 for (j=0; j<M; j++)
02642 set_b(i,j, l->get_b(i,j));
02643 }
02644 }
02645
// Marks all cached quantities (model probability, path probabilities,
// derivatives, alpha/beta caches) as stale and, when memory is initialized,
// rebuilds the per-state transition adjacency lists from the current a-matrix.
void CHMM::invalidate_model()
{
	// model probability needs recomputation
	this->mod_prob=0.0;
	this->mod_prob_updated=false;

	if (mem_initialized)
	{
		// free the old adjacency lists before rebuilding them; the loops below
		// use trans_list_len (the N at the time of the last build), not N
		if (trans_list_forward_cnt)
			delete[] trans_list_forward_cnt ;
		trans_list_forward_cnt=NULL ;
		if (trans_list_backward_cnt)
			delete[] trans_list_backward_cnt ;
		trans_list_backward_cnt=NULL ;
		if (trans_list_forward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_forward[i])
					delete[] trans_list_forward[i] ;
			delete[] trans_list_forward ;
			trans_list_forward=NULL ;
		}
		if (trans_list_backward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_backward[i])
					delete[] trans_list_backward[i] ;
			delete[] trans_list_backward ;
			trans_list_backward = NULL ;
		} ;

		trans_list_len = N ;
		trans_list_forward = new T_STATES*[N] ;
		trans_list_forward_cnt = new T_STATES[N] ;

		// trans_list_forward[j] holds all source states i whose transition
		// a(i,j) is effectively non-zero in log space
		for (int32_t j=0; j<N; j++)
		{
			trans_list_forward_cnt[j]= 0 ;
			trans_list_forward[j]= new T_STATES[N] ;
			for (int32_t i=0; i<N; i++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
					trans_list_forward_cnt[j]++ ;
				}
		} ;

		trans_list_backward = new T_STATES*[N] ;
		trans_list_backward_cnt = new T_STATES[N] ;

		// trans_list_backward[i] holds all target states j reachable from i
		// via an effectively non-zero transition a(i,j)
		for (int32_t i=0; i<N; i++)
		{
			trans_list_backward_cnt[i]= 0 ;
			trans_list_backward[i]= new T_STATES[N] ;
			for (int32_t j=0; j<N; j++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
					trans_list_backward_cnt[i]++ ;
				}
		} ;
	} ;
	// invalidate path probability / derivative caches
	this->all_pat_prob=0.0;
	this->pat_prob=0.0;
	this->path_deriv_updated=false ;
	this->path_deriv_dimension=-1 ;
	this->all_path_prob_updated=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		// one cache set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			this->alpha_cache[i].updated=false;
			this->beta_cache[i].updated=false;
			path_prob_updated[i]=false ;
			path_prob_dimension[i]=-1 ;
		} ;
	}
#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.updated=false;
	this->beta_cache.updated=false;
	this->path_prob_dimension=-1;
	this->path_prob_updated=false;

#endif // USE_HMMPARALLEL_STRUCTURES
}
02732
02733 void CHMM::open_bracket(FILE* file)
02734 {
02735 int32_t value;
02736 while (((value=fgetc(file)) != EOF) && (value!='['))
02737 {
02738 if (value=='\n')
02739 line++;
02740 }
02741
02742 if (value==EOF)
02743 error(line, "expected \"[\" in input file");
02744
02745 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02746 {
02747 if (value=='\n')
02748 line++;
02749 }
02750
02751 ungetc(value, file);
02752 }
02753
02754 void CHMM::close_bracket(FILE* file)
02755 {
02756 int32_t value;
02757 while (((value=fgetc(file)) != EOF) && (value!=']'))
02758 {
02759 if (value=='\n')
02760 line++;
02761 }
02762
02763 if (value==EOF)
02764 error(line, "expected \"]\" in input file");
02765 }
02766
02767 bool CHMM::comma_or_space(FILE* file)
02768 {
02769 int32_t value;
02770 while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))
02771 {
02772 if (value=='\n')
02773 line++;
02774 }
02775 if (value==']')
02776 {
02777 ungetc(value, file);
02778 SG_ERROR( "found ']' instead of ';' or ','\n") ;
02779 return false ;
02780 } ;
02781
02782 if (value==EOF)
02783 error(line, "expected \";\" or \",\" in input file");
02784
02785 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02786 {
02787 if (value=='\n')
02788 line++;
02789 }
02790 ungetc(value, file);
02791 return true ;
02792 }
02793
02794 bool CHMM::get_numbuffer(FILE* file, char* buffer, int32_t length)
02795 {
02796 signed char value;
02797
02798 while (((value=fgetc(file)) != EOF) &&
02799 !isdigit(value) && (value!='A')
02800 && (value!='C') && (value!='G') && (value!='T')
02801 && (value!='N') && (value!='n')
02802 && (value!='.') && (value!='-') && (value!='e') && (value!=']'))
02803 {
02804 if (value=='\n')
02805 line++;
02806 }
02807 if (value==']')
02808 {
02809 ungetc(value,file) ;
02810 return false ;
02811 } ;
02812 if (value!=EOF)
02813 {
02814 int32_t i=0;
02815 switch (value)
02816 {
02817 case 'A':
02818 value='0' +CAlphabet::B_A;
02819 break;
02820 case 'C':
02821 value='0' +CAlphabet::B_C;
02822 break;
02823 case 'G':
02824 value='0' +CAlphabet::B_G;
02825 break;
02826 case 'T':
02827 value='0' +CAlphabet::B_T;
02828 break;
02829 };
02830
02831 buffer[i++]=value;
02832
02833 while (((value=fgetc(file)) != EOF) &&
02834 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
02835 || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
02836 || (value=='N') || (value=='n')) && (i<length))
02837 {
02838 switch (value)
02839 {
02840 case 'A':
02841 value='0' +CAlphabet::B_A;
02842 break;
02843 case 'C':
02844 value='0' +CAlphabet::B_C;
02845 break;
02846 case 'G':
02847 value='0' +CAlphabet::B_G;
02848 break;
02849 case 'T':
02850 value='0' +CAlphabet::B_T;
02851 break;
02852 case '1': case '2': case'3': case '4': case'5':
02853 case '6': case '7': case'8': case '9': case '0': break ;
02854 case '.': case 'e': case '-': break ;
02855 default:
02856 SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
02857 };
02858 buffer[i++]=value;
02859 }
02860 ungetc(value, file);
02861 buffer[i]='\0';
02862
02863 return (i<=length) && (i>0);
02864 }
02865 return false;
02866 }
02867
02868
02869
02870
02871
02872
02873
02874
02875
02876
02877
02878
02879
02880
02881
02882
02883
02884
02885
02886
02887
02888
02889
02890
02891
02892
02893
02894
02895
02896
02897
02898
02899
02900
02901
02902
02903 bool CHMM::load_model(FILE* file)
02904 {
02905 int32_t received_params=0;
02906
02907 bool result=false;
02908 E_STATE state=INITIAL;
02909 char buffer[1024];
02910
02911 line=1;
02912 int32_t i,j;
02913
02914 if (file)
02915 {
02916 while (state!=END)
02917 {
02918 int32_t value=fgetc(file);
02919
02920 if (value=='\n')
02921 line++;
02922 if (value==EOF)
02923 state=END;
02924
02925 switch (state)
02926 {
02927 case INITIAL:
02928 if (value=='N')
02929 {
02930 if (received_params & GOTN)
02931 error(line, "in model file: \"p double defined\"");
02932 else
02933 state=GET_N;
02934 }
02935 else if (value=='M')
02936 {
02937 if (received_params & GOTM)
02938 error(line, "in model file: \"p double defined\"");
02939 else
02940 state=GET_M;
02941 }
02942 else if (value=='%')
02943 {
02944 state=COMMENT;
02945 }
02946 case ARRAYs:
02947 if (value=='p')
02948 {
02949 if (received_params & GOTp)
02950 error(line, "in model file: \"p double defined\"");
02951 else
02952 state=GET_p;
02953 }
02954 if (value=='q')
02955 {
02956 if (received_params & GOTq)
02957 error(line, "in model file: \"q double defined\"");
02958 else
02959 state=GET_q;
02960 }
02961 else if (value=='a')
02962 {
02963 if (received_params & GOTa)
02964 error(line, "in model file: \"a double defined\"");
02965 else
02966 state=GET_a;
02967 }
02968 else if (value=='b')
02969 {
02970 if (received_params & GOTb)
02971 error(line, "in model file: \"b double defined\"");
02972 else
02973 state=GET_b;
02974 }
02975 else if (value=='%')
02976 {
02977 state=COMMENT;
02978 }
02979 break;
02980 case GET_N:
02981 if (value=='=')
02982 {
02983 if (get_numbuffer(file, buffer, 4))
02984 {
02985 this->N= atoi(buffer);
02986 received_params|=GOTN;
02987 state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
02988 }
02989 else
02990 state=END;
02991 }
02992 break;
02993 case GET_M:
02994 if (value=='=')
02995 {
02996 if (get_numbuffer(file, buffer, 4))
02997 {
02998 this->M= atoi(buffer);
02999 received_params|=GOTM;
03000 state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03001 }
03002 else
03003 state=END;
03004 }
03005 break;
03006 case GET_a:
03007 if (value=='=')
03008 {
03009 float64_t f;
03010
03011 transition_matrix_a=new float64_t[N*N];
03012 open_bracket(file);
03013 for (i=0; i<this->N; i++)
03014 {
03015 open_bracket(file);
03016
03017 for (j=0; j<this->N ; j++)
03018 {
03019
03020 if (fscanf( file, "%le", &f ) != 1)
03021 error(line, "float64_t expected");
03022 else
03023 set_a(i,j, f);
03024
03025 if (j<this->N-1)
03026 comma_or_space(file);
03027 else
03028 close_bracket(file);
03029 }
03030
03031 if (i<this->N-1)
03032 comma_or_space(file);
03033 else
03034 close_bracket(file);
03035 }
03036 received_params|=GOTa;
03037 }
03038 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03039 break;
03040 case GET_b:
03041 if (value=='=')
03042 {
03043 float64_t f;
03044
03045 observation_matrix_b=new float64_t[N*M];
03046 open_bracket(file);
03047 for (i=0; i<this->N; i++)
03048 {
03049 open_bracket(file);
03050
03051 for (j=0; j<this->M ; j++)
03052 {
03053
03054 if (fscanf( file, "%le", &f ) != 1)
03055 error(line, "float64_t expected");
03056 else
03057 set_b(i,j, f);
03058
03059 if (j<this->M-1)
03060 comma_or_space(file);
03061 else
03062 close_bracket(file);
03063 }
03064
03065 if (i<this->N-1)
03066 comma_or_space(file);
03067 else
03068 close_bracket(file);
03069 }
03070 received_params|=GOTb;
03071 }
03072 state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03073 break;
03074 case GET_p:
03075 if (value=='=')
03076 {
03077 float64_t f;
03078
03079 initial_state_distribution_p=new float64_t[N];
03080 open_bracket(file);
03081 for (i=0; i<this->N ; i++)
03082 {
03083 if (fscanf( file, "%le", &f ) != 1)
03084 error(line, "float64_t expected");
03085 else
03086 set_p(i, f);
03087
03088 if (i<this->N-1)
03089 comma_or_space(file);
03090 else
03091 close_bracket(file);
03092 }
03093 received_params|=GOTp;
03094 }
03095 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03096 break;
03097 case GET_q:
03098 if (value=='=')
03099 {
03100 float64_t f;
03101
03102 end_state_distribution_q=new float64_t[N];
03103 open_bracket(file);
03104 for (i=0; i<this->N ; i++)
03105 {
03106 if (fscanf( file, "%le", &f ) != 1)
03107 error(line, "float64_t expected");
03108 else
03109 set_q(i, f);
03110
03111 if (i<this->N-1)
03112 comma_or_space(file);
03113 else
03114 close_bracket(file);
03115 }
03116 received_params|=GOTq;
03117 }
03118 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03119 break;
03120 case COMMENT:
03121 if (value==EOF)
03122 state=END;
03123 else if (value=='\n')
03124 {
03125 line++;
03126 state=INITIAL;
03127 }
03128 break;
03129
03130 default:
03131 break;
03132 }
03133 }
03134 result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
03135 }
03136
03137 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
03139 return result;
03140 }
03141
03142
03143
03144
03145
03146
03147
03148
03149
03150
03151
03152
03153
03154
03155
03156
03157
03158
03159
03160
03161
03162
03163
03164
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
03175
03176
03177
03178
03179
03180
03181
03182
03183
03184
03185
03186
03187
03188
03189
03190
03191
03192
03193
03194
03195
03196
03197
03198
03199
03200
03201 bool CHMM::load_definitions(FILE* file, bool verbose, bool init)
03202 {
03203 if (model)
03204 delete model ;
03205 model=new CModel();
03206
03207 int32_t received_params=0x0000000;
03208 char buffer[1024];
03209
03210 bool result=false;
03211 E_STATE state=INITIAL;
03212
03213 {
03214 model->set_learn_a(0, -1);
03215 model->set_learn_a(1, -1);
03216 model->set_const_a(0, -1);
03217 model->set_const_a(1, -1);
03218 model->set_const_a_val(0, 1.0);
03219 model->set_learn_b(0, -1);
03220 model->set_const_b(0, -1);
03221 model->set_const_b_val(0, 1.0);
03222 model->set_learn_p(0, -1);
03223 model->set_learn_q(0, -1);
03224 model->set_const_p(0, -1);
03225 model->set_const_q(0, -1);
03226 } ;
03227
03228 line=1;
03229
03230 if (file)
03231 {
03232 while (state!=END)
03233 {
03234 int32_t value=fgetc(file);
03235
03236 if (value=='\n')
03237 line++;
03238
03239 if (value==EOF)
03240 state=END;
03241
03242 switch (state)
03243 {
03244 case INITIAL:
03245 if (value=='l')
03246 {
03247 if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03248 {
03249 switch(fgetc(file))
03250 {
03251 case 'a':
03252 state=GET_learn_a;
03253 break;
03254 case 'b':
03255 state=GET_learn_b;
03256 break;
03257 case 'p':
03258 state=GET_learn_p;
03259 break;
03260 case 'q':
03261 state=GET_learn_q;
03262 break;
03263 default:
03264 error(line, "a,b,p or q expected in train definition file");
03265 };
03266 }
03267 }
03268 else if (value=='c')
03269 {
03270 if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s'
03271 && fgetc(file)=='t' && fgetc(file)=='_')
03272 {
03273 switch(fgetc(file))
03274 {
03275 case 'a':
03276 state=GET_const_a;
03277 break;
03278 case 'b':
03279 state=GET_const_b;
03280 break;
03281 case 'p':
03282 state=GET_const_p;
03283 break;
03284 case 'q':
03285 state=GET_const_q;
03286 break;
03287 default:
03288 error(line, "a,b,p or q expected in train definition file");
03289 };
03290 }
03291 }
03292 else if (value=='%')
03293 {
03294 state=COMMENT;
03295 }
03296 else if (value==EOF)
03297 {
03298 state=END;
03299 }
03300 break;
03301 case GET_learn_a:
03302 if (value=='=')
03303 {
03304 open_bracket(file);
03305 bool finished=false;
03306 int32_t i=0;
03307
03308 if (verbose)
03309 SG_DEBUG( "\nlearn for transition matrix: ") ;
03310 while (!finished)
03311 {
03312 open_bracket(file);
03313
03314 if (get_numbuffer(file, buffer, 4))
03315 {
03316 value=atoi(buffer);
03317 model->set_learn_a(i++, value);
03318
03319 if (value<0)
03320 {
03321 finished=true;
03322 break;
03323 }
03324 if (value>=N)
03325 SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03326 }
03327 else
03328 break;
03329
03330 comma_or_space(file);
03331
03332 if (get_numbuffer(file, buffer, 4))
03333 {
03334 value=atoi(buffer);
03335 model->set_learn_a(i++, value);
03336
03337 if (value<0)
03338 {
03339 finished=true;
03340 break;
03341 }
03342 if (value>=N)
03343 SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03344
03345 }
03346 else
03347 break;
03348 close_bracket(file);
03349 }
03350 close_bracket(file);
03351 if (verbose)
03352 SG_DEBUG( "%i Entries",(int)(i/2)) ;
03353
03354 if (finished)
03355 {
03356 received_params|=GOTlearn_a;
03357
03358 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03359 }
03360 else
03361 state=END;
03362 }
03363 break;
03364 case GET_learn_b:
03365 if (value=='=')
03366 {
03367 open_bracket(file);
03368 bool finished=false;
03369 int32_t i=0;
03370
03371 if (verbose)
03372 SG_DEBUG( "\nlearn for emission matrix: ") ;
03373
03374 while (!finished)
03375 {
03376 open_bracket(file);
03377
03378 int32_t combine=0;
03379
03380 for (int32_t j=0; j<2; j++)
03381 {
03382 if (get_numbuffer(file, buffer, 4))
03383 {
03384 value=atoi(buffer);
03385
03386 if (j==0)
03387 {
03388 model->set_learn_b(i++, value);
03389
03390 if (value<0)
03391 {
03392 finished=true;
03393 break;
03394 }
03395 if (value>=N)
03396 SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03397 }
03398 else
03399 combine=value;
03400 }
03401 else
03402 break;
03403
03404 if (j<1)
03405 comma_or_space(file);
03406 else
03407 close_bracket(file);
03408 }
03409 model->set_learn_b(i++, combine);
03410 if (combine>=M)
03411
03412 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)value) ;
03413 }
03414 close_bracket(file);
03415 if (verbose)
03416 SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03417
03418 if (finished)
03419 {
03420 received_params|=GOTlearn_b;
03421 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03422 }
03423 else
03424 state=END;
03425 }
03426 break;
03427 case GET_learn_p:
03428 if (value=='=')
03429 {
03430 open_bracket(file);
03431 bool finished=false;
03432 int32_t i=0;
03433
03434 if (verbose)
03435 SG_DEBUG( "\nlearn start states: ") ;
03436 while (!finished)
03437 {
03438 if (get_numbuffer(file, buffer, 4))
03439 {
03440 value=atoi(buffer);
03441
03442 model->set_learn_p(i++, value);
03443
03444 if (value<0)
03445 {
03446 finished=true;
03447 break;
03448 }
03449 if (value>=N)
03450 SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03451 }
03452 else
03453 break;
03454
03455 comma_or_space(file);
03456 }
03457
03458 close_bracket(file);
03459 if (verbose)
03460 SG_DEBUG( "%i Entries",i-1) ;
03461
03462 if (finished)
03463 {
03464 received_params|=GOTlearn_p;
03465 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03466 }
03467 else
03468 state=END;
03469 }
03470 break;
03471 case GET_learn_q:
03472 if (value=='=')
03473 {
03474 open_bracket(file);
03475 bool finished=false;
03476 int32_t i=0;
03477
03478 if (verbose)
03479 SG_DEBUG( "\nlearn terminal states: ") ;
03480 while (!finished)
03481 {
03482 if (get_numbuffer(file, buffer, 4))
03483 {
03484 value=atoi(buffer);
03485 model->set_learn_q(i++, value);
03486
03487 if (value<0)
03488 {
03489 finished=true;
03490 break;
03491 }
03492 if (value>=N)
03493 SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03494 }
03495 else
03496 break;
03497
03498 comma_or_space(file);
03499 }
03500
03501 close_bracket(file);
03502 if (verbose)
03503 SG_DEBUG( "%i Entries",i-1) ;
03504
03505 if (finished)
03506 {
03507 received_params|=GOTlearn_q;
03508 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03509 }
03510 else
03511 state=END;
03512 }
03513 break;
03514 case GET_const_a:
03515 if (value=='=')
03516 {
03517 open_bracket(file);
03518 bool finished=false;
03519 int32_t i=0;
03520
03521 if (verbose)
03522 #ifdef USE_HMMDEBUG
03523 SG_DEBUG( "\nconst for transition matrix: \n") ;
03524 #else
03525 SG_DEBUG( "\nconst for transition matrix: ") ;
03526 #endif
03527 while (!finished)
03528 {
03529 open_bracket(file);
03530
03531 if (get_numbuffer(file, buffer, 4))
03532 {
03533 value=atoi(buffer);
03534 model->set_const_a(i++, value);
03535
03536 if (value<0)
03537 {
03538 finished=true;
03539 model->set_const_a(i++, value);
03540 model->set_const_a_val((int32_t)i/2 - 1, value);
03541 break;
03542 }
03543 if (value>=N)
03544 SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03545 }
03546 else
03547 break;
03548
03549 comma_or_space(file);
03550
03551 if (get_numbuffer(file, buffer, 4))
03552 {
03553 value=atoi(buffer);
03554 model->set_const_a(i++, value);
03555
03556 if (value<0)
03557 {
03558 finished=true;
03559 model->set_const_a_val((int32_t)i/2 - 1, value);
03560 break;
03561 }
03562 if (value>=N)
03563 SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03564 }
03565 else
03566 break;
03567
03568 if (!comma_or_space(file))
03569 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03570 else
03571 if (get_numbuffer(file, buffer, 10))
03572 {
03573 float64_t dvalue=atof(buffer);
03574 model->set_const_a_val((int32_t)i/2 - 1, dvalue);
03575 if (dvalue<0)
03576 {
03577 finished=true;
03578 break;
03579 }
03580 if ((dvalue>1.0) || (dvalue<0.0))
03581 SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03582 }
03583 else
03584 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03585
03586 #ifdef USE_HMMDEBUG
03587 if (verbose)
03588 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int32_t)i/2-1,0),model->get_const_a((int32_t)i/2-1,1),model->get_const_a_val((int32_t)i/2-1)) ;
03589 #endif
03590 close_bracket(file);
03591 }
03592 close_bracket(file);
03593 if (verbose)
03594 SG_DEBUG( "%i Entries",(int)i/2-1) ;
03595
03596 if (finished)
03597 {
03598 received_params|=GOTconst_a;
03599 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03600 }
03601 else
03602 state=END;
03603 }
03604 break;
03605
03606 case GET_const_b:
03607 if (value=='=')
03608 {
03609 open_bracket(file);
03610 bool finished=false;
03611 int32_t i=0;
03612
03613 if (verbose)
03614 #ifdef USE_HMMDEBUG
03615 SG_DEBUG( "\nconst for emission matrix: \n") ;
03616 #else
03617 SG_DEBUG( "\nconst for emission matrix: ") ;
03618 #endif
03619 while (!finished)
03620 {
03621 open_bracket(file);
03622 int32_t combine=0;
03623 for (int32_t j=0; j<3; j++)
03624 {
03625 if (get_numbuffer(file, buffer, 10))
03626 {
03627 if (j==0)
03628 {
03629 value=atoi(buffer);
03630
03631 model->set_const_b(i++, value);
03632
03633 if (value<0)
03634 {
03635 finished=true;
03636
03637 break;
03638 }
03639 if (value>=N)
03640 SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03641 }
03642 else if (j==2)
03643 {
03644 float64_t dvalue=atof(buffer);
03645 model->set_const_b_val((int32_t)(i-1)/2, dvalue);
03646 if (dvalue<0)
03647 {
03648 finished=true;
03649 break;
03650 } ;
03651 if ((dvalue>1.0) || (dvalue<0.0))
03652 SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03653 }
03654 else
03655 {
03656 value=atoi(buffer);
03657 combine= value;
03658 } ;
03659 }
03660 else
03661 {
03662 if (j==2)
03663 model->set_const_b_val((int32_t)(i-1)/2, 1.0);
03664 break;
03665 } ;
03666 if (j<2)
03667 if ((!comma_or_space(file)) && (j==1))
03668 {
03669 model->set_const_b_val((int32_t)(i-1)/2, 1.0) ;
03670 break ;
03671 } ;
03672 }
03673 close_bracket(file);
03674 model->set_const_b(i++, combine);
03675 if (combine>=M)
03676 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03677 #ifdef USE_HMMDEBUG
03678 if (verbose && !finished)
03679 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int32_t)i/2-1,0),model->get_const_b((int32_t)i/2-1,1),model->get_const_b_val((int32_t)i/2-1)) ;
03680 #endif
03681 }
03682 close_bracket(file);
03683 if (verbose)
03684 SG_ERROR( "%i Entries",(int)i/2-1) ;
03685
03686 if (finished)
03687 {
03688 received_params|=GOTconst_b;
03689 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03690 }
03691 else
03692 state=END;
03693 }
03694 break;
03695 case GET_const_p:
03696 if (value=='=')
03697 {
03698 open_bracket(file);
03699 bool finished=false;
03700 int32_t i=0;
03701
03702 if (verbose)
03703 #ifdef USE_HMMDEBUG
03704 SG_DEBUG( "\nconst for start states: \n") ;
03705 #else
03706 SG_DEBUG( "\nconst for start states: ") ;
03707 #endif
03708 while (!finished)
03709 {
03710 open_bracket(file);
03711
03712 if (get_numbuffer(file, buffer, 4))
03713 {
03714 value=atoi(buffer);
03715 model->set_const_p(i, value);
03716
03717 if (value<0)
03718 {
03719 finished=true;
03720 model->set_const_p_val(i++, value);
03721 break;
03722 }
03723 if (value>=N)
03724 SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03725
03726 }
03727 else
03728 break;
03729
03730 if (!comma_or_space(file))
03731 model->set_const_p_val(i++, 1.0);
03732 else
03733 if (get_numbuffer(file, buffer, 10))
03734 {
03735 float64_t dvalue=atof(buffer);
03736 model->set_const_p_val(i++, dvalue);
03737 if (dvalue<0)
03738 {
03739 finished=true;
03740 break;
03741 }
03742 if ((dvalue>1) || (dvalue<0))
03743 SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
03744 }
03745 else
03746 model->set_const_p_val(i++, 1.0);
03747
03748 close_bracket(file);
03749
03750 #ifdef USE_HMMDEBUG
03751 if (verbose)
03752 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
03753 #endif
03754 }
03755 if (verbose)
03756 SG_DEBUG( "%i Entries",i-1) ;
03757
03758 close_bracket(file);
03759
03760 if (finished)
03761 {
03762 received_params|=GOTconst_p;
03763 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03764 }
03765 else
03766 state=END;
03767 }
03768 break;
03769 case GET_const_q:
03770 if (value=='=')
03771 {
03772 open_bracket(file);
03773 bool finished=false;
03774 if (verbose)
03775 #ifdef USE_HMMDEBUG
03776 SG_DEBUG( "\nconst for terminal states: \n") ;
03777 #else
03778 SG_DEBUG( "\nconst for terminal states: ") ;
03779 #endif
03780 int32_t i=0;
03781
03782 while (!finished)
03783 {
03784 open_bracket(file) ;
03785 if (get_numbuffer(file, buffer, 4))
03786 {
03787 value=atoi(buffer);
03788 model->set_const_q(i, value);
03789 if (value<0)
03790 {
03791 finished=true;
03792 model->set_const_q_val(i++, value);
03793 break;
03794 }
03795 if (value>=N)
03796 SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
03797 }
03798 else
03799 break;
03800
03801 if (!comma_or_space(file))
03802 model->set_const_q_val(i++, 1.0);
03803 else
03804 if (get_numbuffer(file, buffer, 10))
03805 {
03806 float64_t dvalue=atof(buffer);
03807 model->set_const_q_val(i++, dvalue);
03808 if (dvalue<0)
03809 {
03810 finished=true;
03811 break;
03812 }
03813 if ((dvalue>1) || (dvalue<0))
03814 SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
03815 }
03816 else
03817 model->set_const_q_val(i++, 1.0);
03818
03819 close_bracket(file);
03820 #ifdef USE_HMMDEBUG
03821 if (verbose)
03822 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
03823 #endif
03824 }
03825 if (verbose)
03826 SG_DEBUG( "%i Entries",i-1) ;
03827
03828 close_bracket(file);
03829
03830 if (finished)
03831 {
03832 received_params|=GOTconst_q;
03833 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03834 }
03835 else
03836 state=END;
03837 }
03838 break;
03839 case COMMENT:
03840 if (value==EOF)
03841 state=END;
03842 else if (value=='\n')
03843 state=INITIAL;
03844 break;
03845
03846 default:
03847 break;
03848 }
03849 }
03850 }
03851
03852
03853
03854
03855
03856 result=1 ;
03857 if (result)
03858 {
03859 model->sort_learn_a() ;
03860 model->sort_learn_b() ;
03861 if (init)
03862 {
03863 init_model_defined(); ;
03864 convert_to_log();
03865 } ;
03866 }
03867 if (verbose)
03868 SG_DEBUG( "\n") ;
03869 return result;
03870 }
03871
03872
03873
03874
03875
03876
03877
03878
03879
03880
03881
03882
03883
03884
03885
03886
03887
03888
03889
03890
03891
03892
03893
03894
03895
03896
03897
03898
03899
03900
03901
03902
03903
03904
03905
03906 bool CHMM::save_model(FILE* file)
03907 {
03908 bool result=false;
03909 int32_t i,j;
03910 const float32_t NAN_REPLACEMENT = (float32_t) CMath::ALMOST_NEG_INFTY ;
03911
03912 if (file)
03913 {
03914 fprintf(file,"%s","% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
03915 fprintf(file,"N=%d;\n",N);
03916 fprintf(file,"M=%d;\n",M);
03917
03918 fprintf(file,"p=[");
03919 for (i=0; i<N; i++)
03920 {
03921 if (i<N-1) {
03922 if (CMath::is_finite(get_p(i)))
03923 fprintf(file, "%e,", (double)get_p(i));
03924 else
03925 fprintf(file, "%f,", NAN_REPLACEMENT);
03926 }
03927 else {
03928 if (CMath::is_finite(get_p(i)))
03929 fprintf(file, "%e", (double)get_p(i));
03930 else
03931 fprintf(file, "%f", NAN_REPLACEMENT);
03932 }
03933 }
03934
03935 fprintf(file,"];\n\nq=[");
03936 for (i=0; i<N; i++)
03937 {
03938 if (i<N-1) {
03939 if (CMath::is_finite(get_q(i)))
03940 fprintf(file, "%e,", (double)get_q(i));
03941 else
03942 fprintf(file, "%f,", NAN_REPLACEMENT);
03943 }
03944 else {
03945 if (CMath::is_finite(get_q(i)))
03946 fprintf(file, "%e", (double)get_q(i));
03947 else
03948 fprintf(file, "%f", NAN_REPLACEMENT);
03949 }
03950 }
03951 fprintf(file,"];\n\na=[");
03952
03953 for (i=0; i<N; i++)
03954 {
03955 fprintf(file, "\t[");
03956
03957 for (j=0; j<N; j++)
03958 {
03959 if (j<N-1) {
03960 if (CMath::is_finite(get_a(i,j)))
03961 fprintf(file, "%e,", (double)get_a(i,j));
03962 else
03963 fprintf(file, "%f,", NAN_REPLACEMENT);
03964 }
03965 else {
03966 if (CMath::is_finite(get_a(i,j)))
03967 fprintf(file, "%e];\n", (double)get_a(i,j));
03968 else
03969 fprintf(file, "%f];\n", NAN_REPLACEMENT);
03970 }
03971 }
03972 }
03973
03974 fprintf(file," ];\n\nb=[");
03975
03976 for (i=0; i<N; i++)
03977 {
03978 fprintf(file, "\t[");
03979
03980 for (j=0; j<M; j++)
03981 {
03982 if (j<M-1) {
03983 if (CMath::is_finite(get_b(i,j)))
03984 fprintf(file, "%e,", (double)get_b(i,j));
03985 else
03986 fprintf(file, "%f,", NAN_REPLACEMENT);
03987 }
03988 else {
03989 if (CMath::is_finite(get_b(i,j)))
03990 fprintf(file, "%e];\n", (double)get_b(i,j));
03991 else
03992 fprintf(file, "%f];\n", NAN_REPLACEMENT);
03993 }
03994 }
03995 }
03996 result= (fprintf(file," ];\n") == 5);
03997 }
03998
03999 return result;
04000 }
04001
04002 T_STATES* CHMM::get_path(int32_t dim, float64_t& prob)
04003 {
04004 T_STATES* result = NULL;
04005
04006 prob = best_path(dim);
04007 result = new T_STATES[p_observations->get_vector_length(dim)];
04008
04009 for (int32_t i=0; i<p_observations->get_vector_length(dim); i++)
04010 result[i]=PATH(dim)[i];
04011
04012 return result;
04013 }
04014
04015 bool CHMM::save_path(FILE* file)
04016 {
04017 bool result=false;
04018
04019 if (file)
04020 {
04021 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04022 {
04023 if (dim%100==0)
04024 SG_PRINT( "%i..", dim) ;
04025 float64_t prob = best_path(dim);
04026 fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04027 for (int32_t i=0; i<p_observations->get_vector_length(dim)-1; i++)
04028 fprintf(file,"%d ", PATH(dim)[i]);
04029 fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04030 fprintf(file,"\n\n") ;
04031 }
04032 SG_DONE();
04033 result=true;
04034 }
04035
04036 return result;
04037 }
04038
04039 bool CHMM::save_likelihood_bin(FILE* file)
04040 {
04041 bool result=false;
04042
04043 if (file)
04044 {
04045 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04046 {
04047 float32_t prob= (float32_t) model_probability(dim);
04048 fwrite(&prob, sizeof(float32_t), 1, file);
04049 }
04050 result=true;
04051 }
04052
04053 return result;
04054 }
04055
04056 bool CHMM::save_likelihood(FILE* file)
04057 {
04058 bool result=false;
04059
04060 if (file)
04061 {
04062 fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04063
04064 fprintf(file, "P=[");
04065 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04066 fprintf(file, "%e ", (double) model_probability(dim));
04067
04068 fprintf(file,"];");
04069 result=true;
04070 }
04071
04072 return result;
04073 }
04074
04075 #define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
04076
bool CHMM::save_model_bin(FILE* file)
{
	// Dump the model parameters as a flat stream of 32bit floats via the
	// FLOATWRITE macro (which also increments num_floats per value written).
	// Two layouts exist:
	//  - no `model' definition loaded: full p, q, a, b are written
	//    (header marker INFTY followed by format tag 1)
	//  - `model' loaded: only learnable parameters are written, each a/b
	//    entry prefixed by its (i,j) indices (marker INFTY, tag 2)
	// Both variants end with a footer (marker INFTY, tag 3) carrying the
	// parameter counts and the model dimensions N and M.
	int32_t i,j,q, num_floats=0 ;
	if (!model)
	{
		if (file)
		{
			// header: marker + format tag 1 (full parameter set)
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 1);

			// initial state distribution p
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_p(i));
			SG_INFO( "wrote %i parameters for p\n",N) ;

			// end state distribution q
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_q(i)) ;
			SG_INFO( "wrote %i parameters for q\n",N) ;

			// transition matrix a, row major
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					FLOATWRITE(file, get_a(i,j));
			SG_INFO( "wrote %i parameters for a\n",N*N) ;

			// emission matrix b, row major
			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					FLOATWRITE(file, get_b(i,j));
			SG_INFO( "wrote %i parameters for b\n",N*M) ;

			// footer: marker + tag 3
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			// parameter counts followed by model dimensions
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N*N);
			FLOATWRITE(file, (float32_t) N*M);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	}
	else
	{
		if (file)
		{
			int32_t num_p, num_q, num_a, num_b ;

			// header: marker + format tag 2 (learnable parameters only)
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 2);

			// learnable entries of p (learn lists are -1 terminated)
			for (i=0; model->get_learn_p(i)>=0; i++)
				FLOATWRITE(file, get_p(model->get_learn_p(i)));
			num_p=i ;
			SG_INFO( "wrote %i parameters for p\n",num_p) ;

			for (i=0; model->get_learn_q(i)>=0; i++)
				FLOATWRITE(file, get_q(model->get_learn_q(i)));
			num_q=i ;
			SG_INFO( "wrote %i parameters for q\n",num_q) ;

			// NOTE(review): this loop terminates on column 1 while the
			// analogous learn_b loop below (and save_path_derivatives_bin)
			// tests column 0 -- verify the asymmetry is intended.
			for (q=0; model->get_learn_a(q,1)>=0; q++)
			{
				i=model->get_learn_a(q,0) ;
				j=model->get_learn_a(q,1) ;
				// each entry is written as (i, j, value)
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_a(i,j));
			} ;
			num_a=q ;
			SG_INFO( "wrote %i parameters for a\n",num_a) ;

			for (q=0; model->get_learn_b(q,0)>=0; q++)
			{
				i=model->get_learn_b(q,0) ;
				j=model->get_learn_b(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_b(i,j));
			} ;
			num_b=q ;
			SG_INFO( "wrote %i parameters for b\n",num_b) ;

			// footer: marker + tag 3
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			// counts of written parameters followed by model dimensions
			FLOATWRITE(file, (float32_t) num_p);
			FLOATWRITE(file, (float32_t) num_q);
			FLOATWRITE(file, (float32_t) num_a);
			FLOATWRITE(file, (float32_t) num_b);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	} ;
	// NOTE(review): returns true even when `file' is NULL or a write failed.
	return true ;
}
04178
bool CHMM::save_path_derivatives(FILE* logfile)
{
	// Write the derivatives of the best-path probability P[O,Q|lambda]
	// w.r.t. all parameters (p, q, a, b) for every observation, as a
	// matlab/octave matrix `dPr' with one row per observation.
	// Returns false when `logfile' is NULL, true otherwise.
	int32_t dim,i,j;
	float64_t prob;

	if (logfile)
	{
		// matlab/octave header comments describing the row layout
		fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% ............................. \n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(logfile,"%% ];\n%%\n\ndPr(log()) = [\n");
	}
	else
		return false ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// best_path() is called for its side effects -- presumably it fills
		// the internal tables read by the path_derivative_* accessors; its
		// return value `prob' is not written out here.
		prob=best_path(dim);

		fprintf(logfile, "[ ");

		// derivatives w.r.t. the start distribution p
		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );

		// derivatives w.r.t. the end distribution q
		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );

		// derivatives w.r.t. the transition matrix a
		for (i=0; i<N; i++)
			for (j=0; j<N; j++)
				fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );

		// derivatives w.r.t. the emission matrix b
		for (i=0; i<N; i++)
			for (j=0; j<M; j++)
				fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );

		// step the file position back one byte to overwrite the trailing comma
		fseek(logfile,ftell(logfile)-1,SEEK_SET);
		fprintf(logfile, " ];\n");
	}

	fprintf(logfile, "];");

	return true ;
}
04226
bool CHMM::save_path_derivatives_bin(FILE* logfile)
{
	// Binary variant of save_path_derivatives(): for every observation,
	// write the best-path probability followed by the path derivatives as
	// raw 32bit floats (via FLOATWRITE). With a `model' definition loaded
	// only the learnable parameters are written; the model itself is
	// appended via save_model_bin() at the end.
	bool result=false;
	int32_t dim,i,j,q;
	float64_t prob=0 ;
	int32_t num_floats=0 ;        // incremented by FLOATWRITE per value

	float64_t sum_prob=0.0 ;      // accumulated but only used locally
	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator on every 100th sequence
		if (dim%100==0)
		{
			SG_PRINT( ".") ;

		} ;

		prob=best_path(dim);
		sum_prob+=prob ;

		if (!model)
		{
			if (logfile)
			{
				// row starts with the path probability
				FLOATWRITE(logfile, prob);

				// all derivatives: p, q, then a and b row major
				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(logfile, path_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(logfile, path_derivative_b(i,j,dim));

			}
		}
		else
		{
			if (logfile)
			{
				// row starts with the path probability
				FLOATWRITE(logfile, prob);

				// learnable parameters only (lists are -1 terminated)
				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));

				for (q=0; model->get_learn_a(q,0)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(logfile, path_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(logfile, path_derivative_b(i,j,dim));
				} ;
			}
		} ;
	}
	// append the model parameters themselves to the same stream
	save_model_bin(logfile) ;

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04309
bool CHMM::save_model_derivatives_bin(FILE* file)
{
	// For every observation, write the model probability followed by the
	// derivatives of P[O|lambda] w.r.t. all (or only the learnable)
	// parameters as raw 32bit floats; the model itself is appended via
	// save_model_bin(). With USE_HMMPARALLEL the per-dimension
	// model_probability computations are prefetched by a thread pool.
	bool result=false;
	int32_t dim,i,j,q ;
	int32_t num_floats=0 ;        // incremented by FLOATWRITE per value

	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

#ifdef USE_HMMPARALLEL
	int32_t num_threads = parallel->get_num_threads();
	pthread_t *threads=new pthread_t[num_threads] ;
	S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;

	// never spawn more threads than there are observations
	if (p_observations->get_num_vectors()<num_threads)
		num_threads=p_observations->get_num_vectors();
#endif

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator on every 20th sequence
		if (dim%20==0)
		{
			SG_PRINT( ".") ;

		} ;

#ifdef USE_HMMPARALLEL
		// prefetch the next num_threads dimensions in parallel; the
		// model_probability(dim) call below then reads cached results
		if (dim%num_threads==0)
		{
			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=this ;
					params[i].dim=dim+i ;
					pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
				}
			}

			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
					pthread_join(threads[i], NULL);
			}
		}
#endif

		float64_t prob=model_probability(dim) ;
		if (!model)
		{
			if (file)
			{
				// row starts with the model probability
				FLOATWRITE(file, prob);

				// all derivatives: p, q, then a and b row major
				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(file, model_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(file, model_derivative_b(i,j,dim));

				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		}
		else
		{
			if (file)
			{
				// row starts with the model probability
				FLOATWRITE(file, prob);

				// learnable parameters only (lists are -1 terminated)
				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));

				// NOTE(review): terminates on column 1 while the learn_b loop
				// below tests column 0 -- verify the asymmetry is intended.
				for (q=0; model->get_learn_a(q,1)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(file, model_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(file, model_derivative_b(i,j,dim));
				} ;
				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		} ;
	}
	// append the model parameters themselves to the same stream
	save_model_bin(file) ;

#ifdef USE_HMMPARALLEL
	delete[] threads ;
	delete[] params ;
#endif

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04430
bool CHMM::save_model_derivatives(FILE* file)
{
	// Text variant of save_model_derivatives_bin(): write the derivatives
	// of P[O|lambda] w.r.t. all parameters (p, q, a, b) for every
	// observation as a matlab/octave matrix, one row per observation.
	// Returns false when `file' is NULL, true otherwise.
	bool result=false;
	int32_t dim,i,j;

	if (file)
	{
		// matlab/octave header comments describing the row layout
		fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% ............................. \n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(file,"%% ];\n%%\n\nlog(dPr) = [\n");


		for (dim=0; dim<p_observations->get_num_vectors(); dim++)
		{
			fprintf(file, "[ ");

			// derivatives w.r.t. the start distribution p
			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );

			// derivatives w.r.t. the end distribution q
			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_q(i, dim) );

			// derivatives w.r.t. the transition matrix a
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );

			// derivatives w.r.t. the emission matrix b
			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );

			// step the file position back one byte to overwrite the trailing comma
			fseek(file,ftell(file)-1,SEEK_SET);
			fprintf(file, " ];\n");
		}


		fprintf(file, "];");

		result=true;
	}
	return result;
}
04478
bool CHMM::check_model_derivatives_combined()
{
	// Debug helper: compare the analytically computed derivatives of the
	// (combined, all-observation) model probability w.r.t. the emission
	// parameters b against a symmetric finite-difference approximation.
	// Only state i=0 is checked here.
	const float64_t delta=5e-4 ;   // finite-difference step in linear domain

	int32_t i ;





























	i=0;
	{
		for (int32_t j=0; j<M; j++)
		{
			float64_t old_b=get_b(i,j) ;

			// perturb b(i,j) by -delta in the linear domain and re-evaluate
			set_b(i,j, log(exp(old_b)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;

			// perturb by +delta
			set_b(i,j, log(exp(old_b)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=(model_probability(-1)*p_observations->get_num_vectors());

			// central difference approximation of the derivative
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			// restore the original parameter
			set_b(i,j, old_b) ;
			invalidate_model() ;

			// analytic derivative, accumulated over all observations
			float64_t deriv_calc=0 ;
			for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
				if (j==1)
					SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
			} ;

			// NOTE(review): SG_ERROR is used here to report the comparison;
			// if SG_ERROR aborts/throws (as elsewhere in shogun), only the
			// first entry would be printed -- verify SG_INFO was not meant.
			SG_ERROR( "b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	} ;
	return true ;
}
04548
bool CHMM::check_model_derivatives()
{
	// Debug helper: for every observation, compare analytic derivatives of
	// the model probability (w.r.t. a and b; p and q only under #ifdef TEST)
	// against symmetric finite differences, reporting relative errors.
	// NOTE(review): `result' is never set to true, so the function always
	// returns false -- callers apparently ignore the return value.
	bool result=false;
	const float64_t delta=3e-4 ;   // finite-difference step in linear domain

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// transition matrix a
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// perturb a(i,j) by -delta in the linear domain
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				// perturb by +delta
				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				// central difference approximation
				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compute the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_a(i, j, dim)) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
				invalidate_model() ;
			} ;
		} ;
		// emission matrix b
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_b(i, j, dim));

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

#ifdef TEST
		// start distribution p
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_p(i, dim));


			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// end distribution q
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_q(i, dim));


			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
#endif
	}
	return result;
}
04652
#ifdef USE_HMMDEBUG
bool CHMM::check_path_derivatives()
{
	// Debug helper (USE_HMMDEBUG only): compare analytic best-path
	// derivatives against symmetric finite differences for all parameters.
	// NOTE(review): `result' is never set to true, so the function always
	// returns false -- callers apparently ignore the return value.
	bool result=false;
	const float64_t delta=1e-4 ;   // finite-difference step in linear domain

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// transition matrix a
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// perturb a(i,j) by -delta in the linear domain
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				// perturb by +delta
				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				// central difference approximation
				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compute the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_a(i, j, dim) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
			} ;
		} ;
		// emission matrix b
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_b(i, j, dim);

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

		// start distribution p
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_p(i, dim);


			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// end distribution q
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_q(i, dim);


			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	}
	return result;
}
#endif // USE_HMMDEBUG
04755
04756
04757 void CHMM::normalize(bool keep_dead_states)
04758 {
04759 int32_t i,j;
04760 const float64_t INF=-1e10;
04761 float64_t sum_p =INF;
04762
04763 for (i=0; i<N; i++)
04764 {
04765 sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
04766
04767 float64_t sum_b =INF;
04768 float64_t sum_a =get_q(i);
04769
04770 for (j=0; j<N; j++)
04771 sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
04772
04773 if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
04774 {
04775 for (j=0; j<N; j++)
04776 set_a(i,j, get_a(i,j)-sum_a);
04777 set_q(i, get_q(i)-sum_a);
04778 }
04779
04780 for (j=0; j<M; j++)
04781 sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
04782 for (j=0; j<M; j++)
04783 set_b(i,j, get_b(i,j)-sum_b);
04784 }
04785
04786 for (i=0; i<N; i++)
04787 set_p(i, get_p(i)-sum_p);
04788
04789 invalidate_model();
04790 }
04791
04792 bool CHMM::append_model(CHMM* app_model)
04793 {
04794 bool result=false;
04795 const int32_t num_states=app_model->get_N();
04796 int32_t i,j;
04797
04798 SG_DEBUG( "cur N:%d M:%d\n", N, M);
04799 SG_DEBUG( "old N:%d M:%d\n", app_model->get_N(), app_model->get_M());
04800 if (app_model->get_M() == get_M())
04801 {
04802 float64_t* n_p=new float64_t[N+num_states];
04803 float64_t* n_q=new float64_t[N+num_states];
04804 float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
04805
04806 float64_t* n_b=new float64_t[(N+num_states)*M];
04807
04808
04809 for (i=0; i<N+num_states; i++)
04810 {
04811 n_p[i]=-CMath::INFTY;
04812 n_q[i]=-CMath::INFTY;
04813
04814 for (j=0; j<N+num_states; j++)
04815 n_a[(N+num_states)*i+j]=-CMath::INFTY;
04816
04817 for (j=0; j<M; j++)
04818 n_b[M*i+j]=-CMath::INFTY;
04819 }
04820
04821
04822
04823
04824
04825
04826 for (i=0; i<N; i++)
04827 {
04828 n_p[i]=get_p(i);
04829
04830 for (j=0; j<N; j++)
04831 n_a[(N+num_states)*j+i]=get_a(i,j);
04832
04833 for (j=0; j<M; j++)
04834 {
04835 n_b[M*i+j]=get_b(i,j);
04836 }
04837 }
04838
04839
04840 for (i=0; i<app_model->get_N(); i++)
04841 {
04842 n_q[i+N]=app_model->get_q(i);
04843
04844 for (j=0; j<app_model->get_N(); j++)
04845 n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
04846 for (j=0; j<app_model->get_M(); j++)
04847 n_b[M*(i+N)+j]=app_model->get_b(i,j);
04848 }
04849
04850
04851
04852 for (i=0; i<N; i++)
04853 {
04854 for (j=N; j<N+num_states; j++)
04855 n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
04856 }
04857
04858 free_state_dependend_arrays();
04859 N+=num_states;
04860
04861 alloc_state_dependend_arrays();
04862
04863
04864 delete[] initial_state_distribution_p;
04865 delete[] end_state_distribution_q;
04866 delete[] transition_matrix_a;
04867 delete[] observation_matrix_b;
04868
04869 transition_matrix_a=n_a;
04870 observation_matrix_b=n_b;
04871 initial_state_distribution_p=n_p;
04872 end_state_distribution_q=n_q;
04873
04874 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
04876 invalidate_model();
04877 }
04878 else
04879 SG_ERROR( "number of observations is different for append model, doing nothing!\n");
04880
04881 return result;
04882 }
04883
04884 bool CHMM::append_model(CHMM* app_model, float64_t* cur_out, float64_t* app_out)
04885 {
04886 bool result=false;
04887 const int32_t num_states=app_model->get_N()+2;
04888 int32_t i,j;
04889
04890 if (app_model->get_M() == get_M())
04891 {
04892 float64_t* n_p=new float64_t[N+num_states];
04893 float64_t* n_q=new float64_t[N+num_states];
04894 float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
04895
04896 float64_t* n_b=new float64_t[(N+num_states)*M];
04897
04898
04899 for (i=0; i<N+num_states; i++)
04900 {
04901 n_p[i]=-CMath::INFTY;
04902 n_q[i]=-CMath::INFTY;
04903
04904 for (j=0; j<N+num_states; j++)
04905 n_a[(N+num_states)*j+i]=-CMath::INFTY;
04906
04907 for (j=0; j<M; j++)
04908 n_b[M*i+j]=-CMath::INFTY;
04909 }
04910
04911
04912
04913
04914
04915
04916 for (i=0; i<N; i++)
04917 {
04918 n_p[i]=get_p(i);
04919
04920 for (j=0; j<N; j++)
04921 n_a[(N+num_states)*j+i]=get_a(i,j);
04922
04923 for (j=0; j<M; j++)
04924 {
04925 n_b[M*i+j]=get_b(i,j);
04926 }
04927 }
04928
04929
04930 for (i=0; i<app_model->get_N(); i++)
04931 {
04932 n_q[i+N+2]=app_model->get_q(i);
04933
04934 for (j=0; j<app_model->get_N(); j++)
04935 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
04936 for (j=0; j<app_model->get_M(); j++)
04937 n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
04938 }
04939
04940
04941
04942
04943 for (i=0; i<M; i++)
04944 {
04945 n_b[M*N+i]=cur_out[i];
04946 n_b[M*(N+1)+i]=app_out[i];
04947 }
04948
04949
04950 for (i=0; i<N+num_states; i++)
04951 {
04952
04953 if (i==N+1)
04954 n_a[(N+num_states)*i + N]=0;
04955
04956
04957
04958 if (i<N)
04959 n_a[(N+num_states)*N+i]=get_q(i);
04960
04961
04962
04963 if (i>=N+2)
04964 n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
04965 }
04966
04967 free_state_dependend_arrays();
04968 N+=num_states;
04969
04970 alloc_state_dependend_arrays();
04971
04972
04973 delete[] initial_state_distribution_p;
04974 delete[] end_state_distribution_q;
04975 delete[] transition_matrix_a;
04976 delete[] observation_matrix_b;
04977
04978 transition_matrix_a=n_a;
04979 observation_matrix_b=n_b;
04980 initial_state_distribution_p=n_p;
04981 end_state_distribution_q=n_q;
04982
04983 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
04985 invalidate_model();
04986 }
04987
04988 return result;
04989 }
04990
04991
void CHMM::add_states(int32_t num_states, float64_t default_value)
{
	// Grow the model by `num_states' additional states. Existing parameters
	// are kept; every parameter touching a new state is initialized via
	// VAL_MACRO: log(default_value), or a log of a random value in
	// [MIN_RAND, MAX_RAND] when default_value is 0 (the macro references the
	// locals default_value, MIN_RAND and MAX_RAND by name). The model is
	// renormalized afterwards.
	int32_t i,j;
	const float64_t MIN_RAND=1e-2;   // lower bound for random init (linear domain)
	const float64_t MAX_RAND=2e-1;   // upper bound for random init (linear domain)

	// enlarged parameter arrays
	float64_t* n_p=new float64_t[N+num_states];
	float64_t* n_q=new float64_t[N+num_states];
	float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];

	float64_t* n_b=new float64_t[(N+num_states)*M];

	// copy the existing parameters
	// NOTE(review): a is copied as n_a[(N+num_states)*j+i]=get_a(i,j) --
	// verify this matches the layout expected by transition_matrix_a.
	for (i=0; i<N; i++)
	{
		n_p[i]=get_p(i);
		n_q[i]=get_q(i);

		for (j=0; j<N; j++)
			n_a[(N+num_states)*j+i]=get_a(i,j);

		for (j=0; j<M; j++)
			n_b[M*i+j]=get_b(i,j);
	}

	// initialize all parameters involving the new states
	for (i=N; i<N+num_states; i++)
	{
		n_p[i]=VAL_MACRO;
		n_q[i]=VAL_MACRO;

		// transitions from new states to old states
		for (j=0; j<N; j++)
			n_a[(N+num_states)*i+j]=VAL_MACRO;

		// transitions from any state into the new states
		for (j=0; j<N+num_states; j++)
			n_a[(N+num_states)*j+i]=VAL_MACRO;

		// emissions of the new states
		for (j=0; j<M; j++)
			n_b[M*i+j]=VAL_MACRO;
	}
	free_state_dependend_arrays();
	N+=num_states;

	alloc_state_dependend_arrays();

	// replace the freshly allocated arrays with the enlarged ones
	delete[] initial_state_distribution_p;
	delete[] end_state_distribution_q;
	delete[] transition_matrix_a;
	delete[] observation_matrix_b;

	transition_matrix_a=n_a;
	observation_matrix_b=n_b;
	initial_state_distribution_p=n_p;
	end_state_distribution_q=n_q;

	invalidate_model();
	normalize();
}
05051
05052 void CHMM::chop(float64_t value)
05053 {
05054 for (int32_t i=0; i<N; i++)
05055 {
05056 int32_t j;
05057
05058 if (exp(get_p(i)) < value)
05059 set_p(i, CMath::ALMOST_NEG_INFTY);
05060
05061 if (exp(get_q(i)) < value)
05062 set_q(i, CMath::ALMOST_NEG_INFTY);
05063
05064 for (j=0; j<N; j++)
05065 {
05066 if (exp(get_a(i,j)) < value)
05067 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05068 }
05069
05070 for (j=0; j<M; j++)
05071 {
05072 if (exp(get_b(i,j)) < value)
05073 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05074 }
05075 }
05076 normalize();
05077 invalidate_model();
05078 }
05079
05080 bool CHMM::linear_train(bool right_align)
05081 {
05082 if (p_observations)
05083 {
05084 int32_t histsize=(get_M()*get_N());
05085 int32_t* hist=new int32_t[histsize];
05086 int32_t* startendhist=new int32_t[get_N()];
05087 int32_t i,dim;
05088
05089 ASSERT(p_observations->get_max_vector_length()<=get_N());
05090
05091 for (i=0; i<histsize; i++)
05092 hist[i]=0;
05093
05094 for (i=0; i<get_N(); i++)
05095 startendhist[i]=0;
05096
05097 if (right_align)
05098 {
05099 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05100 {
05101 int32_t len=0;
05102 uint16_t* obs=p_observations->get_feature_vector(dim, len);
05103
05104 ASSERT(len<=get_N());
05105 startendhist[(get_N()-len)]++;
05106
05107 for (i=0;i<len;i++)
05108 hist[(get_N()-len+i)*get_M() + *obs++]++;
05109 }
05110
05111 set_q(get_N()-1, 1);
05112 for (i=0; i<get_N()-1; i++)
05113 set_q(i, 0);
05114
05115 for (i=0; i<get_N(); i++)
05116 set_p(i, startendhist[i]+PSEUDO);
05117
05118 for (i=0;i<get_N();i++)
05119 {
05120 for (int32_t j=0; j<get_N(); j++)
05121 {
05122 if (i==j-1)
05123 set_a(i,j, 1);
05124 else
05125 set_a(i,j, 0);
05126 }
05127 }
05128 }
05129 else
05130 {
05131 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05132 {
05133 int32_t len=0;
05134 uint16_t* obs=p_observations->get_feature_vector(dim, len);
05135
05136 ASSERT(len<=get_N());
05137 for (i=0;i<len;i++)
05138 hist[i*get_M() + *obs++]++;
05139
05140 startendhist[len-1]++;
05141 }
05142
05143 set_p(0, 1);
05144 for (i=1; i<get_N(); i++)
05145 set_p(i, 0);
05146
05147 for (i=0; i<get_N(); i++)
05148 set_q(i, startendhist[i]+PSEUDO);
05149
05150 int32_t total=p_observations->get_num_vectors();
05151
05152 for (i=0;i<get_N();i++)
05153 {
05154 total-= startendhist[i] ;
05155
05156 for (int32_t j=0; j<get_N(); j++)
05157 {
05158 if (i==j-1)
05159 set_a(i,j, total+PSEUDO);
05160 else
05161 set_a(i,j, 0);
05162 }
05163 }
05164 ASSERT(total==0);
05165 }
05166
05167 for (i=0;i<get_N();i++)
05168 {
05169 for (int32_t j=0; j<get_M(); j++)
05170 {
05171 float64_t sum=0;
05172 int32_t offs=i*get_M()+ p_observations->get_masked_symbols((uint16_t) j, (uint8_t) 254);
05173
05174 for (int32_t k=0; k<p_observations->get_original_num_symbols(); k++)
05175 sum+=hist[offs+k];
05176
05177 set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
05178 }
05179 }
05180
05181 delete[] hist;
05182 delete[] startendhist;
05183 convert_to_log();
05184 invalidate_model();
05185 return true;
05186 }
05187 else
05188 return false;
05189 }
05190
/** Attach an observation set WITHOUT allocating new alpha/beta caches
 * or path tables (in contrast to set_observations()).
 *
 * @param obs string features over uint16_t symbols; its symbol count
 *            must not exceed M
 *
 * NOTE(review): the previous p_observations is neither unref'd nor
 * freed here — presumably handled elsewhere; verify against callers.
 */
void CHMM::set_observation_nocache(CStringFeatures<uint16_t>* obs)
{
	ASSERT(obs);
	p_observations=obs;
	SG_REF(obs);

	// the model cannot emit more symbols than M
	if (obs)
		if (obs->get_num_symbols() > M)
			SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);

	// drop cached tables only if this instance owns them
	// (reused_caches means they belong to another CHMM — see set_observations)
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one cache/table set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			delete[] states_per_observation_psi[i];
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
			states_per_observation_psi[i]=NULL;
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	// cached probabilities/paths no longer match the new observations
	invalidate_model();
}
05232
/** Attach an observation set and (re)allocate the alpha/beta caches and
 * Viterbi path tables sized for the longest observation vector.
 *
 * @param obs    string features over uint16_t symbols; symbol count must
 *               not exceed M
 * @param lambda if non-NULL, share (borrow) that HMM's caches/tables
 *               instead of allocating own ones; reused_caches is set so
 *               this instance will not delete them later
 */
void CHMM::set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda)
{
	ASSERT(obs);
	p_observations=obs;
	SG_REF(obs);



	features=obs;

	SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
	SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
	SG_DEBUG("M: %d\n", M);

	// the model cannot emit more symbols than M
	if (obs)
	{
		if (obs->get_num_symbols() > M)
		{
			SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
		}
	}

	// free the old caches/tables, but only if this instance owns them
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one cache/table set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			delete[] states_per_observation_psi[i];
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
			states_per_observation_psi[i]=NULL;
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	if (obs!=NULL)
	{
		// tables are sized by the longest observation sequence
		int32_t max_T=obs->get_max_vector_length();

		if (lambda)
		{
			// borrow lambda's caches/tables; mark them as not owned
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				this->alpha_cache[i].table= lambda->alpha_cache[i].table;
				this->beta_cache[i].table= lambda->beta_cache[i].table;
				this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
				this->path[i]=lambda->path[i];
			} ;
#else
			this->alpha_cache.table= lambda->alpha_cache.table;
			this->beta_cache.table= lambda->beta_cache.table;
			this->states_per_observation_psi= lambda->states_per_observation_psi;
			this->path=lambda->path;
#endif //USE_HMMPARALLEL_STRUCTURES

			this->reused_caches=true;
		}
		else
		{
			this->reused_caches=false;
			// allocate own Viterbi path tables (max_T x N psi table + path)
#ifdef USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				if ((states_per_observation_psi[i]=new T_STATES[max_T*N])!=NULL)
					SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
				else
					SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
				path[i]=new T_STATES[max_T];
			}
#else // no USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			if ((states_per_observation_psi=new T_STATES[max_T*N]) != NULL)
				SG_DONE();
			else
				SG_ERROR( "failed.\n") ;

			path=new T_STATES[max_T];
#endif // USE_HMMPARALLEL_STRUCTURES
#ifdef USE_HMMCACHE
			// allocate forward/backward (alpha/beta) caches of max_T x N
			SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float32_t)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);

#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				if ((alpha_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N])!=NULL)
					SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;

				if ((beta_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
					SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
			} ;
#else // USE_HMMPARALLEL_STRUCTURES
			if ((alpha_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of alpha_cache.table failed\n") ;

			if ((beta_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "beta_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of beta_cache.table failed\n") ;

#endif // USE_HMMPARALLEL_STRUCTURES
#else // USE_HMMCACHE
			// caching disabled: leave cache tables empty
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				alpha_cache[i].table=NULL ;
				beta_cache[i].table=NULL ;
			} ;
#else //USE_HMMPARALLEL_STRUCTURES
			alpha_cache.table=NULL ;
			beta_cache.table=NULL ;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_HMMCACHE
		}
	}

	// cached probabilities/paths no longer match the new observations
	invalidate_model();
}
05375
05376 bool CHMM::permutation_entropy(int32_t window_width, int32_t sequence_number)
05377 {
05378 if (p_observations && window_width>0 &&
05379 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
05380 {
05381 int32_t min_sequence=sequence_number;
05382 int32_t max_sequence=sequence_number;
05383
05384 if (sequence_number<0)
05385 {
05386 min_sequence=0;
05387 max_sequence=p_observations->get_num_vectors();
05388 SG_INFO( "numseq: %d\n", max_sequence);
05389 }
05390
05391 SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
05392 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
05393 {
05394 int32_t sequence_length=0;
05395 uint16_t* obs=p_observations->get_feature_vector(sequence_number, sequence_length);
05396
05397 int32_t histsize=get_M();
05398 int64_t* hist=new int64_t[histsize];
05399 int32_t i,j;
05400
05401 for (i=0; i<sequence_length-window_width; i++)
05402 {
05403 for (j=0; j<histsize; j++)
05404 hist[j]=0;
05405
05406 uint16_t* ptr=&obs[i];
05407 for (j=0; j<window_width; j++)
05408 {
05409 hist[*ptr++]++;
05410 }
05411
05412 float64_t perm_entropy=0;
05413 for (j=0; j<get_M(); j++)
05414 {
05415 float64_t p=
05416 (((float64_t) hist[j])+PSEUDO)/
05417 (window_width+get_M()*PSEUDO);
05418 perm_entropy+=p*log(p);
05419 }
05420
05421 SG_PRINT( "%f\n", perm_entropy);
05422 }
05423
05424 delete[] hist;
05425 }
05426 return true;
05427 }
05428 else
05429 return false;
05430 }
05431
05432 float64_t CHMM::get_log_derivative(int32_t num_param, int32_t num_example)
05433 {
05434 if (num_param<N)
05435 return model_derivative_p(num_param, num_example);
05436 else if (num_param<2*N)
05437 return model_derivative_q(num_param-N, num_example);
05438 else if (num_param<N*(N+2))
05439 {
05440 int32_t k=num_param-2*N;
05441 int32_t i=k/N;
05442 int32_t j=k%N;
05443 return model_derivative_a(i,j, num_example);
05444 }
05445 else if (num_param<N*(N+2+M))
05446 {
05447 int32_t k=num_param-N*(N+2);
05448 int32_t i=k/M;
05449 int32_t j=k%M;
05450 return model_derivative_b(i,j, num_example);
05451 }
05452
05453 ASSERT(false);
05454 return -1;
05455 }
05456
05457 float64_t CHMM::get_log_model_parameter(int32_t num_param)
05458 {
05459 if (num_param<N)
05460 return get_p(num_param);
05461 else if (num_param<2*N)
05462 return get_q(num_param-N);
05463 else if (num_param<N*(N+2))
05464 return transition_matrix_a[num_param-2*N];
05465 else if (num_param<N*(N+2+M))
05466 return observation_matrix_b[num_param-N*(N+2)];
05467
05468 ASSERT(false);
05469 return -1;
05470 }
05471
05472
05473
05474 bool CHMM::converged(float64_t x, float64_t y)
05475 {
05476 float64_t diff=y-x;
05477 float64_t absdiff=fabs(diff);
05478
05479 SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05480
05481 if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05482 {
05483 iteration_count=iterations;
05484 SG_INFO( "...finished\n");
05485 conv_it=5;
05486 return true;
05487 }
05488 else
05489 {
05490 if (absdiff<epsilon)
05491 conv_it--;
05492 else
05493 conv_it=5;
05494
05495 return false;
05496 }
05497 }
05498
05499 bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
05500 {
05501 CHMM* estimate=new CHMM(this);
05502 CHMM* working=this;
05503 float64_t prob_max=-CMath::INFTY;
05504 float64_t prob=-CMath::INFTY;
05505 float64_t prob_train=CMath::ALMOST_NEG_INFTY;
05506 iteration_count=iterations;
05507
05508 while (!converged(prob, prob_train) && (!CSignal::cancel_computations()))
05509 {
05510 CMath::swap(working, estimate);
05511 prob=prob_train;
05512
05513 switch (type) {
05514 case BW_NORMAL:
05515 working->estimate_model_baum_welch(estimate); break;
05516 case BW_TRANS:
05517 working->estimate_model_baum_welch_trans(estimate); break;
05518 case BW_DEFINED:
05519 working->estimate_model_baum_welch_defined(estimate); break;
05520 case VIT_NORMAL:
05521 working->estimate_model_viterbi(estimate); break;
05522 case VIT_DEFINED:
05523 working->estimate_model_viterbi_defined(estimate); break;
05524 }
05525 prob_train=estimate->model_probability();
05526
05527 if (prob_max<prob_train)
05528 prob_max=prob_train;
05529 }
05530
05531 if (estimate == this)
05532 {
05533 estimate->copy_model(working);
05534 delete working;
05535 }
05536 else
05537 delete estimate;
05538
05539 return true;
05540 }
05541