00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "structure/DynProg.h"
00014 #include "lib/Mathematics.h"
00015 #include "lib/io.h"
00016 #include "lib/config.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "lib/Array.h"
00021 #include "lib/Array2.h"
00022 #include "lib/Array3.h"
00023
00024 #include <stdlib.h>
00025 #include <stdio.h>
00026 #include <time.h>
00027 #include <ctype.h>
00028
00029 template void CDynProg::best_path_trans<1,true,false>(
00030 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00031 const int32_t *orf_info, CPlifBase **PLif_matrix,
00032 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00033 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00034 int32_t *my_pos_seq, bool use_orf);
00035
00036 template void CDynProg::best_path_trans<2,true,false>(
00037 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00038 const int32_t *orf_info, CPlifBase **PLif_matrix,
00039 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00040 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00041 int32_t *my_pos_seq, bool use_orf);
00042
00043 template void CDynProg::best_path_trans<1,false,false>(
00044 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00045 const int32_t *orf_info, CPlifBase **PLif_matrix,
00046 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00047 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00048 int32_t *my_pos_seq, bool use_orf);
00049
00050 template void CDynProg::best_path_trans<2,false,false>(
00051 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00052 const int32_t *orf_info, CPlifBase **PLif_matrix,
00053 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00054 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00055 int32_t *my_pos_seq, bool use_orf);
00056
00057
00058
00059
00060
00061
00062
00063 static int32_t word_degree_default[4]={3,4,5,6} ;
00064 static int32_t cum_num_words_default[5]={0,64,320,1344,5440} ;
00065 static int32_t num_words_default[4]= {64,256,1024,4096} ;
00066 static int32_t mod_words_default[32] = {1,1,1,1,1,1,1,1,
00067 1,1,1,1,1,1,1,1,
00068 0,0,0,0,0,0,0,0,
00069 0,0,0,0,0,0,0,0} ;
00070 static bool sign_words_default[16] = {true,true,true,true,true,true,true,true,
00071 false,false,false,false,false,false,false,false} ;
00072 static int32_t string_words_default[16] = {0,0,0,0,0,0,0,0,
00073 1,1,1,1,1,1,1,1} ;
00074
00075 CDynProg::CDynProg(int32_t p_num_svms )
00076 : CSGObject(), transition_matrix_a_id(1,1), transition_matrix_a(1,1),
00077 transition_matrix_a_deriv(1,1), initial_state_distribution_p(1),
00078 initial_state_distribution_p_deriv(1), end_state_distribution_q(1),
00079 end_state_distribution_q_deriv(1), dict_weights(1,1),
00080 dict_weights_array(dict_weights.get_array()),
00081
00082
00083 num_degrees(4),
00084 num_svms(p_num_svms),
00085 num_strings(1),
00086 word_degree(word_degree_default, num_degrees, true, true),
00087 cum_num_words(cum_num_words_default, num_degrees+1, true, true),
00088 cum_num_words_array(cum_num_words.get_array()),
00089 num_words(num_words_default, num_degrees, true, true),
00090 num_words_array(num_words.get_array()),
00091 mod_words(mod_words_default, num_svms, 2, true, true),
00092 mod_words_array(mod_words.get_array()),
00093 sign_words(sign_words_default, num_svms, true, true),
00094 sign_words_array(sign_words.get_array()),
00095 string_words(string_words_default, num_svms, true, true),
00096 string_words_array(string_words.get_array()),
00097
00098
00099
00100 svm_pos_start(num_degrees),
00101 num_unique_words(num_degrees),
00102 svm_arrays_clean(true),
00103
00104
00105 num_svms_single(1),
00106 word_degree_single(1),
00107 num_words_single(4),
00108 word_used_single(num_words_single),
00109 svm_value_unnormalized_single(num_svms_single),
00110 num_unique_words_single(0),
00111
00112 max_a_id(0), m_seq(1,1,1), m_pos(1), m_orf_info(1,2),
00113 m_segment_sum_weights(1,1), m_plif_list(1),
00114 m_PEN(1,1), m_PEN_state_signals(1,1),
00115 m_genestr(1,1), m_dict_weights(1,1), m_segment_loss(1,1,2),
00116 m_segment_ids(1),
00117 m_segment_mask(1),
00118 m_scores(1), m_states(1,1), m_positions(1,1), m_genestr_stop(1),
00119 m_lin_feat(1,1)
00120
00121 {
00122 trans_list_forward = NULL ;
00123 trans_list_forward_cnt = NULL ;
00124 trans_list_forward_val = NULL ;
00125 trans_list_forward_id = NULL ;
00126 trans_list_len = 0 ;
00127
00128 mem_initialized = true ;
00129
00130 this->N=1;
00131 m_step=0;
00132
00133 m_raw_intensities = NULL;
00134 m_probe_pos = NULL;
00135 m_num_probes_cum = new int32_t[100];
00136 m_num_probes_cum[0] = 0;
00137
00138 m_num_lin_feat_plifs_cum = new int32_t[100];
00139 m_num_lin_feat_plifs_cum[0] = num_svms;
00140 m_num_raw_data = 0;
00141 m_genestr_len = 0;
00142 #ifdef ARRAY_STATISTICS
00143 word_degree.set_name("word_degree");
00144 #endif
00145 }
00146
00147 CDynProg::~CDynProg()
00148 {
00149 if (trans_list_forward_cnt)
00150 delete[] trans_list_forward_cnt;
00151 if (trans_list_forward)
00152 {
00153 for (int32_t i=0; i<trans_list_len; i++)
00154 {
00155 if (trans_list_forward[i])
00156 delete[] trans_list_forward[i];
00157 }
00158 delete[] trans_list_forward;
00159 }
00160 if (trans_list_forward_val)
00161 {
00162 for (int32_t i=0; i<trans_list_len; i++)
00163 {
00164 if (trans_list_forward_val[i])
00165 delete[] trans_list_forward_val[i];
00166 }
00167 delete[] trans_list_forward_val;
00168 }
00169 if (trans_list_forward_id)
00170 {
00171 for (int32_t i=0; i<trans_list_len; i++)
00172 {
00173 if (trans_list_forward_id[i])
00174 delete[] trans_list_forward_id[i];
00175 }
00176 delete[] trans_list_forward_id;
00177 }
00178 if (m_raw_intensities)
00179 delete[] m_raw_intensities;
00180 if (m_probe_pos)
00181 delete[] m_probe_pos;
00182 if (m_num_probes_cum)
00183 delete[] m_num_probes_cum ;
00184 if (m_num_lin_feat_plifs_cum)
00185 delete[] m_num_lin_feat_plifs_cum ;
00186 }
00187
00189 void CDynProg::set_genestr_len(int32_t genestr_len)
00190 {
00191 m_genestr_len = genestr_len;
00192 }
00193
00194 int32_t CDynProg::get_num_svms()
00195 {
00196 return num_svms;
00197 }
00198
00199 void CDynProg::precompute_stop_codons(const char* sequence, int32_t length)
00200 {
00201 m_genestr_stop.resize_array(length) ;
00202 m_genestr_stop.zero() ;
00203 m_genestr_stop.set_name("genestr_stop") ;
00204 {
00205 for (int32_t i=0; i<length-2; i++)
00206 if ((sequence[i]=='t' || sequence[i]=='T') &&
00207 (((sequence[i+1]=='a' || sequence[i+1]=='A') &&
00208 (sequence[i+2]=='a' || sequence[i+2]=='g' || sequence[i+2]=='A' || sequence[i+2]=='G')) ||
00209 ((sequence[i+1]=='g'||sequence[i+1]=='G') && (sequence[i+2]=='a' || sequence[i+2]=='A') )))
00210 m_genestr_stop.element(i)=true ;
00211 else
00212 m_genestr_stop.element(i)=false ;
00213 m_genestr_stop.element(length-1)=false ;
00214 m_genestr_stop.element(length-1)=false ;
00215 }
00216 }
00217
00218 void CDynProg::set_num_states(int32_t p_N)
00219 {
00220 N=p_N ;
00221
00222 transition_matrix_a_id.resize_array(N,N) ;
00223 transition_matrix_a.resize_array(N,N) ;
00224 transition_matrix_a_deriv.resize_array(N,N) ;
00225 initial_state_distribution_p.resize_array(N) ;
00226 initial_state_distribution_p_deriv.resize_array(N) ;
00227 end_state_distribution_q.resize_array(N);
00228 end_state_distribution_q_deriv.resize_array(N) ;
00229
00230 m_orf_info.resize_array(N,2) ;
00231 m_PEN.resize_array(N,N) ;
00232 m_PEN_state_signals.resize_array(N,1) ;
00233 }
00234
00235 int32_t CDynProg::get_num_states()
00236 {
00237 return N;
00238 }
00239
00240 void CDynProg::init_tiling_data(
00241 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes,
00242 const int32_t seq_len)
00243 {
00244 m_num_raw_data++;
00245 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00246
00247 int32_t* tmp_probe_pos = new int32_t[m_num_probes_cum[m_num_raw_data]];
00248 float64_t* tmp_raw_intensities = new float64_t[m_num_probes_cum[m_num_raw_data]];
00249
00250
00251 if (m_num_raw_data==1){
00252 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00253 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00254
00255 }else{
00256 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
00257 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
00258 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
00259 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
00260 }
00261 delete[] m_probe_pos;
00262 delete[] m_raw_intensities;
00263 m_probe_pos = new int32_t[num_probes];
00264 m_raw_intensities = new float64_t[num_probes];
00265
00266 memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00267 memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00268
00269 }
00270
00271 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms, const int32_t seq_len)
00272 {
00273 m_lin_feat.resize_array(p_num_svms, seq_len);
00274 }
00275 void CDynProg::resize_lin_feat(const int32_t num_new_feat, const int32_t seq_len)
00276 {
00277 int32_t dim1, dim2;
00278 m_lin_feat.get_array_size(dim1, dim2);
00279 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00280 ASSERT(dim2==seq_len);
00281
00282
00283 float64_t* arr = m_lin_feat.get_array();
00284 float64_t* tmp = new float64_t[(dim1+num_new_feat)*dim2];
00285 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00286 for(int32_t j=0;j<seq_len;j++)
00287 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00288 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00289
00290 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2);
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304 }
00305
00306
00307 void CDynProg::precompute_tiling_plifs(
00308 CPlif** PEN, const int32_t* tiling_plif_ids,
00309 const int32_t num_tiling_plifs, const int32_t seq_len, const int32_t* pos)
00310 {
00311 SG_PRINT("precompute_tiling_plifs:%f num_tiling_plifs:%i\n",m_raw_intensities[0], num_tiling_plifs);
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00325
00326
00327 float64_t* tiling_plif = new float64_t[num_tiling_plifs];
00328 float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]];
00329 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]; i++)
00330 svm_value[i]=0.0;
00331 int32_t tiling_rows[num_tiling_plifs];
00332 for (int32_t i=0; i<num_tiling_plifs; i++)
00333 {
00334 tiling_plif[i]=0.0;
00335 CPlif * plif = PEN[tiling_plif_ids[i]];
00336 tiling_rows[i] = plif->get_use_svm();
00337
00338
00339
00340
00341
00342 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00343 }
00344 resize_lin_feat(num_tiling_plifs, seq_len);
00345
00346
00347 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00348 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00349 int32_t num=m_num_probes_cum[m_num_raw_data-1];
00350
00351 for (int32_t pos_idx=0;pos_idx<seq_len;pos_idx++)
00352 {
00353
00354
00355 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<pos[pos_idx])
00356 {
00357
00358 for (int32_t i=0; i<num_tiling_plifs; i++)
00359 {
00360 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00361
00362
00363 CPlif * plif = PEN[tiling_plif_ids[i]];
00364
00365 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00366 plif->set_do_calc(true);
00367 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00368
00369 plif->set_do_calc(false);
00370
00371 }
00372
00373 p_tiling_data++;
00374 p_tiling_pos++;
00375 num++;
00376 }
00377 for (int32_t i=0; i<num_tiling_plifs; i++)
00378 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00379 }
00380 delete[] svm_value;
00381 delete[] tiling_plif;
00382 }
00383
00384 void CDynProg::create_word_string(
00385 const char* genestr, int32_t genestr_num, int32_t genestr_len,
00386 uint16_t*** wordstr)
00387 {
00388 for (int32_t k=0; k<genestr_num; k++)
00389 {
00390 wordstr[k]=new uint16_t*[num_degrees] ;
00391 for (int32_t j=0; j<num_degrees; j++)
00392 {
00393 wordstr[k][j]=NULL ;
00394 {
00395 wordstr[k][j]=new uint16_t[genestr_len] ;
00396 for (int32_t i=0; i<genestr_len; i++)
00397 switch (genestr[i])
00398 {
00399 case 'A':
00400 case 'a': wordstr[k][j][i]=0 ; break ;
00401 case 'C':
00402 case 'c': wordstr[k][j][i]=1 ; break ;
00403 case 'G':
00404 case 'g': wordstr[k][j][i]=2 ; break ;
00405 case 'T':
00406 case 't': wordstr[k][j][i]=3 ; break ;
00407 default: ASSERT(0) ;
00408 }
00409 translate_from_single_order(wordstr[k][j], genestr_len, word_degree[j]-1, word_degree[j]) ;
00410 }
00411 }
00412 }
00413
00414 }
00415
00416 void CDynProg::precompute_content_values(
00417 uint16_t*** wordstr, const int32_t *pos,const int32_t seq_len,
00418 const int32_t genestr_len,float64_t *dictionary_weights,int32_t dict_len)
00419 {
00420
00421
00422 dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
00423 dict_weights_array=dict_weights.get_array() ;
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435 for (int32_t p=0 ; p<seq_len-1 ; p++)
00436 {
00437 int32_t from_pos = pos[p];
00438 int32_t to_pos = pos[p+1];
00439 float64_t my_svm_values_unnormalized[num_svms] ;
00440
00441
00442 ASSERT(from_pos<=genestr_len)
00443 ASSERT(to_pos<=genestr_len)
00444
00445 for (int32_t s=0; s<num_svms; s++)
00446 {
00447 my_svm_values_unnormalized[s]=0.0;
00448 }
00449 for (int32_t i=from_pos; i<to_pos;i++)
00450 {
00451 for (int32_t j=0; j<num_degrees; j++)
00452 {
00453 uint16_t word = wordstr[0][j][i] ;
00454 for (int32_t s=0; s<num_svms; s++)
00455 {
00456
00457 if (mod_words.get_element(s,0)==3 && i%3!=mod_words.get_element(s,1))
00458 continue;
00459 my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
00460 }
00461 }
00462 }
00463 for (int32_t s=0; s<num_svms; s++)
00464 {
00465 float64_t prev = m_lin_feat.get_element(s,p);
00466 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s],s,p+1);
00467
00468 ASSERT(prev>-1e20);
00469 }
00470 }
00471 for (int32_t j=0; j<num_degrees; j++)
00472 delete[] wordstr[0][j] ;
00473 delete[] wordstr[0] ;
00474 }
00475
00476 void CDynProg::set_p_vector(float64_t *p, int32_t p_N)
00477 {
00478 if (!(p_N==N))
00479 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), p_N: %i\n",p_N, N);
00480
00481
00482
00483 initial_state_distribution_p.set_array(p, p_N, true, true);
00484 }
00485
00486 void CDynProg::set_q_vector(float64_t *q, int32_t q_N)
00487 {
00488 if (!(q_N==N))
00489 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), p_N: %i\n",q_N, N);
00490 end_state_distribution_q.set_array(q, q_N, true, true);
00491 }
00492
00493 void CDynProg::set_a(float64_t *a, int32_t p_M, int32_t p_N)
00494 {
00495 ASSERT(p_N==N);
00496 ASSERT(p_M==p_N);
00497 transition_matrix_a.set_array(a, p_N, p_N, true, true);
00498 transition_matrix_a_deriv.resize_array(p_N, p_N);
00499 }
00500
00501 void CDynProg::set_a_id(int32_t *a, int32_t p_M, int32_t p_N)
00502 {
00503 ASSERT(p_N==N);
00504 ASSERT(p_M==p_N);
00505 transition_matrix_a_id.set_array(a, p_N, p_N, true, true);
00506 max_a_id = 0;
00507 for (int32_t i=0; i<p_N; i++)
00508 for (int32_t j=0; j<p_N; j++)
00509 max_a_id=CMath::max(max_a_id, transition_matrix_a_id.element(i,j));
00510 }
00511
00512 void CDynProg::set_a_trans_matrix(
00513 float64_t *a_trans, int32_t num_trans, int32_t p_N)
00514 {
00515 if (!((p_N==3) || (p_N==4)))
00516 SG_ERROR("!((p_N==3) || (p_N==4)), p_N: %i\n",p_N);
00517
00518 delete[] trans_list_forward ;
00519 delete[] trans_list_forward_cnt ;
00520 delete[] trans_list_forward_val ;
00521 delete[] trans_list_forward_id ;
00522
00523 trans_list_forward = NULL ;
00524 trans_list_forward_cnt = NULL ;
00525 trans_list_forward_val = NULL ;
00526 trans_list_len = 0 ;
00527
00528 transition_matrix_a.zero() ;
00529 transition_matrix_a_id.zero() ;
00530
00531 mem_initialized = true ;
00532
00533 trans_list_forward_cnt=NULL ;
00534 trans_list_len = N ;
00535 trans_list_forward = new T_STATES*[N] ;
00536 trans_list_forward_cnt = new T_STATES[N] ;
00537 trans_list_forward_val = new float64_t*[N] ;
00538 trans_list_forward_id = new int32_t*[N] ;
00539
00540 int32_t start_idx=0;
00541 for (int32_t j=0; j<N; j++)
00542 {
00543 int32_t old_start_idx=start_idx;
00544
00545 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00546 {
00547 start_idx++;
00548
00549 if (start_idx>1 && start_idx<num_trans)
00550 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00551 }
00552
00553 if (start_idx>1 && start_idx<num_trans)
00554 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00555
00556 int32_t len=start_idx-old_start_idx;
00557 ASSERT(len>=0);
00558
00559 trans_list_forward_cnt[j] = 0 ;
00560
00561 if (len>0)
00562 {
00563 trans_list_forward[j] = new T_STATES[len] ;
00564 trans_list_forward_val[j] = new float64_t[len] ;
00565 trans_list_forward_id[j] = new int32_t[len] ;
00566 }
00567 else
00568 {
00569 trans_list_forward[j] = NULL;
00570 trans_list_forward_val[j] = NULL;
00571 trans_list_forward_id[j] = NULL;
00572 }
00573 }
00574
00575 for (int32_t i=0; i<num_trans; i++)
00576 {
00577 int32_t from_state = (int32_t)a_trans[i] ;
00578 int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00579 float64_t val = a_trans[i+num_trans*2] ;
00580 int32_t id = 0 ;
00581 if (p_N==4)
00582 id = (int32_t)a_trans[i+num_trans*3] ;
00583
00584
00585 ASSERT(to_state>=0 && to_state<N);
00586 ASSERT(from_state>=0 && from_state<N);
00587
00588 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00589 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00590 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00591 trans_list_forward_cnt[to_state]++ ;
00592 transition_matrix_a.element(from_state, to_state) = val ;
00593 transition_matrix_a_id.element(from_state, to_state) = id ;
00594
00595 } ;
00596
00597 max_a_id = 0 ;
00598 for (int32_t i=0; i<N; i++)
00599 for (int32_t j=0; j<N; j++)
00600 {
00601
00602
00603 max_a_id = CMath::max(max_a_id, transition_matrix_a_id.element(i,j)) ;
00604 }
00605
00606 }
00607
00608 void CDynProg::init_svm_arrays(int32_t p_num_degrees, int32_t p_num_svms)
00609 {
00610 svm_arrays_clean=false ;
00611
00612 word_degree.resize_array(num_degrees) ;
00613
00614 cum_num_words.resize_array(num_degrees+1) ;
00615 cum_num_words_array=cum_num_words.get_array() ;
00616
00617 num_words.resize_array(num_degrees) ;
00618 num_words_array=num_words.get_array() ;
00619
00620
00621 svm_pos_start.resize_array(num_degrees) ;
00622 num_unique_words.resize_array(num_degrees) ;
00623 }
00624
00625
00626 void CDynProg::init_word_degree_array(
00627 int32_t * p_word_degree_array, int32_t num_elem)
00628 {
00629 svm_arrays_clean=false ;
00630
00631 word_degree.resize_array(num_degrees) ;
00632 ASSERT(num_degrees==num_elem);
00633
00634
00635 for (int32_t i=0; i<num_degrees; i++)
00636 word_degree[i]=p_word_degree_array[i] ;
00637
00638 }
00639
00640 void CDynProg::init_cum_num_words_array(
00641 int32_t * p_cum_num_words_array, int32_t num_elem)
00642 {
00643 svm_arrays_clean=false ;
00644
00645 cum_num_words.resize_array(num_degrees+1) ;
00646 cum_num_words_array=cum_num_words.get_array() ;
00647 ASSERT(num_degrees+1==num_elem);
00648
00649 for (int32_t i=0; i<num_degrees+1; i++)
00650 cum_num_words[i]=p_cum_num_words_array[i] ;
00651 }
00652
00653 void CDynProg::init_num_words_array(
00654 int32_t * p_num_words_array, int32_t num_elem)
00655 {
00656 svm_arrays_clean=false ;
00657
00658 num_words.resize_array(num_degrees) ;
00659 num_words_array=num_words.get_array() ;
00660 ASSERT(num_degrees==num_elem);
00661
00662 for (int32_t i=0; i<num_degrees; i++)
00663 num_words[i]=p_num_words_array[i] ;
00664
00665
00666
00667 }
00668
00669 void CDynProg::init_mod_words_array(
00670 int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00671 {
00672
00673
00674
00675
00676
00677
00678 svm_arrays_clean=false ;
00679
00680 ASSERT(num_svms==num_elem);
00681 ASSERT(num_columns==2);
00682
00683 mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00684 mod_words_array = mod_words.get_array() ;
00685
00686
00687
00688
00689
00690 }
00691
00692 void CDynProg::init_sign_words_array(bool* p_sign_words_array, int32_t num_elem)
00693 {
00694 svm_arrays_clean=false ;
00695
00696 ASSERT(num_svms==num_elem);
00697
00698 sign_words.set_array(p_sign_words_array, num_elem, true, true) ;
00699 sign_words_array = sign_words.get_array() ;
00700 }
00701
00702 void CDynProg::init_string_words_array(
00703 int32_t* p_string_words_array, int32_t num_elem)
00704 {
00705 svm_arrays_clean=false ;
00706
00707 ASSERT(num_svms==num_elem);
00708
00709 string_words.set_array(p_string_words_array, num_elem, true, true) ;
00710 string_words_array = string_words.get_array() ;
00711 }
00712
00713 bool CDynProg::check_svm_arrays()
00714 {
00715
00716 if ((word_degree.get_dim1()==num_degrees) &&
00717 (cum_num_words.get_dim1()==num_degrees+1) &&
00718 (num_words.get_dim1()==num_degrees) &&
00719
00720
00721
00722
00723
00724 (svm_pos_start.get_dim1()==num_degrees) &&
00725 (num_unique_words.get_dim1()==num_degrees) &&
00726 (mod_words.get_dim1()==num_svms) &&
00727 (mod_words.get_dim2()==2) &&
00728 (sign_words.get_dim1()==num_svms) &&
00729 (string_words.get_dim1()==num_svms))
00730 {
00731 svm_arrays_clean=true ;
00732 return true ;
00733 }
00734 else
00735 {
00736 if ((num_unique_words.get_dim1()==num_degrees) &&
00737 (mod_words.get_dim1()==num_svms) &&
00738 (mod_words.get_dim2()==2) &&
00739 (sign_words.get_dim1()==num_svms) &&
00740 (string_words.get_dim1()==num_svms))
00741 SG_PRINT("OK\n") ;
00742 else
00743 SG_PRINT("not OK\n") ;
00744
00745 if (!(word_degree.get_dim1()==num_degrees))
00746 SG_WARNING("SVM array: word_degree.get_dim1()!=num_degrees") ;
00747 if (!(cum_num_words.get_dim1()==num_degrees+1))
00748 SG_WARNING("SVM array: cum_num_words.get_dim1()!=num_degrees+1") ;
00749 if (!(num_words.get_dim1()==num_degrees))
00750 SG_WARNING("SVM array: num_words.get_dim1()==num_degrees") ;
00751 if (!(svm_pos_start.get_dim1()==num_degrees))
00752 SG_WARNING("SVM array: svm_pos_start.get_dim1()!=num_degrees") ;
00753 if (!(num_unique_words.get_dim1()==num_degrees))
00754 SG_WARNING("SVM array: num_unique_words.get_dim1()!=num_degrees") ;
00755 if (!(mod_words.get_dim1()==num_svms))
00756 SG_WARNING("SVM array: mod_words.get_dim1()!=num_svms") ;
00757 if (!(mod_words.get_dim2()==2))
00758 SG_WARNING("SVM array: mod_words.get_dim2()!=2") ;
00759 if (!(sign_words.get_dim1()==num_svms))
00760 SG_WARNING("SVM array: sign_words.get_dim1()!=num_svms") ;
00761 if (!(string_words.get_dim1()==num_svms))
00762 SG_WARNING("SVM array: string_words.get_dim1()!=num_svms") ;
00763
00764 svm_arrays_clean=false ;
00765 return false ;
00766 }
00767 }
00768
00769 void CDynProg::best_path_set_seq(float64_t *seq, int32_t p_N, int32_t seq_len)
00770 {
00771 if (!svm_arrays_clean)
00772 {
00773 SG_ERROR( "SVM arrays not clean") ;
00774 return ;
00775 } ;
00776
00777 ASSERT(p_N==N);
00778 ASSERT(initial_state_distribution_p.get_dim1()==N);
00779 ASSERT(end_state_distribution_q.get_dim1()==N);
00780
00781 m_seq.set_array(seq, N, seq_len, 1, true, true) ;
00782 this->N=N ;
00783
00784 m_call=3 ;
00785 m_step=2 ;
00786 }
00787
00788 void CDynProg::best_path_set_seq3d(
00789 float64_t *seq, int32_t p_N, int32_t seq_len, int32_t max_num_signals)
00790 {
00791 if (!svm_arrays_clean)
00792 {
00793 SG_ERROR( "SVM arrays not clean") ;
00794 return ;
00795 } ;
00796
00797 ASSERT(p_N==N);
00798 ASSERT(initial_state_distribution_p.get_dim1()==N);
00799 ASSERT(end_state_distribution_q.get_dim1()==N);
00800
00801 m_seq.set_array(seq, N, seq_len, max_num_signals, true, true) ;
00802 this->N=N ;
00803
00804 m_call=3 ;
00805 m_step=2 ;
00806 }
00807
00808 void CDynProg::best_path_set_pos(int32_t *pos, int32_t seq_len)
00809 {
00810 if (m_step!=2)
00811 SG_ERROR( "please call best_path_set_seq first\n") ;
00812
00813 if (seq_len!=m_seq.get_dim2())
00814 SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_seq.get_dim2()) ;
00815
00816 m_pos.set_array(pos, seq_len, true, true) ;
00817
00818 m_step=3 ;
00819 }
00820
00821 void CDynProg::best_path_set_orf_info(int32_t *orf_info, int32_t m, int32_t n)
00822 {
00823
00824
00825
00826
00827
00828 if (n!=2)
00829 SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00830 m_orf_info.set_array(orf_info, m, n, true, true) ;
00831
00832 m_call=1 ;
00833 m_step=4 ;
00834 }
00835
00836 void CDynProg::best_path_set_segment_sum_weights(
00837 float64_t *segment_sum_weights, int32_t num_states, int32_t seq_len)
00838 {
00839 if (m_step!=3)
00840 SG_ERROR( "please call best_path_set_pos first\n") ;
00841
00842 if (num_states!=N)
00843 SG_ERROR( "segment_sum_weights size does not match previous info %i!=%i\n", num_states, N) ;
00844 if (seq_len!=m_pos.get_dim1())
00845 SG_ERROR( "segment_sum_weights size incorrect %i!=%i\n", seq_len, m_pos.get_dim1()) ;
00846
00847 m_segment_sum_weights.set_array(segment_sum_weights, num_states, seq_len, true, true) ;
00848
00849 m_call=2 ;
00850 m_step=4 ;
00851 }
00852
00853 void CDynProg::best_path_set_plif_list(CDynamicArray<CPlifBase*>* plifs)
00854 {
00855 ASSERT(plifs);
00856 CPlifBase** plif_list=plifs->get_array();
00857 int32_t num_plif=plifs->get_num_elements();
00858
00859 if (m_step!=4)
00860 SG_ERROR( "please call best_path_set_orf_info or best_path_segment_sum_weights first\n") ;
00861
00862 m_plif_list.set_array(plif_list, num_plif, true, true) ;
00863
00864 m_step=5 ;
00865 }
00866
00867 void CDynProg::best_path_set_plif_id_matrix(
00868 int32_t *plif_id_matrix, int32_t m, int32_t n)
00869 {
00870 if (m_step!=5)
00871 SG_ERROR( "please call best_path_set_plif_list first\n") ;
00872
00873 if ((m!=N) || (n!=N))
00874 SG_ERROR( "plif_id_matrix size does not match previous info %i!=%i or %i!=%i\n", m, N, n, N) ;
00875
00876 CArray2<int32_t> id_matrix(plif_id_matrix, N, N, false, false) ;
00877 #ifdef DYNPROG_DEBUG
00878 id_matrix.set_name("id_matrix");
00879 id_matrix.display_array();
00880 #endif //DYNPROG_DEBUG
00881 m_PEN.resize_array(N, N) ;
00882 for (int32_t i=0; i<N; i++)
00883 for (int32_t j=0; j<N; j++)
00884 if (id_matrix.element(i,j)>=0)
00885 m_PEN.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00886 else
00887 m_PEN.element(i,j)=NULL ;
00888
00889 m_step=6 ;
00890 }
00891
00892 void CDynProg::best_path_set_plif_state_signal_matrix(
00893 int32_t *plif_id_matrix, int32_t m, int32_t max_num_signals)
00894 {
00895 if (m_step!=6)
00896 SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00897
00898 if (m!=N)
00899 SG_ERROR( "plif_state_signal_matrix size does not match previous info %i!=%i\n", m, N) ;
00900
00901 if (m_seq.get_dim3() != max_num_signals)
00902 SG_ERROR( "size(plif_state_signal_matrix,2) does not match with size(m_seq,3): %i!=%i\nSorry, Soeren... interface changed\n", m_seq.get_dim3(), max_num_signals) ;
00903
00904 CArray2<int32_t> id_matrix(plif_id_matrix, N, max_num_signals, false, false) ;
00905 m_PEN_state_signals.resize_array(N, max_num_signals) ;
00906 for (int32_t i=0; i<N; i++)
00907 {
00908 for (int32_t j=0; j<max_num_signals; j++)
00909 {
00910 if (id_matrix.element(i,j)>=0)
00911 m_PEN_state_signals.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00912 else
00913 m_PEN_state_signals.element(i,j)=NULL ;
00914 }
00915 }
00916
00917 m_step=6 ;
00918 }
00919
00920 void CDynProg::best_path_set_genestr(
00921 char* genestr, int32_t genestr_len, int32_t genestr_num)
00922 {
00923 if (m_step!=6)
00924 SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00925
00926 ASSERT(genestr);
00927 ASSERT(genestr_len>0);
00928 ASSERT(genestr_num>0);
00929
00930 m_genestr.set_array(genestr, genestr_len, genestr_num, true, true) ;
00931
00932 m_step=7 ;
00933 }
00934
00935 void CDynProg::best_path_set_my_state_seq(
00936 int32_t* my_state_seq, int32_t seq_len)
00937 {
00938 ASSERT(my_state_seq && seq_len>0);
00939 m_my_state_seq.resize_array(seq_len);
00940 for (int32_t i=0; i<seq_len; i++)
00941 m_my_state_seq[i]=my_state_seq[i];
00942 }
00943
00944 void CDynProg::best_path_set_my_pos_seq(int32_t* my_pos_seq, int32_t seq_len)
00945 {
00946 ASSERT(my_pos_seq && seq_len>0);
00947 m_my_pos_seq.resize_array(seq_len);
00948 for (int32_t i=0; i<seq_len; i++)
00949 m_my_pos_seq[i]=my_pos_seq[i];
00950 }
00951
00952 void CDynProg::best_path_set_dict_weights(
00953 float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00954 {
00955 if (m_step!=7)
00956 SG_ERROR( "please call best_path_set_genestr first\n") ;
00957
00958 if (num_svms!=n)
00959 SG_ERROR( "dict_weights array does not match num_svms=%i!=%i\n", num_svms, n) ;
00960
00961 m_dict_weights.set_array(dictionary_weights, dict_len, num_svms, true, true) ;
00962
00963
00964 m_segment_loss.resize_array(max_a_id+1, max_a_id+1, 2) ;
00965 m_segment_loss.zero() ;
00966 m_segment_ids.resize_array(m_seq.get_dim2()) ;
00967 m_segment_mask.resize_array(m_seq.get_dim2()) ;
00968 m_segment_ids.zero() ;
00969 m_segment_mask.zero() ;
00970
00971 m_step=8 ;
00972 }
00973
00974 void CDynProg::best_path_set_segment_loss(
00975 float64_t* segment_loss, int32_t m, int32_t n)
00976 {
00977
00978 if (2*m!=n)
00979 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00980
00981 if (m!=max_a_id+1)
00982 SG_ERROR( "segment_loss size should match max_a_id: %i!=%i\n", m, max_a_id+1) ;
00983
00984 m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00985
00986
00987
00988 }
00989
00990 void CDynProg::best_path_set_segment_ids_mask(
00991 int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00992 {
00993 int32_t max_id = 0;
00994 for (int32_t i=1;i<m;i++)
00995 max_id = CMath::max(max_id,segment_ids[i]);
00996 SG_PRINT("max_id: %i, m:%i\n",max_id, m);
00997 m_segment_ids.set_array(segment_ids, m, false, true) ;
00998 m_segment_ids.set_name("m_segment_ids");
00999 m_segment_mask.set_array(segment_mask, m, false, true) ;
01000 m_segment_mask.set_name("m_segment_mask");
01001 }
01002
01003
01004 void CDynProg::best_path_call(int32_t nbest, bool use_orf)
01005 {
01006 if (m_step!=8)
01007 SG_ERROR( "please call best_path_set_dict_weights first\n") ;
01008 if (m_call!=1)
01009 SG_ERROR( "please call best_path_set_orf_info first\n") ;
01010 ASSERT(N==m_seq.get_dim1()) ;
01011 ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
01012
01013 m_scores.resize_array(nbest) ;
01014 m_states.resize_array(nbest, m_seq.get_dim2()) ;
01015 m_positions.resize_array(nbest, m_seq.get_dim2()) ;
01016
01017 m_call=1 ;
01018
01019 ASSERT(nbest==1||nbest==2) ;
01020 ASSERT(m_genestr.get_dim2()==1) ;
01021 if (nbest==1)
01022 best_path_trans<1,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
01023 m_orf_info.get_array(), m_PEN.get_array(),
01024 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
01025 m_genestr.get_dim2(), m_scores.get_array(),
01026 m_states.get_array(), m_positions.get_array(),
01027 use_orf) ;
01028 else
01029 best_path_trans<2,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
01030 m_orf_info.get_array(), m_PEN.get_array(),
01031 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
01032 m_genestr.get_dim2(), m_scores.get_array(),
01033 m_states.get_array(), m_positions.get_array(),
01034 use_orf) ;
01035
01036 m_step=9 ;
01037 }
01038
01039 void CDynProg::best_path_deriv_call()
01040 {
01041
01042
01043
01044
01045 ASSERT(N==m_seq.get_dim1()) ;
01046 ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
01047
01048 m_call=5 ;
01049
01050 m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
01051 m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
01052
01053 best_path_trans_deriv(m_my_state_seq.get_array(), m_my_pos_seq.get_array(),
01054 m_my_scores.get_array(), m_my_losses.get_array(), m_my_state_seq.get_array_size(),
01055 m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
01056 m_PEN.get_array(), m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(), m_genestr.get_dim2()) ;
01057
01058 m_step=12 ;
01059 }
01060
01061
01062 void CDynProg::best_path_2struct_call(int32_t nbest)
01063 {
01064 if (m_step!=8)
01065 SG_ERROR( "please call best_path_set_orf_dict_weights first\n") ;
01066 if (m_call!=2)
01067 SG_ERROR( "please call best_path_set_segment_sum_weights first\n") ;
01068 ASSERT(N==m_seq.get_dim1()) ;
01069 ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
01070
01071 m_scores.resize_array(nbest) ;
01072 m_states.resize_array(nbest, m_seq.get_dim2()) ;
01073 m_positions.resize_array(nbest, m_seq.get_dim2()) ;
01074
01075 m_call=2 ;
01076
01077 best_path_2struct(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
01078 m_PEN.get_array(),
01079 m_genestr.get_array(), m_genestr.get_dim1(),
01080 nbest,
01081 m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
01082 m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(),
01083 m_segment_sum_weights.get_array()) ;
01084
01085 m_step=9 ;
01086 }
01087
01088 void CDynProg::best_path_simple_call(int32_t nbest)
01089 {
01090 if (m_step!=2)
01091 SG_ERROR( "please call best_path_set_seq first\n") ;
01092 if (m_call!=3)
01093 SG_ERROR( "please call best_path_set_seq first\n") ;
01094 ASSERT(N==m_seq.get_dim1()) ;
01095
01096 m_scores.resize_array(nbest) ;
01097 m_states.resize_array(nbest, m_seq.get_dim2()) ;
01098
01099 m_call=3 ;
01100
01101 best_path_trans_simple(m_seq.get_array(), m_seq.get_dim2(),
01102 nbest,
01103 m_scores.get_array(), m_states.get_array()) ;
01104
01105 m_step=9 ;
01106 }
01107
01108 void CDynProg::best_path_deriv_call(int32_t nbest)
01109 {
01110 if (!svm_arrays_clean)
01111 {
01112 SG_ERROR( "SVM arrays not clean") ;
01113 return ;
01114 } ;
01115
01116
01117 }
01118
01119
01120 void CDynProg::best_path_get_scores(float64_t **scores, int32_t *m)
01121 {
01122 if (m_step!=9 && m_step!=12)
01123 SG_ERROR( "please call best_path*_call first\n") ;
01124
01125 if (m_step==9)
01126 {
01127 *scores=m_scores.get_array() ;
01128 *m=m_scores.get_dim1() ;
01129 } else
01130 {
01131 *scores=m_my_scores.get_array() ;
01132 *m=m_my_scores.get_dim1() ;
01133 }
01134
01135 m_step=10 ;
01136 }
01137
01138 void CDynProg::best_path_get_states(int32_t **states, int32_t *m, int32_t *n)
01139 {
01140 if (m_step!=10)
01141 SG_ERROR( "please call best_path_get_score first\n") ;
01142
01143 *states=m_states.get_array() ;
01144 *m=m_states.get_dim1() ;
01145 *n=m_states.get_dim2() ;
01146
01147 m_step=11 ;
01148 }
01149
01150 void CDynProg::best_path_get_positions(
01151 int32_t **positions, int32_t *m, int32_t *n)
01152 {
01153 if (m_step!=11)
01154 SG_ERROR( "please call best_path_get_positions first\n") ;
01155 if (m_call==3)
01156 SG_ERROR( "no position information for best_path_simple\n") ;
01157
01158 *positions=m_positions.get_array() ;
01159 *m=m_positions.get_dim1() ;
01160 *n=m_positions.get_dim2() ;
01161 }
01162
01163 void CDynProg::best_path_get_losses(float64_t** losses, int32_t* seq_len)
01164 {
01165 ASSERT(losses && seq_len);
01166 *losses=m_my_losses.get_array();
01167 *seq_len=m_my_losses.get_dim1();
01168 }
01169
01170
01172
01173 float64_t CDynProg::best_path_no_b(
01174 int32_t max_iter, int32_t &best_iter, int32_t *my_path)
01175 {
01176 CArray2<T_STATES> psi(max_iter, N) ;
01177 CArray<float64_t>* delta = new CArray<float64_t>(N) ;
01178 CArray<float64_t>* delta_new = new CArray<float64_t>(N) ;
01179
01180 {
01181 for (int32_t i=0; i<N; i++)
01182 {
01183 delta->element(i) = get_p(i) ;
01184 psi.element(0, i)= 0 ;
01185 }
01186 }
01187
01188 float64_t best_iter_prob = CMath::ALMOST_NEG_INFTY ;
01189 best_iter = 0 ;
01190
01191
01192 for (int32_t t=1; t<max_iter; t++)
01193 {
01194 CArray<float64_t>* dummy;
01195 int32_t NN=N ;
01196 for (int32_t j=0; j<NN; j++)
01197 {
01198 float64_t maxj = delta->element(0) + transition_matrix_a.element(0,j);
01199 int32_t argmax=0;
01200
01201 for (int32_t i=1; i<NN; i++)
01202 {
01203 float64_t temp = delta->element(i) + transition_matrix_a.element(i,j);
01204
01205 if (temp>maxj)
01206 {
01207 maxj=temp;
01208 argmax=i;
01209 }
01210 }
01211 delta_new->element(j)=maxj ;
01212 psi.element(t, j)=argmax ;
01213 }
01214
01215 dummy=delta;
01216 delta=delta_new;
01217 delta_new=dummy;
01218
01219 {
01220 float64_t maxj=delta->element(0)+get_q(0);
01221 int32_t argmax=0;
01222
01223 for (int32_t i=1; i<N; i++)
01224 {
01225 float64_t temp=delta->element(i)+get_q(i);
01226
01227 if (temp>maxj)
01228 {
01229 maxj=temp;
01230 argmax=i;
01231 }
01232 }
01233
01234
01235 if (maxj>best_iter_prob)
01236 {
01237 my_path[t]=argmax;
01238 best_iter=t ;
01239 best_iter_prob = maxj ;
01240 } ;
01241 } ;
01242 }
01243
01244
01245 {
01246 for (int32_t t = best_iter; t>0; t--)
01247 {
01248 my_path[t-1]=psi.element(t, my_path[t]);
01249 }
01250 }
01251
01252 delete delta ;
01253 delete delta_new ;
01254
01255 return best_iter_prob ;
01256 }
01257
01258 void CDynProg::best_path_no_b_trans(
01259 int32_t max_iter, int32_t &max_best_iter, int16_t nbest,
01260 float64_t *prob_nbest, int32_t *my_paths)
01261 {
01262
01263 CArray3<T_STATES> psi(max_iter, N, nbest) ;
01264 CArray3<int16_t> ktable(max_iter, N, nbest) ;
01265 CArray2<int16_t> ktable_ends(max_iter, nbest) ;
01266
01267 CArray<float64_t> tempvv(nbest*N) ;
01268 CArray<int32_t> tempii(nbest*N) ;
01269
01270 CArray2<T_STATES> path_ends(max_iter, nbest) ;
01271 CArray2<float64_t> *delta=new CArray2<float64_t>(N, nbest) ;
01272 CArray2<float64_t> *delta_new=new CArray2<float64_t>(N, nbest) ;
01273 CArray2<float64_t> delta_end(max_iter, nbest) ;
01274
01275 CArray2<int32_t> paths(max_iter, nbest) ;
01276 paths.set_array(my_paths, max_iter, nbest, false, false) ;
01277
01278 {
01279 for (T_STATES i=0; i<N; i++)
01280 {
01281 delta->element(i,0) = get_p(i) ;
01282 for (int16_t k=1; k<nbest; k++)
01283 {
01284 delta->element(i,k)=-CMath::INFTY ;
01285 ktable.element(0,i,k)=0 ;
01286 }
01287 }
01288 }
01289
01290
01291 for (int32_t t=1; t<max_iter; t++)
01292 {
01293 CArray2<float64_t>* dummy=NULL;
01294
01295 for (T_STATES j=0; j<N; j++)
01296 {
01297 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01298 const T_STATES *elem_list = trans_list_forward[j] ;
01299 const float64_t *elem_val = trans_list_forward_val[j] ;
01300
01301 int32_t list_len=0 ;
01302 for (int16_t diff=0; diff<nbest; diff++)
01303 {
01304 for (int32_t i=0; i<num_elem; i++)
01305 {
01306 T_STATES ii = elem_list[i] ;
01307
01308 tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01309 tempii.element(list_len) = diff*N + ii ;
01310 list_len++ ;
01311 }
01312 }
01313 CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01314
01315 for (int16_t k=0; k<nbest; k++)
01316 {
01317 if (k<list_len)
01318 {
01319 delta_new->element(j,k) = -tempvv[k] ;
01320 psi.element(t,j,k) = (tempii[k]%N) ;
01321 ktable.element(t,j,k) = (tempii[k]-(tempii[k]%N))/N ;
01322 }
01323 else
01324 {
01325 delta_new->element(j,k) = -CMath::INFTY ;
01326 psi.element(t,j,k) = 0 ;
01327 ktable.element(t,j,k) = 0 ;
01328 }
01329 }
01330 }
01331
01332 dummy=delta;
01333 delta=delta_new;
01334 delta_new=dummy;
01335
01336 {
01337 int32_t list_len = 0 ;
01338 for (int16_t diff=0; diff<nbest; diff++)
01339 {
01340 for (T_STATES i=0; i<N; i++)
01341 {
01342 tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01343 tempii.element(list_len) = diff*N + i ;
01344 list_len++ ;
01345 }
01346 }
01347 CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01348
01349 for (int16_t k=0; k<nbest; k++)
01350 {
01351 delta_end.element(t-1,k) = -tempvv[k] ;
01352 path_ends.element(t-1,k) = (tempii[k]%N) ;
01353 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%N))/N ;
01354 }
01355 }
01356 }
01357
01358 {
01359 max_best_iter=0 ;
01360
01361 CArray<float64_t> sort_delta_end(max_iter*nbest) ;
01362 CArray<int16_t> sort_k(max_iter*nbest) ;
01363 CArray<int32_t> sort_t(max_iter*nbest) ;
01364 CArray<int32_t> sort_idx(max_iter*nbest) ;
01365
01366 int32_t i=0 ;
01367 for (int32_t iter=0; iter<max_iter-1; iter++)
01368 for (int16_t k=0; k<nbest; k++)
01369 {
01370 sort_delta_end[i]=-delta_end.element(iter,k) ;
01371 sort_k[i]=k ;
01372 sort_t[i]=iter+1 ;
01373 sort_idx[i]=i ;
01374 i++ ;
01375 }
01376
01377 CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01378
01379 for (int16_t n=0; n<nbest; n++)
01380 {
01381 int16_t k=sort_k[sort_idx[n]] ;
01382 int32_t iter=sort_t[sort_idx[n]] ;
01383 prob_nbest[n]=-sort_delta_end[n] ;
01384
01385 if (iter>max_best_iter)
01386 max_best_iter=iter ;
01387
01388 ASSERT(k<nbest) ;
01389 ASSERT(iter<max_iter) ;
01390
01391 paths.element(iter,n) = path_ends.element(iter-1, k) ;
01392 int16_t q = ktable_ends.element(iter-1, k) ;
01393
01394 for (int32_t t = iter; t>0; t--)
01395 {
01396 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01397 q = ktable.element(t, paths.element(t,n), q) ;
01398 }
01399 }
01400 }
01401
01402 delete delta ;
01403 delete delta_new ;
01404 }
01405
01406
01407 void CDynProg::translate_from_single_order(
01408 uint16_t* obs, int32_t sequence_length, int32_t start, int32_t order,
01409 int32_t max_val)
01410 {
01411 int32_t i,j;
01412 uint16_t value=0;
01413
01414 for (i=sequence_length-1; i>=order-1; i--)
01415 {
01416 value=0;
01417 for (j=i; j>=i-order+1; j--)
01418 value= (value >> max_val) | (obs[j] << (max_val * (order-1)));
01419
01420 obs[i]=value;
01421 }
01422
01423 for (i=order-2;i>=0;i--)
01424 {
01425 value=0;
01426 for (j=i; j>=i-order+1; j--)
01427 {
01428 value= (value >> max_val);
01429 if (j>=0)
01430 value|=obs[j] << (max_val * (order-1));
01431 }
01432 obs[i]=value;
01433
01434 }
01435 if (start>0)
01436 for (i=start; i<sequence_length; i++)
01437 obs[i-start]=obs[i];
01438 }
01439
01440 void CDynProg::reset_svm_value(
01441 int32_t pos, int32_t & last_svm_pos, float64_t * svm_value)
01442 {
01443 for (int32_t i=0; i<num_words_single; i++)
01444 word_used_single[i]=false ;
01445 for (int32_t s=0; s<num_svms; s++)
01446 svm_value_unnormalized_single[s] = 0 ;
01447 for (int32_t s=0; s<num_svms; s++)
01448 svm_value[s] = 0 ;
01449 last_svm_pos = pos - 6+1 ;
01450 num_unique_words_single=0 ;
01451 }
01452
01453 void CDynProg::extend_svm_value(
01454 uint16_t* wordstr, int32_t pos, int32_t &last_svm_pos, float64_t* svm_value)
01455 {
01456 bool did_something = false ;
01457 for (int32_t i=last_svm_pos-1; (i>=pos) && (i>=0); i--)
01458 {
01459 if (wordstr[i]>=num_words_single)
01460 SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[i]) ;
01461
01462 if (!word_used_single[wordstr[i]])
01463 {
01464 for (int32_t s=0; s<num_svms_single; s++)
01465 svm_value_unnormalized_single[s]+=dict_weights.element(wordstr[i],s) ;
01466
01467 word_used_single[wordstr[i]]=true ;
01468 num_unique_words_single++ ;
01469 did_something=true ;
01470 }
01471 } ;
01472 if (num_unique_words_single>0)
01473 {
01474 last_svm_pos=pos ;
01475 if (did_something)
01476 for (int32_t s=0; s<num_svms; s++)
01477 svm_value[s]= svm_value_unnormalized_single[s]/sqrt((float64_t)num_unique_words_single) ;
01478 }
01479 else
01480 {
01481
01482 for (int32_t s=0; s<num_svms; s++)
01483 svm_value[s]=0 ;
01484 }
01485
01486 }
01487
01488
01489 void CDynProg::reset_segment_sum_value(
01490 int32_t num_states, int32_t pos, int32_t & last_segment_sum_pos,
01491 float64_t * segment_sum_value)
01492 {
01493 for (int32_t s=0; s<num_states; s++)
01494 segment_sum_value[s] = 0 ;
01495 last_segment_sum_pos = pos ;
01496
01497 }
01498
01499 void CDynProg::extend_segment_sum_value(
01500 float64_t *segment_sum_weights, int32_t seqlen, int32_t num_states,
01501 int32_t pos, int32_t &last_segment_sum_pos, float64_t* segment_sum_value)
01502 {
01503 for (int32_t i=last_segment_sum_pos-1; (i>=pos) && (i>=0); i--)
01504 {
01505 for (int32_t s=0; s<num_states; s++)
01506 segment_sum_value[s] += segment_sum_weights[i*num_states+s] ;
01507 } ;
01508
01509 last_segment_sum_pos = pos ;
01510 }
01511
01512
01513 void CDynProg::best_path_2struct(
01514 const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
01515 CPlifBase **Plif_matrix, const char *genestr, int32_t genestr_len,
01516 int16_t nbest, float64_t *prob_nbest, int32_t *my_state_seq,
01517 int32_t *my_pos_seq, float64_t *dictionary_weights, int32_t dict_len,
01518 float64_t *segment_sum_weights)
01519 {
01520 const int32_t default_look_back = 100 ;
01521 int32_t max_look_back = default_look_back ;
01522 bool use_svm = false ;
01523 ASSERT(dict_len==num_svms*num_words_single) ;
01524 dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
01525 dict_weights_array=dict_weights.get_array() ;
01526
01527 CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false) ;
01528 CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
01529
01530 float64_t svm_value[num_svms] ;
01531 float64_t segment_sum_value[N] ;
01532
01533 {
01534 for (int32_t s=0; s<num_svms; s++)
01535 svm_value[s]=0 ;
01536 }
01537
01538 {
01539 for (int32_t i=0; i<N; i++)
01540 for (int32_t j=0; j<N; j++)
01541 {
01542 CPlifBase *penij=PEN.element(i,j) ;
01543 if (penij==NULL)
01544 continue ;
01545 if (penij->get_max_value()>max_look_back)
01546 max_look_back=(int32_t) CMath::ceil(penij->get_max_value());
01547 if (penij->uses_svm_values())
01548 use_svm=true ;
01549 }
01550 }
01551 max_look_back = CMath::min(genestr_len, max_look_back) ;
01552
01553
01554
01555 const int32_t look_back_buflen = (max_look_back+1)*nbest*N ;
01556
01557 const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01558 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01559 seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01560 genestr_len*sizeof(bool))/(1024*1024)
01561 ;
01562 bool is_big = (mem_use>200) || (seq_len>5000) ;
01563
01564 if (is_big)
01565 {
01566 SG_DEBUG("calling best_path_2struct: seq_len=%i, N=%i, lookback=%i nbest=%i\n",
01567 seq_len, N, max_look_back, nbest) ;
01568 SG_DEBUG("allocating %1.2fMB of memory\n",
01569 mem_use) ;
01570 }
01571 ASSERT(nbest<32000) ;
01572
01573 CArray3<float64_t> delta(max_look_back+1, N, nbest) ;
01574 CArray3<T_STATES> psi(seq_len,N,nbest) ;
01575 CArray3<int16_t> ktable(seq_len,N,nbest) ;
01576 CArray3<int32_t> ptable(seq_len,N,nbest) ;
01577
01578 CArray<float64_t> delta_end(nbest) ;
01579 CArray<T_STATES> path_ends(nbest) ;
01580 CArray<int16_t> ktable_end(nbest) ;
01581
01582 CArray<float64_t> tempvv(look_back_buflen) ;
01583 CArray<int32_t> tempii(look_back_buflen) ;
01584
01585 CArray<T_STATES> state_seq(seq_len) ;
01586 CArray<int32_t> pos_seq(seq_len) ;
01587
01588
01589 uint16_t* wordstr=NULL ;
01590 if (use_svm)
01591 {
01592 ASSERT(dictionary_weights!=NULL) ;
01593 wordstr=new uint16_t[genestr_len] ;
01594 for (int32_t i=0; i<genestr_len; i++)
01595 switch (genestr[i])
01596 {
01597 case 'A':
01598 case 'a': wordstr[i]=0 ; break ;
01599 case 'C':
01600 case 'c': wordstr[i]=1 ; break ;
01601 case 'G':
01602 case 'g': wordstr[i]=2 ; break ;
01603 case 'T':
01604 case 't': wordstr[i]=3 ; break ;
01605 default: ASSERT(0) ;
01606 }
01607 translate_from_single_order(wordstr, genestr_len, word_degree_single-1, word_degree_single) ;
01608 }
01609
01610
01611 {
01612 for (T_STATES i=0; i<N; i++)
01613 {
01614 delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
01615 psi.element(0,i,0) = 0 ;
01616 ktable.element(0,i,0) = 0 ;
01617 ptable.element(0,i,0) = 0 ;
01618 for (int16_t k=1; k<nbest; k++)
01619 {
01620 delta.element(0,i,k) = -CMath::INFTY ;
01621 psi.element(0,i,0) = 0 ;
01622 ktable.element(0,i,k) = 0 ;
01623 ptable.element(0,i,k) = 0 ;
01624 }
01625 }
01626 }
01627
01628
01629 for (int32_t t=1; t<seq_len; t++)
01630 {
01631 if (is_big && t%(seq_len/10000)==1)
01632 SG_PROGRESS(t, 0, seq_len);
01633
01634
01635 for (T_STATES j=0; j<N; j++)
01636 {
01637 if (seq.element(j,t)<-1e20)
01638 {
01639 for (int16_t k=0; k<nbest; k++)
01640 {
01641 delta.element(t%max_look_back,j,k) = seq.element(j,t) ;
01642 psi.element(t,j,k) = 0 ;
01643 ktable.element(t,j,k) = 0 ;
01644 ptable.element(t,j,k) = 0 ;
01645 }
01646 }
01647 else
01648 {
01649 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01650 const T_STATES *elem_list = trans_list_forward[j] ;
01651 const float64_t *elem_val = trans_list_forward_val[j] ;
01652
01653 int32_t list_len=0 ;
01654 for (int32_t i=0; i<num_elem; i++)
01655 {
01656 T_STATES ii = elem_list[i] ;
01657
01658
01659 const CPlifBase * penalty = PEN.element(j,ii) ;
01660 int32_t look_back = default_look_back ;
01661 if (penalty!=NULL)
01662 look_back=(int32_t) (CMath::ceil(penalty->get_max_value()));
01663
01664 int32_t last_svm_pos ;
01665 if (use_svm)
01666 reset_svm_value(pos[t], last_svm_pos, svm_value) ;
01667
01668 int32_t last_segment_sum_pos ;
01669 reset_segment_sum_value(N, pos[t], last_segment_sum_pos, segment_sum_value) ;
01670
01671 for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
01672 {
01673 if (use_svm)
01674 extend_svm_value(wordstr, pos[ts], last_svm_pos, svm_value) ;
01675
01676 extend_segment_sum_value(segment_sum_weights, seq_len, N, pos[ts], last_segment_sum_pos, segment_sum_value) ;
01677
01678 float64_t pen_val = 0.0 ;
01679 if (penalty)
01680 pen_val=penalty->lookup_penalty(pos[t]-pos[ts], svm_value) + segment_sum_value[j] ;
01681 for (int16_t diff=0; diff<nbest; diff++)
01682 {
01683 float64_t val = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
01684 val += pen_val ;
01685
01686 tempvv[list_len] = -val ;
01687 tempii[list_len] = ii + diff*N + ts*N*nbest;
01688
01689 list_len++ ;
01690 }
01691 }
01692 }
01693 CMath::nmin<int32_t>(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01694
01695 for (int16_t k=0; k<nbest; k++)
01696 {
01697 if (k<list_len)
01698 {
01699 delta.element(t%max_look_back,j,k) = -tempvv[k] + seq.element(j,t);
01700 psi.element(t,j,k) = (tempii[k]%N) ;
01701 ktable.element(t,j,k) = (tempii[k]%(N*nbest)-psi.element(t,j,k))/N ;
01702 ptable.element(t,j,k) = (tempii[k]-(tempii[k]%(N*nbest)))/(N*nbest) ;
01703 }
01704 else
01705 {
01706 delta.element(t%max_look_back,j,k) = -CMath::INFTY ;
01707 psi.element(t,j,k) = 0 ;
01708 ktable.element(t,j,k) = 0 ;
01709 ptable.element(t,j,k) = 0 ;
01710 }
01711 }
01712 }
01713 }
01714 }
01715
01716 {
01717 int32_t list_len = 0 ;
01718 for (int16_t diff=0; diff<nbest; diff++)
01719 {
01720 for (T_STATES i=0; i<N; i++)
01721 {
01722 tempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
01723 tempii[list_len] = i + diff*N ;
01724 list_len++ ;
01725 }
01726 }
01727
01728 CMath::nmin(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01729
01730 for (int16_t k=0; k<nbest; k++)
01731 {
01732 delta_end.element(k) = -tempvv[k] ;
01733 path_ends.element(k) = (tempii[k]%N) ;
01734 ktable_end.element(k) = (tempii[k]-path_ends.element(k))/N ;
01735 }
01736 }
01737
01738 {
01739 for (int16_t k=0; k<nbest; k++)
01740 {
01741 prob_nbest[k]= delta_end.element(k) ;
01742
01743 int32_t i = 0 ;
01744 state_seq[i] = path_ends.element(k) ;
01745 int16_t q = ktable_end.element(k) ;
01746 pos_seq[i] = seq_len-1 ;
01747
01748 while (pos_seq[i]>0)
01749 {
01750
01751 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
01752 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
01753 q = ktable.element(pos_seq[i], state_seq[i], q) ;
01754 i++ ;
01755 }
01756
01757 int32_t num_states = i+1 ;
01758 for (i=0; i<num_states;i++)
01759 {
01760 my_state_seq[i+k*(seq_len+1)] = state_seq[num_states-i-1] ;
01761 my_pos_seq[i+k*(seq_len+1)] = pos_seq[num_states-i-1] ;
01762 }
01763 my_state_seq[num_states+k*(seq_len+1)]=-1 ;
01764 my_pos_seq[num_states+k*(seq_len+1)]=-1 ;
01765 }
01766 }
01767 if (is_big)
01768 SG_PRINT( "DONE. \n") ;
01769 }
01770
01771
01772
01773
01774
01775
01776
01777
01778
01779
01780
01781
01782
01783
01784
01785
01786
01787
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797
01798
01799
01800
01801
01802
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825 void CDynProg::init_segment_loss(
01826 struct segment_loss_struct & loss, int32_t seqlen, int32_t howmuchlookback)
01827 {
01828 #ifdef DYNPROG_TIMING
01829 MyTime.start() ;
01830 #endif
01831 int32_t clear_size = CMath::min(howmuchlookback,seqlen) ;
01832
01833 if (!loss.num_segment_id)
01834 {
01835 loss.segments_changed = new int32_t[seqlen] ;
01836 loss.num_segment_id = new float64_t[(max_a_id+1)*seqlen] ;
01837 loss.length_segment_id = new int32_t[(max_a_id+1)*seqlen] ;
01838
01839 clear_size = seqlen ;
01840 }
01841
01842 for (int32_t j=0; j<clear_size; j++)
01843 {
01844 loss.segments_changed[j]=0 ;
01845 for (int32_t i=0; i<max_a_id+1; i++)
01846 {
01847 loss.num_segment_id[i*seqlen+j] = 0;
01848 loss.length_segment_id[i*seqlen+j] = 0;
01849 }
01850 }
01851
01852 loss.maxlookback = howmuchlookback ;
01853 loss.seqlen = seqlen;
01854
01855 #ifdef DYNPROG_TIMING
01856 MyTime.stop() ;
01857 segment_init_time += MyTime.time_diff_sec() ;
01858 #endif
01859 }
01860
01861 void CDynProg::clear_segment_loss(struct segment_loss_struct & loss)
01862 {
01863 #ifdef DYNPROG_TIMING
01864 MyTime.start() ;
01865 #endif
01866
01867 if (loss.num_segment_id != NULL)
01868 {
01869 delete[] loss.segments_changed ;
01870 delete[] loss.num_segment_id ;
01871 delete[] loss.length_segment_id ;
01872 loss.segments_changed = NULL ;
01873 loss.num_segment_id = NULL ;
01874 loss.length_segment_id = NULL ;
01875 }
01876 #ifdef DYNPROG_TIMING
01877 MyTime.stop() ;
01878 segment_clean_time += MyTime.time_diff_sec() ;
01879 #endif
01880 }
01881
01882 float64_t CDynProg::extend_segment_loss(
01883 struct segment_loss_struct & loss, const int32_t * pos_array,
01884 int32_t segment_id, int32_t pos, int32_t & last_pos, float64_t &last_value)
01885 {
01886 #ifdef DYNPROG_TIMING
01887 MyTime.start() ;
01888 #endif
01889
01890 if (pos==last_pos)
01891 return last_value ;
01892 ASSERT(pos<last_pos) ;
01893
01894 last_pos-- ;
01895 bool changed = false ;
01896 while (last_pos>=pos)
01897 {
01898 if (loss.segments_changed[last_pos])
01899 {
01900 changed=true ;
01901 break ;
01902 }
01903 last_pos-- ;
01904 }
01905 if (last_pos<pos)
01906 last_pos = pos ;
01907
01908 if (!changed)
01909 {
01910 ASSERT(last_pos>=0) ;
01911 ASSERT(last_pos<loss.seqlen) ;
01912 float64_t length_contrib = (pos_array[last_pos]-pos_array[pos])*m_segment_loss.element(m_segment_ids.element(pos), segment_id, 1) ;
01913 float64_t ret = last_value + length_contrib ;
01914 last_pos = pos ;
01915 return ret ;
01916 }
01917
01918 CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01919 CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01920 float64_t ret = 0.0 ;
01921 for (int32_t i=0; i<max_a_id+1; i++)
01922 {
01923
01924
01925 if (num_segment_id.element(pos, i)!=0)
01926 {
01927 ret += num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 0) ;
01928
01929
01930
01931
01932
01933
01934
01935 }
01936 if (length_segment_id.element(pos, i)!=0)
01937 ret += length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1) ;
01938 }
01939 last_pos = pos ;
01940 last_value = ret ;
01941
01942 #ifdef DYNPROG_TIMING
01943 MyTime.stop() ;
01944 segment_extend_time += MyTime.time_diff_sec() ;
01945 #endif
01946 return ret ;
01947 }
01948
01949 void CDynProg::find_segment_loss_till_pos(
01950 const int32_t * pos, int32_t t_end, CArray<int32_t>& segment_ids,
01951 CArray<float64_t>& segment_mask, struct segment_loss_struct & loss)
01952 {
01953 #ifdef DYNPROG_TIMING
01954 MyTime.start() ;
01955 #endif
01956 CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01957 CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01958
01959 for (int32_t i=0; i<max_a_id+1; i++)
01960 {
01961 num_segment_id.element(t_end, i) = 0 ;
01962 length_segment_id.element(t_end, i) = 0 ;
01963 }
01964 int32_t wobble_pos_segment_id_switch = 0 ;
01965 int32_t last_segment_id = -1 ;
01966 int32_t ts = t_end-1 ;
01967 while ((ts>=0) && (pos[t_end] - pos[ts] <= loss.maxlookback))
01968 {
01969 int32_t cur_segment_id = segment_ids.element(ts) ;
01970
01971 bool wobble_pos = (CMath::abs(segment_mask.element(ts))<1e-7) && (wobble_pos_segment_id_switch==0) ;
01972 if (!(cur_segment_id<=max_a_id))
01973 SG_ERROR("(cur_segment_id<=max_a_id), cur_segment_id:%i max_a_id:%i\n",cur_segment_id,max_a_id);
01974 ASSERT(cur_segment_id>=0) ;
01975
01976 for (int32_t i=0; i<max_a_id+1; i++)
01977 {
01978 num_segment_id.element(ts, i) = num_segment_id.element(ts+1, i) ;
01979 length_segment_id.element(ts, i) = length_segment_id.element(ts+1, i) ;
01980 }
01981
01982 if (cur_segment_id!=last_segment_id)
01983 {
01984 if (wobble_pos)
01985 {
01986
01987 wobble_pos_segment_id_switch++ ;
01988
01989 }
01990 else
01991 {
01992
01993 loss.segments_changed[ts] = true ;
01994 num_segment_id.element(ts, cur_segment_id) += segment_mask.element(ts);
01995 length_segment_id.element(ts, cur_segment_id) += (int32_t)((pos[ts+1]-pos[ts])*segment_mask.element(ts));
01996 wobble_pos_segment_id_switch = 0 ;
01997 }
01998 last_segment_id = cur_segment_id ;
01999 }
02000 else
02001 if (!wobble_pos)
02002 length_segment_id.element(ts, cur_segment_id) += pos[ts+1] - pos[ts] ;
02003
02004 ts--;
02005 }
02006 #ifdef DYNPROG_TIMING
02007 MyTime.stop() ;
02008 segment_pos_time += MyTime.time_diff_sec() ;
02009 #endif
02010 }
02011
02012 void CDynProg::init_svm_values(
02013 struct svm_values_struct & svs, int32_t start_pos, int32_t seqlen,
02014 int32_t maxlookback)
02015 {
02016 #ifdef DYNPROG_TIMING
02017 MyTime.start() ;
02018 #endif
02019
02020
02021
02022
02023
02024
02025
02026
02027 int32_t clear_size = CMath::min(maxlookback,seqlen) ;
02028
02029 if (!svs.svm_values)
02030 {
02031 svs.svm_values = new float64_t[seqlen*num_svms] ;
02032 svs.num_unique_words = new int32_t*[num_degrees] ;
02033 svs.svm_values_unnormalized = new float64_t*[num_degrees] ;
02034 svs.word_used = new bool**[num_degrees] ;
02035 for (int32_t j=0; j<num_degrees; j++)
02036 {
02037 svs.word_used[j] = new bool*[num_svms] ;
02038 for (int32_t s=0; s<num_svms; s++)
02039 svs.word_used[j][s] = new bool[num_words_array[j]] ;
02040 }
02041 for (int32_t j=0; j<num_degrees; j++)
02042 {
02043 svs.svm_values_unnormalized[j] = new float64_t[num_svms] ;
02044 svs.num_unique_words[j] = new int32_t[num_svms] ;
02045 }
02046 svs.start_pos = new int32_t[num_svms] ;
02047 clear_size = seqlen ;
02048 }
02049
02050
02051
02052 memset(svs.svm_values, 0, clear_size*num_svms*sizeof(float64_t)) ;
02053
02054 for (int32_t j=0; j<num_degrees; j++)
02055 {
02056
02057
02058 memset(svs.svm_values_unnormalized[j], 0, num_svms*sizeof(float64_t)) ;
02059
02060
02061
02062 memset(svs.num_unique_words[j], 0, num_svms*sizeof(int32_t)) ;
02063 }
02064
02065 for (int32_t j=0; j<num_degrees; j++)
02066 for (int32_t s=0; s<num_svms; s++)
02067 {
02068
02069
02070 memset(svs.word_used[j][s], 0, num_words_array[j]*sizeof(bool)) ;
02071 }
02072
02073 for (int32_t s=0; s<num_svms; s++)
02074 svs.start_pos[s] = start_pos - mod_words.element(s,1) ;
02075
02076 svs.maxlookback = maxlookback ;
02077 svs.seqlen = seqlen ;
02078
02079 #ifdef DYNPROG_TIMING
02080 MyTime.stop() ;
02081 svm_init_time += MyTime.time_diff_sec() ;
02082 #endif
02083 }
02084
02085 void CDynProg::clear_svm_values(struct svm_values_struct & svs)
02086 {
02087 #ifdef DYNPROG_TIMING
02088 MyTime.start() ;
02089 #endif
02090 if (NULL != svs.svm_values)
02091 {
02092 for (int32_t j=0; j<num_degrees; j++)
02093 {
02094 for (int32_t s=0; s<num_svms; s++)
02095 delete[] svs.word_used[j][s] ;
02096 delete[] svs.word_used[j];
02097 }
02098 delete[] svs.word_used;
02099
02100 for (int32_t j=0; j<num_degrees; j++)
02101 delete[] svs.svm_values_unnormalized[j] ;
02102 for (int32_t j=0; j<num_degrees; j++)
02103 delete[] svs.num_unique_words[j] ;
02104
02105 delete[] svs.svm_values_unnormalized;
02106 delete[] svs.svm_values;
02107 delete[] svs.num_unique_words ;
02108
02109 svs.word_used=NULL ;
02110 svs.svm_values=NULL ;
02111 svs.svm_values_unnormalized=NULL ;
02112 }
02113
02114 #ifdef DYNPROG_TIMING
02115 MyTime.stop() ;
02116 svm_clean_time += MyTime.time_diff_sec() ;
02117 #endif
02118 }
02119
02120
02121 void CDynProg::find_svm_values_till_pos(
02122 uint16_t*** wordstr, const int32_t *pos, int32_t t_end,
02123 struct svm_values_struct &svs)
02124 {
02125 #ifdef DYNPROG_TIMING
02126 MyTime.start() ;
02127 #endif
02128
02129
02130
02131
02132
02133
02134
02135
02136
02137
02138
02139
02140
02141
02142
02143
02144 for (int32_t j=0; j<num_degrees; j++)
02145
02146 {
02147 int32_t plen = 1;
02148 int32_t ts = t_end-1;
02149 int32_t offset;
02150
02151 int32_t posprev = pos[t_end]-word_degree[j]+1;
02152 int32_t poscurrent = pos[ts];
02153
02154
02155
02156 if (poscurrent<0)
02157 poscurrent = 0;
02158 float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02159 int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02160 bool ** my_word_used = svs.word_used[j] ;
02161
02162 int32_t len = pos[t_end] - poscurrent;
02163 while ((ts>=0) && (len <= svs.maxlookback))
02164 {
02165 for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02166 {
02167
02168
02169 uint16_t word = wordstr[string_words_array[0]][j][i] ;
02170 int32_t last_string = string_words_array[0] ;
02171 for (int32_t s=0; s<num_svms; s++)
02172 {
02173
02174
02175 if (last_string != string_words_array[s])
02176 {
02177 last_string = string_words_array[s] ;
02178 word = wordstr[last_string][j][i] ;
02179 }
02180
02181
02182 if (sign_words_array[s] && my_word_used[s][word])
02183 continue ;
02184
02185
02186
02187 {
02188 my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02189
02190 my_num_unique_words[s]++ ;
02191 if (sign_words_array[s])
02192 my_word_used[s][word]=true ;
02193 }
02194 }
02195 }
02196 offset = plen*num_svms ;
02197 for (int32_t s=0; s<num_svms; s++)
02198 {
02199 float64_t normalization_factor = 1.0;
02200 if (my_num_unique_words[s] > 0)
02201 {
02202 if (sign_words_array[s])
02203 normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02204 else
02205 normalization_factor = (float64_t)my_num_unique_words[s];
02206 }
02207
02208 if (j==0)
02209 svs.svm_values[offset+s]=0 ;
02210 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02211 }
02212
02213 if (posprev > poscurrent)
02214 posprev = poscurrent;
02215
02216 ts--;
02217 plen++;
02218
02219 if (ts>=0)
02220 {
02221 poscurrent=pos[ts];
02222 if (poscurrent<0)
02223 poscurrent = 0;
02224 len = pos[t_end] - poscurrent;
02225 }
02226 }
02227 }
02228
02229 #ifdef DYNPROG_TIMING
02230 MyTime.stop() ;
02231 svm_pos_time += MyTime.time_diff_sec() ;
02232 #endif
02233 }
02234
02235
02236 void CDynProg::find_svm_values_till_pos(
02237 uint16_t** wordstr, const int32_t *pos, int32_t t_end,
02238 struct svm_values_struct &svs)
02239 {
02240 #ifdef DYNPROG_TIMING
02241 MyTime.start() ;
02242 #endif
02243
02244
02245
02246
02247
02248
02249
02250
02251
02252
02253
02254
02255
02256
02257
02258 for (int32_t j=0; j<num_degrees; j++)
02259
02260 {
02261 int32_t plen = 1;
02262 int32_t ts = t_end-1;
02263 int32_t offset;
02264
02265 int32_t posprev = pos[t_end]-word_degree[j]+1;
02266 int32_t poscurrent = pos[ts];
02267
02268
02269
02270 if (poscurrent<0)
02271 poscurrent = 0;
02272 float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02273 int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02274 bool ** my_word_used = svs.word_used[j] ;
02275
02276 int32_t len = pos[t_end] - poscurrent;
02277 while ((ts>=0) && (len <= svs.maxlookback))
02278 {
02279 for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02280 {
02281
02282
02283 uint16_t word = wordstr[j][i] ;
02284 for (int32_t s=0; s<num_svms; s++)
02285 {
02286
02287
02288 if (sign_words_array[s] && my_word_used[s][word])
02289 continue ;
02290
02291
02292
02293 {
02294 my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02295
02296 my_num_unique_words[s]++ ;
02297 if (sign_words_array[s])
02298 my_word_used[s][word]=true ;
02299 }
02300 }
02301 }
02302 offset = plen*num_svms ;
02303 for (int32_t s=0; s<num_svms; s++)
02304 {
02305 float64_t normalization_factor = 1.0;
02306 if (my_num_unique_words[s] > 0)
02307 {
02308 if (sign_words_array[s])
02309 normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02310 else
02311 normalization_factor = (float64_t)my_num_unique_words[s];
02312 }
02313
02314 if (j==0)
02315 svs.svm_values[offset+s]=0 ;
02316 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02317 }
02318
02319 if (posprev > poscurrent)
02320 posprev = poscurrent;
02321
02322 ts--;
02323 plen++;
02324
02325 if (ts>=0)
02326 {
02327 poscurrent=pos[ts];
02328 if (poscurrent<0)
02329 poscurrent = 0;
02330 len = pos[t_end] - poscurrent;
02331 }
02332 }
02333 }
02334
02335 #ifdef DYNPROG_TIMING
02336 MyTime.stop() ;
02337 svm_pos_time += MyTime.time_diff_sec() ;
02338 #endif
02339 }
02340
02341 bool CDynProg::extend_orf(
02342 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
02343 int32_t to)
02344 {
02345 #ifdef DYNPROG_TIMING
02346 MyTime.start() ;
02347 #endif
02348
02349 if (start<0)
02350 start=0 ;
02351 if (to<0)
02352 to=0 ;
02353
02354 int32_t orf_target = orf_to-orf_from ;
02355 if (orf_target<0) orf_target+=3 ;
02356
02357 int32_t pos ;
02358 if (last_pos==to)
02359 pos = to-orf_to-3 ;
02360 else
02361 pos=last_pos ;
02362
02363 if (pos<0)
02364 return true ;
02365
02366 for (; pos>=start; pos-=3)
02367 if (m_genestr_stop[pos])
02368 return false ;
02369
02370 last_pos = CMath::min(pos+3,to-orf_to-3) ;
02371
02372 #ifdef DYNPROG_TIMING
02373 MyTime.stop() ;
02374 orf_time += MyTime.time_diff_sec() ;
02375 #endif
02376 return true ;
02377 }
02378
02379 template <int16_t nbest, bool with_loss, bool with_multiple_sequences>
02380 void CDynProg::best_path_trans(
02381 const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
02382 const int32_t *orf_info_array, CPlifBase **Plif_matrix,
02383 CPlifBase **Plif_state_signals, int32_t max_num_signals,
02384 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
02385 int32_t *my_pos_seq, bool use_orf)
02386 {
02387 #ifdef DYNPROG_TIMING
02388 segment_init_time = 0.0 ;
02389 segment_pos_time = 0.0 ;
02390 segment_extend_time = 0.0 ;
02391 segment_clean_time = 0.0 ;
02392 orf_time = 0.0 ;
02393 svm_init_time = 0.0 ;
02394 svm_pos_time = 0.0 ;
02395 svm_clean_time = 0.0 ;
02396 MyTime2.start() ;
02397 #endif
02398
02399
02400 if (!svm_arrays_clean)
02401 {
02402 SG_ERROR( "SVM arrays not clean") ;
02403 return ;
02404 }
02405
02406 #ifdef DYNPROG_DEBUG
02407 transition_matrix_a.set_name("transition_matrix");
02408 transition_matrix_a.display_array();
02409 mod_words.display_array() ;
02410 sign_words.display_array() ;
02411 string_words.display_array() ;
02412 SG_PRINT("use_orf = %i\n", use_orf) ;
02413 #endif
02414
02415 int32_t max_look_back = 20000 ;
02416 bool use_svm = false ;
02417
02418
02419
02420
02421 SG_PRINT("N:%i, seq_len:%i, max_num_signals:%i\n",N, seq_len, max_num_signals) ;
02422
02423
02424
02425
02426
02427 CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, true) ;
02428 PEN.set_name("PEN");
02429
02430 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, true) ;
02431 PEN_state_signals.set_name("state_signals");
02432
02433 CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02434 seq_input.set_name("seq_input") ;
02435
02436 CArray2<float64_t> seq(N, seq_len) ;
02437 seq.set_name("seq") ;
02438 seq.zero() ;
02439
02440 CArray2<int32_t> orf_info(orf_info_array, N, 2) ;
02441 orf_info.set_name("orf_info") ;
02442
02443
02444
02445 float64_t svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data]] ;
02446 {
02447 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
02448 svm_value[s]=0 ;
02449 }
02450
02451 {
02452
02453 for (int32_t i=0; i<N; i++)
02454 for (int32_t j=0; j<seq_len; j++)
02455 seq.element(i,j) = 0 ;
02456
02457 for (int32_t i=0; i<N; i++)
02458 for (int32_t j=0; j<seq_len; j++)
02459 for (int32_t k=0; k<max_num_signals; k++)
02460 {
02461 if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
02462 {
02463
02464 seq.element(i,j) = seq_input.element(i,j,k) ;
02465 break ;
02466 }
02467 if (PEN_state_signals.element(i,k)!=NULL)
02468 {
02469
02470 if (CMath::is_finite(seq_input.element(i,j,k)))
02471 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input.element(i,j,k), svm_value) ;
02472 else
02473
02474 seq.element(i,j) = seq_input.element(i, j, k) ;
02475 }
02476 else
02477 break ;
02478 }
02479 }
02480
02481
02482 {
02483 for (int32_t i=0; i<N; i++)
02484 {
02485
02486 const T_STATES num_elem = trans_list_forward_cnt[i] ;
02487 const T_STATES *elem_list = trans_list_forward[i] ;
02488
02489 for (int32_t jj=0; jj<num_elem; jj++)
02490 {
02491 T_STATES j = elem_list[jj] ;
02492
02493 CPlifBase *penij=PEN.element(i,j) ;
02494 if (penij==NULL)
02495 continue ;
02496 if (penij->get_max_value()>max_look_back)
02497 {
02498 SG_DEBUG( "%d %d -> value: %f\n", i,j,penij->get_max_value());
02499
02500 max_look_back=(int32_t) (CMath::ceil(penij->get_max_value()));
02501 }
02502 if (penij->uses_svm_values())
02503 use_svm=true ;
02504 }
02505 }
02506 }
02507
02508
02509 max_look_back = CMath::min(m_genestr_len, max_look_back) ;
02510 SG_DEBUG("use_svm=%i\n", use_svm) ;
02511
02512 SG_DEBUG("maxlook: %d N: %d nbest: %d \n", max_look_back, N, nbest);
02513 const int32_t look_back_buflen = (max_look_back*N+1)*nbest ;
02514 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
02515 const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
02516 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
02517 seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
02518 m_genestr_len*sizeof(bool))/(1024*1024);
02519
02520 bool is_big = (mem_use>200) || (seq_len>5000) ;
02521
02522 if (1)
02523 {
02524 SG_DEBUG("calling best_path_trans: seq_len=%i, N=%i, lookback=%i nbest=%i\n",
02525 seq_len, N, max_look_back, nbest) ;
02526 SG_DEBUG("allocating %1.2fMB of memory\n",
02527 mem_use) ;
02528 }
02529 ASSERT(nbest<32000) ;
02530
02531
02532
02533 CArray3<float64_t> delta(seq_len, N, nbest) ;
02534 delta.set_name("delta");
02535 float64_t* delta_array = delta.get_array() ;
02536
02537
02538 CArray3<T_STATES> psi(seq_len, N, nbest) ;
02539 psi.set_name("psi");
02540
02541
02542 CArray3<int16_t> ktable(seq_len, N, nbest) ;
02543 ktable.set_name("ktable");
02544
02545
02546 CArray3<int32_t> ptable(seq_len, N, nbest) ;
02547 ptable.set_name("ptable");
02548
02549
02550 CArray<float64_t> delta_end(nbest) ;
02551 delta_end.set_name("delta_end");
02552
02553
02554 CArray<T_STATES> path_ends(nbest) ;
02555 path_ends.set_name("path_ends");
02556
02557
02558 CArray<int16_t> ktable_end(nbest) ;
02559 ktable_end.set_name("ktable_end");
02560
02561
02562 float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
02563 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
02564 int32_t * fixedtempii=new int32_t[look_back_buflen] ;
02565 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
02566
02567 CArray<float64_t> oldtempvv(look_back_buflen) ;
02568 oldtempvv.set_name("oldtempvv");
02569 CArray<float64_t> oldtempvv2(look_back_buflen) ;
02570 oldtempvv2.set_name("oldtempvv2");
02571
02572
02573
02574 CArray<int32_t> oldtempii(look_back_buflen) ;
02575 oldtempii.set_name("oldtempii");
02576 CArray<int32_t> oldtempii2(look_back_buflen) ;
02577 oldtempii2.set_name("oldtempii2");
02578
02579
02580 CArray<T_STATES> state_seq(seq_len) ;
02581 state_seq.set_name("state_seq");
02582
02583
02584 CArray<int32_t> pos_seq(seq_len) ;
02585 pos_seq.set_name("pos_seq");
02586
02587
02588
02589
02590 word_degree.set_name("word_degree") ;
02591 cum_num_words.set_name("cum_num_words") ;
02592 num_words.set_name("num_words") ;
02593
02594
02595 svm_pos_start.set_name("svm_pos_start") ;
02596 num_unique_words.set_name("num_unique_words") ;
02597
02598 PEN.set_name("PEN") ;
02599 seq.set_name("seq") ;
02600 orf_info.set_name("orf_info") ;
02601
02602 delta.set_name("delta") ;
02603 psi.set_name("psi") ;
02604 ktable.set_name("ktable") ;
02605 ptable.set_name("ptable") ;
02606 delta_end.set_name("delta_end") ;
02607 path_ends.set_name("path_ends") ;
02608 ktable_end.set_name("ktable_end") ;
02609
02610 #ifdef USE_TMP_ARRAYCLASS
02611 fixedtempvv.set_name("fixedtempvv") ;
02612 fixedtempii.set_name("fixedtempvv") ;
02613 #endif
02614
02615 oldtempvv.set_name("oldtempvv") ;
02616 oldtempvv2.set_name("oldtempvv2") ;
02617 oldtempii.set_name("oldtempii") ;
02618 oldtempii2.set_name("oldtempii2") ;
02619
02620
02622
02623 #ifdef DYNPROG_DEBUG
02624 state_seq.display_size() ;
02625 pos_seq.display_size() ;
02626
02627 dict_weights.display_size() ;
02628 word_degree.display_array() ;
02629 cum_num_words.display_array() ;
02630 num_words.display_array() ;
02631
02632
02633 svm_pos_start.display_array() ;
02634 num_unique_words.display_array() ;
02635
02636 PEN.display_size() ;
02637 PEN_state_signals.display_size() ;
02638 seq.display_size() ;
02639 orf_info.display_size() ;
02640
02641
02642 delta.display_size() ;
02643 psi.display_size() ;
02644 ktable.display_size() ;
02645 ptable.display_size() ;
02646 delta_end.display_size() ;
02647 path_ends.display_size() ;
02648 ktable_end.display_size() ;
02649
02650 #ifdef USE_TMP_ARRAYCLASS
02651 fixedtempvv.display_size() ;
02652 fixedtempii.display_size() ;
02653 #endif
02654
02655
02656
02657
02658 state_seq.display_size() ;
02659 pos_seq.display_size() ;
02660
02661 CArray<int32_t>pp = CArray<int32_t>(pos, seq_len) ;
02662 pp.set_name("pp");
02663 pp.display_array() ;
02664
02665
02666
02667
02668 #endif //DYNPROG_DEBUG
02669
02671
02672
02673
02674 {
02675 for (int32_t s=0; s<num_svms; s++)
02676 ASSERT(string_words_array[s]<genestr_num) ;
02677 }
02678
02679
02680
02681
02682
02683 {
02684
02685 for (T_STATES i=0; i<N; i++)
02686 {
02687
02688 delta.element(delta_array, 0, i, 0, seq_len, N) = get_p(i) + seq.element(i,0) ;
02689 psi.element(0,i,0) = 0 ;
02690 if (nbest>1)
02691 ktable.element(0,i,0) = 0 ;
02692 ptable.element(0,i,0) = 0 ;
02693 for (int16_t k=1; k<nbest; k++)
02694 {
02695 int32_t dim1, dim2, dim3 ;
02696 delta.get_array_size(dim1, dim2, dim3) ;
02697
02698
02699 delta.element(delta_array, 0, i, k, seq_len, N) = -CMath::INFTY ;
02700 psi.element(0,i,0) = 0 ;
02701 if (nbest>1)
02702 ktable.element(0,i,k) = 0 ;
02703 ptable.element(0,i,k) = 0 ;
02704 }
02705
02706
02707
02708
02709
02710
02711
02712
02713
02714
02715
02716
02717
02718
02719
02720
02721
02722
02723 }
02724 }
02725
02726
02727
02728
02729
02730
02731
02732 struct segment_loss_struct loss;
02733 loss.segments_changed = NULL;
02734 loss.num_segment_id = NULL;
02735
02736 SG_DEBUG("START_RECURSION \n\n");
02737
02738
02739 for (int32_t t=1; t<seq_len; t++)
02740 {
02741 if (is_big && t%(1+(seq_len/1000))==1)
02742 SG_PROGRESS(t, 0, seq_len);
02743
02744
02745 if (with_loss)
02746 {
02747 init_segment_loss(loss, seq_len, max_look_back);
02748 find_segment_loss_till_pos(pos, t, m_segment_ids, m_segment_mask, loss);
02749 }
02750
02751 for (T_STATES j=0; j<N; j++)
02752 {
02753 if (seq.element(j,t)<=-1e20)
02754 {
02755 for (int16_t k=0; k<nbest; k++)
02756 {
02757 delta.element(delta_array, t, j, k, seq_len, N) = seq.element(j,t) ;
02758 psi.element(t,j,k) = 0 ;
02759 if (nbest>1)
02760 ktable.element(t,j,k) = 0 ;
02761 ptable.element(t,j,k) = 0 ;
02762 }
02763 }
02764 else
02765 {
02766 const T_STATES num_elem = trans_list_forward_cnt[j] ;
02767 const T_STATES *elem_list = trans_list_forward[j] ;
02768 const float64_t *elem_val = trans_list_forward_val[j] ;
02769 const int32_t *elem_id = trans_list_forward_id[j] ;
02770
02771 int32_t fixed_list_len = 0 ;
02772 float64_t fixedtempvv_ = CMath::INFTY ;
02773 int32_t fixedtempii_ = 0 ;
02774
02775
02776 for (int32_t i=0; i<num_elem; i++)
02777 {
02778 T_STATES ii = elem_list[i] ;
02779
02780 const CPlifBase * penalty = PEN.element(j,ii) ;
02781 int32_t look_back = max_look_back ;
02782 {
02783 CPlifBase *pen = (CPlifBase*) penalty ;
02784 if (pen!=NULL)
02785 look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
02786 if (look_back>=1e6)
02787 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
02788 ASSERT(look_back<1e6);
02789 }
02790
02791
02792
02793
02794
02795
02796 int32_t orf_from = orf_info.element(ii,0) ;
02797 int32_t orf_to = orf_info.element(j,1) ;
02798 if((orf_from!=-1)!=(orf_to!=-1))
02799 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02800 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02801
02802 int32_t orf_target = -1 ;
02803 if (orf_from!=-1)
02804 {
02805 orf_target=orf_to-orf_from ;
02806 if (orf_target<0)
02807 orf_target+=3 ;
02808 ASSERT(orf_target>=0 && orf_target<3) ;
02809 }
02810
02811 int32_t orf_last_pos = pos[t] ;
02812 int32_t loss_last_pos = t ;
02813 float64_t last_loss = 0.0 ;
02814
02815 for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
02816 {
02817 bool ok ;
02818
02819
02820
02821
02822
02823
02824
02825
02826
02827 if (orf_target==-1)
02828 ok=true ;
02829 else if (pos[ts]!=-1 && (pos[t]-pos[ts])%3==orf_target)
02830 ok=(!use_orf) || extend_orf(orf_from, orf_to, pos[ts], orf_last_pos, pos[t]) ;
02831 else
02832 ok=false ;
02833
02834 if (ok)
02835 {
02836
02837 float64_t segment_loss = 0.0 ;
02838 if (with_loss)
02839 segment_loss = extend_segment_loss(loss, pos, elem_id[i], ts, loss_last_pos, last_loss) ;
02840
02842
02844 int32_t frame = orf_info.element(ii,0);
02845 lookup_content_svm_values(ts, t, pos[ts], pos[t], svm_value, frame);
02846
02847
02848
02849
02850
02851
02852
02853
02854
02855
02856 float64_t pen_val = 0.0 ;
02857 if (penalty)
02858 pen_val = penalty->lookup_penalty(pos[t]-pos[ts], svm_value) ;
02859 if (nbest==1)
02860 {
02861 float64_t val = elem_val[i] + pen_val ;
02862 if (with_loss)
02863 val += segment_loss ;
02864
02865 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, seq_len, N)) ;
02866 if (mval<fixedtempvv_)
02867 {
02868 fixedtempvv_ = mval ;
02869 fixedtempii_ = ii + ts*N;
02870 fixed_list_len = 1 ;
02871 }
02872 }
02873 else
02874 {
02875 for (int16_t diff=0; diff<nbest; diff++)
02876 {
02877 float64_t val = elem_val[i] ;
02878 val += pen_val ;
02879 if (with_loss)
02880 val += segment_loss ;
02881
02882 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, seq_len, N)) ;
02883
02884
02885
02886
02887
02888 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
02889 {
02890 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
02891 {
02892 fixedtempvv[fixed_list_len] = mval ;
02893 fixedtempii[fixed_list_len] = ii + diff*N + ts*N*nbest;
02894 fixed_list_len++ ;
02895 }
02896 else
02897 {
02898 int32_t addhere = fixed_list_len;
02899 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
02900 addhere--;
02901
02902
02903 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
02904 {
02905 fixedtempvv[jj] = fixedtempvv[jj-1];
02906 fixedtempii[jj] = fixedtempii[jj-1];
02907 }
02908
02909 fixedtempvv[addhere] = mval;
02910 fixedtempii[addhere] = ii + diff*N + ts*N*nbest;
02911
02912 if (fixed_list_len < nbest)
02913 fixed_list_len++;
02914 }
02915 }
02916 }
02917 }
02918 }
02919 }
02920 }
02921
02922
02923 int32_t numEnt = fixed_list_len;
02924
02925 float64_t minusscore;
02926 int64_t fromtjk;
02927
02928 for (int16_t k=0; k<nbest; k++)
02929 {
02930 if (k<numEnt)
02931 {
02932 if (nbest==1)
02933 {
02934 minusscore = fixedtempvv_ ;
02935 fromtjk = fixedtempii_ ;
02936 }
02937 else
02938 {
02939 minusscore = fixedtempvv[k];
02940 fromtjk = fixedtempii[k];
02941 }
02942
02943 delta.element(delta_array, t, j, k, seq_len, N) = -minusscore + seq.element(j,t);
02944 psi.element(t,j,k) = (fromtjk%N) ;
02945 if (nbest>1)
02946 ktable.element(t,j,k) = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
02947 ptable.element(t,j,k) = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
02948 }
02949 else
02950 {
02951 delta.element(delta_array, t, j, k, seq_len, N) = -CMath::INFTY ;
02952 psi.element(t,j,k) = 0 ;
02953 if (nbest>1)
02954 ktable.element(t,j,k) = 0 ;
02955 ptable.element(t,j,k) = 0 ;
02956 }
02957 }
02958 }
02959 }
02960 }
02961
02962
02963 if (with_loss)
02964 clear_segment_loss(loss);
02965
02966 {
02967 int32_t list_len = 0 ;
02968 for (int16_t diff=0; diff<nbest; diff++)
02969 {
02970 for (T_STATES i=0; i<N; i++)
02971 {
02972 oldtempvv[list_len] = -(delta.element(delta_array, (seq_len-1), i, diff, seq_len, N)+get_q(i)) ;
02973 oldtempii[list_len] = i + diff*N ;
02974 list_len++ ;
02975 }
02976 }
02977
02978 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02979
02980 for (int16_t k=0; k<nbest; k++)
02981 {
02982 delta_end.element(k) = -oldtempvv[k] ;
02983 path_ends.element(k) = (oldtempii[k]%N) ;
02984 if (nbest>1)
02985 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
02986 }
02987 }
02988
02989 {
02990 for (int16_t k=0; k<nbest; k++)
02991 {
02992 prob_nbest[k]= delta_end.element(k) ;
02993
02994 int32_t i = 0 ;
02995 state_seq[i] = path_ends.element(k) ;
02996 int16_t q = 0 ;
02997 if (nbest>1)
02998 q=ktable_end.element(k) ;
02999 pos_seq[i] = seq_len-1 ;
03000
03001 while (pos_seq[i]>0)
03002 {
03003
03004 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03005 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
03006 if (nbest>1)
03007 q = ktable.element(pos_seq[i], state_seq[i], q) ;
03008 i++ ;
03009 }
03010
03011 int32_t num_states = i+1 ;
03012 for (i=0; i<num_states;i++)
03013 {
03014 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03015 my_pos_seq[i+k*seq_len] = pos_seq[num_states-i-1] ;
03016 }
03017 my_state_seq[num_states+k*seq_len]=-1 ;
03018 my_pos_seq[num_states+k*seq_len]=-1 ;
03019 }
03020 }
03021
03022 if (is_big)
03023 SG_PRINT( "DONE. \n") ;
03024
03025
03026 #ifdef DYNPROG_TIMING
03027 MyTime2.stop() ;
03028
03029 if (is_big)
03030 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, MyTime2.time_diff_sec()) ;
03031 #endif
03032
03033 delete[] fixedtempvv ;
03034 delete[] fixedtempii ;
03035 }
03036
03037 void CDynProg::best_path_trans_deriv(
03038 int32_t *my_state_seq, int32_t *my_pos_seq, float64_t *my_scores,
03039 float64_t* my_losses,int32_t my_seq_len, const float64_t *seq_array,
03040 int32_t seq_len, const int32_t *pos, CPlifBase **Plif_matrix,
03041 CPlifBase **Plif_state_signals, int32_t max_num_signals,int32_t genestr_num)
03042 {
03043 if (!svm_arrays_clean)
03044 {
03045 SG_ERROR( "SVM arrays not clean") ;
03046 return ;
03047 } ;
03048
03049
03050
03051
03052
03053 bool use_svm = false ;
03054
03055
03056
03057
03058 CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
03059 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
03060 CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
03061
03062 {
03063 for (int32_t i=0; i<N; i++)
03064 for (int32_t j=0; j<N; j++)
03065 {
03066 CPlifBase *penij=PEN.element(i,j) ;
03067 if (penij==NULL)
03068 continue ;
03069
03070 if (penij->uses_svm_values())
03071 use_svm=true ;
03072 penij->penalty_clear_derivative() ;
03073 }
03074 for (int32_t i=0; i<N; i++)
03075 for (int32_t j=0; j<max_num_signals; j++)
03076 {
03077 CPlifBase *penij=PEN_state_signals.element(i,j) ;
03078 if (penij==NULL)
03079 continue ;
03080 if (penij->uses_svm_values())
03081 use_svm=true ;
03082 penij->penalty_clear_derivative() ;
03083 }
03084 }
03085
03086 {
03087 for (int32_t i=0; i<N; i++)
03088 {
03089 initial_state_distribution_p_deriv.element(i)=0 ;
03090 end_state_distribution_q_deriv.element(i)=0 ;
03091 for (int32_t j=0; j<N; j++)
03092 transition_matrix_a_deriv.element(i,j)=0 ;
03093 }
03094 }
03095
03096 {
03097 for (int32_t i=0; i<my_seq_len; i++)
03098 {
03099 my_scores[i]=0.0 ;
03100 my_losses[i]=0.0 ;
03101 }
03102 }
03103
03104
03105
03106
03107
03108
03109 {
03110 float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]];
03111 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
03112 svm_value[s]=0 ;
03113
03114 for (int32_t i=0; i<my_seq_len; i++)
03115 {
03116 my_scores[i]=0.0 ;
03117 my_losses[i]=0.0 ;
03118 }
03119
03120
03121 float64_t total_score = 0.0 ;
03122 float64_t total_loss = 0.0 ;
03123
03124
03125 ASSERT(my_state_seq[0]>=0) ;
03126 initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
03127 my_scores[0] += initial_state_distribution_p.element(my_state_seq[0]) ;
03128
03129 ASSERT(my_state_seq[my_seq_len-1]>=0) ;
03130 end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
03131 my_scores[my_seq_len-1] += end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
03132
03133
03134 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
03135
03136 struct svm_values_struct svs;
03137 svs.num_unique_words = NULL;
03138 svs.svm_values = NULL;
03139 svs.svm_values_unnormalized = NULL;
03140 svs.word_used = NULL;
03141
03142
03143 struct segment_loss_struct loss;
03144 loss.segments_changed = NULL;
03145 loss.num_segment_id = NULL;
03146
03147 for (int32_t i=0; i<my_seq_len-1; i++)
03148 {
03149 if (my_state_seq[i+1]==-1)
03150 break ;
03151 int32_t from_state = my_state_seq[i] ;
03152 int32_t to_state = my_state_seq[i+1] ;
03153 int32_t from_pos = my_pos_seq[i] ;
03154 int32_t to_pos = my_pos_seq[i+1] ;
03155
03156
03157 init_segment_loss(loss, seq_len, pos[to_pos]-pos[from_pos]+10);
03158 find_segment_loss_till_pos(pos, to_pos,m_segment_ids, m_segment_mask, loss);
03159
03160 int32_t loss_last_pos = to_pos ;
03161 float64_t last_loss = 0.0 ;
03162 int32_t elem_id = transition_matrix_a_id.element(from_state, to_state) ;
03163 my_losses[i] = extend_segment_loss(loss, pos, elem_id, from_pos, loss_last_pos, last_loss) ;
03164 #ifdef DYNPROG_DEBUG
03165 io->set_loglevel(M_DEBUG) ;
03166 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
03167 #endif
03168
03169 transition_matrix_a_deriv.element(from_state, to_state)++ ;
03170 my_scores[i] += transition_matrix_a.element(from_state, to_state) ;
03171
03172 #ifdef DYNPROG_DEBUG
03173 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03174 #endif
03175
03176
03177
03178
03179
03180 if (use_svm)
03181 {
03182
03183 int32_t frame = m_orf_info.element(from_state,0);
03184 if (false)
03185 {
03186 int32_t num_current_svms=0;
03187 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03188 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame);
03189 PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
03190 SG_PRINT("\n");
03191 }
03192
03193
03194 lookup_content_svm_values(from_pos, to_pos, pos[from_pos],pos[to_pos], svm_value, frame);
03195 if (false)
03196 SG_PRINT("svm_values: %f, %f, %f \n", svm_value[4], svm_value[5], svm_value[6]);
03197
03198
03199
03200
03201
03202 }
03203
03204 if (PEN.element(to_state, from_state)!=NULL)
03205 {
03206 float64_t nscore = PEN.element(to_state, from_state)->lookup_penalty(pos[to_pos]-pos[from_pos], svm_value) ;
03207 my_scores[i] += nscore ;
03208
03209 for (int32_t s=num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data];s++)
03210 svm_value[s]=-CMath::INFTY;
03211
03212 #ifdef DYNPROG_DEBUG
03213
03214
03215
03216
03217
03218
03219
03220
03221
03222
03223
03224 #endif
03225 PEN.element(to_state, from_state)->penalty_add_derivative(pos[to_pos]-pos[from_pos], svm_value) ;
03226 for (int32_t d=1;d<=m_num_raw_data;d++)
03227 {
03228 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data];s++)
03229 svm_value[s]=-CMath::INFTY;
03230 float64_t* intensities = new float64_t[m_num_probes_cum[d]];
03231 int32_t num_intensities = raw_intensities_interval_query(pos[from_pos], pos[to_pos],intensities, d);
03232
03233 for (int32_t k=0;k<num_intensities;k++)
03234 {
03235 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
03236 svm_value[j]=intensities[k];
03237
03238 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value) ;
03239
03240 }
03241 delete[] intensities;
03242 }
03243 }
03244 #ifdef DYNPROG_DEBUG
03245 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03246 #endif
03247
03248
03249 for (int32_t k=0; k<max_num_signals; k++)
03250 {
03251 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
03252 {
03253 #ifdef DYNPROG_DEBUG
03254 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
03255 #endif
03256 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
03257
03258
03259 break ;
03260 }
03261 if (PEN_state_signals.element(to_state, k)!=NULL)
03262 {
03263 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
03264 my_scores[i] += nscore ;
03265
03266
03267
03268
03269
03270
03271
03272
03273 #ifdef DYNPROG_DEBUG
03274 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03275 #endif
03276 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value) ;
03277 } else
03278 break ;
03279 }
03280
03281
03282
03283
03284 total_score += my_scores[i] ;
03285 total_loss += my_losses[i] ;
03286
03287 }
03288
03289
03290 SG_PRINT( "total loss = %f \n", total_loss) ;
03291
03292 clear_svm_values(svs);
03293 clear_segment_loss(loss);
03294 delete[] svm_value;
03295 }
03296
03297
03298 }
03299
03300
03301 void CDynProg::best_path_trans_simple(
03302 const float64_t *seq_array, int32_t seq_len, int16_t nbest,
03303 float64_t *prob_nbest, int32_t *my_state_seq)
03304 {
03305 if (!svm_arrays_clean)
03306 {
03307 SG_ERROR( "SVM arrays not clean") ;
03308 return ;
03309 } ;
03310
03311 int32_t max_look_back = 2 ;
03312 const int32_t look_back_buflen = max_look_back*nbest*N ;
03313 ASSERT(nbest<32000) ;
03314
03315 CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
03316
03317 CArray3<float64_t> delta(max_look_back, N, nbest) ;
03318 CArray3<T_STATES> psi(seq_len, N, nbest) ;
03319 CArray3<int16_t> ktable(seq_len,N,nbest) ;
03320 CArray3<int32_t> ptable(seq_len,N,nbest) ;
03321
03322 CArray<float64_t> delta_end(nbest) ;
03323 CArray<T_STATES> path_ends(nbest) ;
03324 CArray<int16_t> ktable_end(nbest) ;
03325
03326 CArray<float64_t> oldtempvv(look_back_buflen) ;
03327 CArray<int32_t> oldtempii(look_back_buflen) ;
03328
03329 CArray<T_STATES> state_seq(seq_len) ;
03330 CArray<int32_t> pos_seq(seq_len) ;
03331
03332 {
03333
03334 for (T_STATES i=0; i<N; i++)
03335 {
03336 delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
03337 psi.element(0,i,0) = 0 ;
03338 ktable.element(0,i,0) = 0 ;
03339 ptable.element(0,i,0) = 0 ;
03340 for (int16_t k=1; k<nbest; k++)
03341 {
03342 delta.element(0,i,k) = -CMath::INFTY ;
03343 psi.element(0,i,0) = 0 ;
03344 ktable.element(0,i,k) = 0 ;
03345 ptable.element(0,i,k) = 0 ;
03346 }
03347 }
03348 }
03349
03350
03351 for (int32_t t=1; t<seq_len; t++)
03352 {
03353 for (T_STATES j=0; j<N; j++)
03354 {
03355 if (seq.element(j,t)<-1e20)
03356 {
03357 for (int16_t k=0; k<nbest; k++)
03358 {
03359 delta.element(t%max_look_back,j,k) = seq.element(j,t) ;
03360 psi.element(t,j,k) = 0 ;
03361 ktable.element(t,j,k) = 0 ;
03362 ptable.element(t,j,k) = 0 ;
03363 }
03364 }
03365 else
03366 {
03367 const T_STATES num_elem = trans_list_forward_cnt[j] ;
03368 const T_STATES *elem_list = trans_list_forward[j] ;
03369 const float64_t *elem_val = trans_list_forward_val[j] ;
03370
03371 int32_t old_list_len = 0 ;
03372
03373 for (int32_t i=0; i<num_elem; i++)
03374 {
03375 T_STATES ii = elem_list[i] ;
03376
03377 int32_t ts=t-1;
03378 if (ts>=0)
03379 {
03380 bool ok=true ;
03381
03382 if (ok)
03383 {
03384
03385
03386 for (int16_t diff=0; diff<nbest; diff++)
03387 {
03388 float64_t val = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
03389 float64_t mval = -val;
03390
03391 oldtempvv[old_list_len] = mval ;
03392 oldtempii[old_list_len] = ii + diff*N + ts*N*nbest;
03393 old_list_len++ ;
03394 }
03395 }
03396 }
03397 }
03398
03399 CMath::nmin<int32_t>(oldtempvv.get_array(), oldtempii.get_array(), old_list_len, nbest) ;
03400
03401 int32_t numEnt = 0;
03402 numEnt = old_list_len;
03403
03404 float64_t minusscore;
03405 int64_t fromtjk;
03406
03407 for (int16_t k=0; k<nbest; k++)
03408 {
03409 if (k<numEnt)
03410 {
03411 minusscore = oldtempvv[k];
03412 fromtjk = oldtempii[k];
03413
03414 delta.element(t%max_look_back,j,k) = -minusscore + seq.element(j,t);
03415 psi.element(t,j,k) = (fromtjk%N) ;
03416 ktable.element(t,j,k) = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
03417 ptable.element(t,j,k) = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
03418 }
03419 else
03420 {
03421 delta.element(t%max_look_back,j,k) = -CMath::INFTY ;
03422 psi.element(t,j,k) = 0 ;
03423 ktable.element(t,j,k) = 0 ;
03424 ptable.element(t,j,k) = 0 ;
03425 }
03426 }
03427
03428 }
03429 }
03430 }
03431
03432
03433 {
03434 int32_t list_len = 0 ;
03435 for (int16_t diff=0; diff<nbest; diff++)
03436 {
03437 for (T_STATES i=0; i<N; i++)
03438 {
03439 oldtempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
03440 oldtempii[list_len] = i + diff*N ;
03441 list_len++ ;
03442 }
03443 }
03444
03445 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
03446
03447 for (int16_t k=0; k<nbest; k++)
03448 {
03449 delta_end.element(k) = -oldtempvv[k] ;
03450 path_ends.element(k) = (oldtempii[k]%N) ;
03451 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
03452 }
03453 }
03454
03455 {
03456 for (int16_t k=0; k<nbest; k++)
03457 {
03458 prob_nbest[k]= delta_end.element(k) ;
03459
03460 int32_t i = 0 ;
03461 state_seq[i] = path_ends.element(k) ;
03462 int16_t q = ktable_end.element(k) ;
03463 pos_seq[i] = seq_len-1 ;
03464
03465 while (pos_seq[i]>0)
03466 {
03467
03468 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03469 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
03470 q = ktable.element(pos_seq[i], state_seq[i], q) ;
03471 i++ ;
03472 }
03473
03474 int32_t num_states = i+1 ;
03475 for (i=0; i<num_states;i++)
03476 {
03477 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03478 }
03479
03480 }
03481
03482 }
03483 }
03484