DynProg.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Written (W) 2008-2009 Jonas Behr
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include "structure/DynProg.h"
00014 #include "lib/Mathematics.h"
00015 #include "lib/io.h"
00016 #include "lib/config.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "lib/Array.h"
00021 #include "lib/Array2.h"
00022 #include "lib/Array3.h"
00023 
00024 #include <stdlib.h>
00025 #include <stdio.h>
00026 #include <time.h>
00027 #include <ctype.h>
00028 
00029 template void CDynProg::best_path_trans<1,true,false>(
00030     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00031     const int32_t *orf_info, CPlifBase **PLif_matrix,
00032     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00033     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00034     int32_t *my_pos_seq, bool use_orf);
00035 
00036 template void CDynProg::best_path_trans<2,true,false>(
00037     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00038     const int32_t *orf_info, CPlifBase **PLif_matrix,
00039     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00040     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00041     int32_t *my_pos_seq, bool use_orf);
00042 
00043 template void CDynProg::best_path_trans<1,false,false>(
00044     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00045     const int32_t *orf_info, CPlifBase **PLif_matrix,
00046     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00047     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00048     int32_t *my_pos_seq, bool use_orf);
00049 
00050 template void CDynProg::best_path_trans<2,false,false>(
00051     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00052     const int32_t *orf_info, CPlifBase **PLif_matrix,
00053     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00054     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00055     int32_t *my_pos_seq, bool use_orf);
00056 
00057 
00058 //#define USE_TMP_ARRAYCLASS
00059 //#define DYNPROG_DEBUG
00060 
00061 //CArray2<int32_t> g_orf_info(1,1) ;
00062 
00063 static int32_t word_degree_default[4]={3,4,5,6} ;
00064 static int32_t cum_num_words_default[5]={0,64,320,1344,5440} ;
00065 static int32_t num_words_default[4]=   {64,256,1024,4096} ;
00066 static int32_t mod_words_default[32] = {1,1,1,1,1,1,1,1,
00067                                     1,1,1,1,1,1,1,1,
00068                                     0,0,0,0,0,0,0,0,
00069                                     0,0,0,0,0,0,0,0} ;  
00070 static bool sign_words_default[16] = {true,true,true,true,true,true,true,true,
00071                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00072 static int32_t string_words_default[16] = {0,0,0,0,0,0,0,0,
00073                                        1,1,1,1,1,1,1,1} ; // which string should be used
00074 
00075 CDynProg::CDynProg(int32_t p_num_svms /*= 8 */)
00076 : CSGObject(), transition_matrix_a_id(1,1), transition_matrix_a(1,1),
00077     transition_matrix_a_deriv(1,1), initial_state_distribution_p(1),
00078     initial_state_distribution_p_deriv(1), end_state_distribution_q(1),
00079     end_state_distribution_q_deriv(1), dict_weights(1,1),
00080     dict_weights_array(dict_weights.get_array()),
00081 
00082       // multi svm
00083       num_degrees(4), 
00084       num_svms(p_num_svms), 
00085       num_strings(1),
00086       word_degree(word_degree_default, num_degrees, true, true),
00087       cum_num_words(cum_num_words_default, num_degrees+1, true, true),
00088       cum_num_words_array(cum_num_words.get_array()),
00089       num_words(num_words_default, num_degrees, true, true),
00090       num_words_array(num_words.get_array()),
00091       mod_words(mod_words_default, num_svms, 2, true, true),
00092       mod_words_array(mod_words.get_array()),
00093       sign_words(sign_words_default, num_svms, true, true),
00094       sign_words_array(sign_words.get_array()),
00095       string_words(string_words_default, num_svms, true, true),
00096       string_words_array(string_words.get_array()),
00097 //    word_used(num_degrees, num_words[num_degrees-1], num_strings),
00098 //    word_used_array(word_used.get_array()),
00099 //    svm_values_unnormalized(num_degrees, num_svms),
00100       svm_pos_start(num_degrees),
00101       num_unique_words(num_degrees),
00102       svm_arrays_clean(true),
00103 
00104       // single svm
00105       num_svms_single(1),
00106       word_degree_single(1),
00107       num_words_single(4), 
00108       word_used_single(num_words_single),
00109       svm_value_unnormalized_single(num_svms_single),
00110       num_unique_words_single(0),
00111 
00112       max_a_id(0), m_seq(1,1,1), m_pos(1), m_orf_info(1,2), 
00113           m_segment_sum_weights(1,1), m_plif_list(1), 
00114       m_PEN(1,1), m_PEN_state_signals(1,1), 
00115       m_genestr(1,1), m_dict_weights(1,1), m_segment_loss(1,1,2), 
00116           m_segment_ids(1),
00117           m_segment_mask(1),
00118       m_scores(1), m_states(1,1), m_positions(1,1), m_genestr_stop(1),
00119       m_lin_feat(1,1) //by Jonas
00120       //m_num_lin_feat(num_svms)
00121 {
00122     trans_list_forward = NULL ;
00123     trans_list_forward_cnt = NULL ;
00124     trans_list_forward_val = NULL ;
00125     trans_list_forward_id = NULL ;
00126     trans_list_len = 0 ;
00127 
00128     mem_initialized = true ;
00129 
00130     this->N=1;
00131     m_step=0;
00132 
00133     m_raw_intensities = NULL;
00134     m_probe_pos = NULL;
00135     m_num_probes_cum = new int32_t[100];
00136     m_num_probes_cum[0] = 0;
00137     //m_use_tiling=false;
00138     m_num_lin_feat_plifs_cum = new int32_t[100];
00139     m_num_lin_feat_plifs_cum[0] = num_svms;
00140     m_num_raw_data = 0;
00141     m_genestr_len = 0;
00142 #ifdef ARRAY_STATISTICS
00143     word_degree.set_name("word_degree");
00144 #endif
00145 }
00146 
00147 CDynProg::~CDynProg()
00148 {
00149     if (trans_list_forward_cnt)
00150         delete[] trans_list_forward_cnt;
00151     if (trans_list_forward)
00152     {
00153         for (int32_t i=0; i<trans_list_len; i++)
00154         {
00155             if (trans_list_forward[i])
00156                 delete[] trans_list_forward[i];
00157         }
00158         delete[] trans_list_forward;
00159     }
00160     if (trans_list_forward_val)
00161     {
00162         for (int32_t i=0; i<trans_list_len; i++)
00163         {
00164             if (trans_list_forward_val[i])
00165                 delete[] trans_list_forward_val[i];
00166         }
00167         delete[] trans_list_forward_val;
00168     }
00169     if (trans_list_forward_id)
00170     {
00171         for (int32_t i=0; i<trans_list_len; i++)
00172         {
00173             if (trans_list_forward_id[i])
00174                 delete[] trans_list_forward_id[i];
00175         }
00176         delete[] trans_list_forward_id;
00177     }
00178     if (m_raw_intensities)
00179         delete[] m_raw_intensities;
00180     if (m_probe_pos)
00181         delete[] m_probe_pos;
00182     if (m_num_probes_cum)
00183       delete[] m_num_probes_cum ;
00184     if (m_num_lin_feat_plifs_cum)
00185       delete[] m_num_lin_feat_plifs_cum ;
00186 }
00187 
00189 void CDynProg::set_genestr_len(int32_t genestr_len)
00190 {
00191     m_genestr_len = genestr_len;
00192 }
00193 
00194 int32_t CDynProg::get_num_svms()
00195 {
00196     return num_svms;
00197 }
00198 
00199 void CDynProg::precompute_stop_codons(const char* sequence, int32_t length)
00200 {
00201     m_genestr_stop.resize_array(length) ;
00202     m_genestr_stop.zero() ;
00203     m_genestr_stop.set_name("genestr_stop") ;
00204     {
00205     for (int32_t i=0; i<length-2; i++)
00206         if ((sequence[i]=='t' || sequence[i]=='T') && 
00207             (((sequence[i+1]=='a' || sequence[i+1]=='A') && 
00208               (sequence[i+2]=='a' || sequence[i+2]=='g' || sequence[i+2]=='A' || sequence[i+2]=='G')) ||
00209              ((sequence[i+1]=='g'||sequence[i+1]=='G') && (sequence[i+2]=='a' || sequence[i+2]=='A') )))
00210             m_genestr_stop.element(i)=true ;
00211         else
00212             m_genestr_stop.element(i)=false ;
00213     m_genestr_stop.element(length-1)=false ;
00214     m_genestr_stop.element(length-1)=false ;
00215     }
00216 }
00217 
00218 void CDynProg::set_num_states(int32_t p_N)
00219 {
00220     N=p_N ;
00221 
00222     transition_matrix_a_id.resize_array(N,N) ;
00223     transition_matrix_a.resize_array(N,N) ;
00224     transition_matrix_a_deriv.resize_array(N,N) ;
00225     initial_state_distribution_p.resize_array(N) ;
00226     initial_state_distribution_p_deriv.resize_array(N) ;
00227     end_state_distribution_q.resize_array(N);
00228     end_state_distribution_q_deriv.resize_array(N) ;
00229 
00230     m_orf_info.resize_array(N,2) ;
00231     m_PEN.resize_array(N,N) ;
00232     m_PEN_state_signals.resize_array(N,1) ;
00233 }
00234 
00235 int32_t CDynProg::get_num_states()
00236 {
00237     return N;
00238 }
00239 
00240 void CDynProg::init_tiling_data(
00241     int32_t* probe_pos, float64_t* intensities, const int32_t num_probes,
00242     const int32_t seq_len)
00243 {
00244     m_num_raw_data++;
00245     m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00246 
00247     int32_t* tmp_probe_pos = new int32_t[m_num_probes_cum[m_num_raw_data]];
00248     float64_t* tmp_raw_intensities = new float64_t[m_num_probes_cum[m_num_raw_data]];
00249 
00250 
00251     if (m_num_raw_data==1){
00252         memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00253         memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00254         //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2);  
00255     }else{
00256         memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 
00257         memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));   
00258         memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));    
00259         memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));  
00260     }
00261     delete[] m_probe_pos;
00262     delete[] m_raw_intensities;
00263     m_probe_pos = new int32_t[num_probes];
00264     m_raw_intensities = new float64_t[num_probes];
00265 
00266     memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00267     memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00268 
00269 }
00270 
00271 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms, const int32_t seq_len)
00272 {
00273     m_lin_feat.resize_array(p_num_svms, seq_len);
00274 }
00275 void CDynProg::resize_lin_feat(const int32_t num_new_feat, const int32_t seq_len)
00276 {
00277     int32_t dim1, dim2;
00278     m_lin_feat.get_array_size(dim1, dim2);
00279     ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00280     ASSERT(dim2==seq_len); // == number of candidate positions
00281 
00282 
00283     float64_t* arr = m_lin_feat.get_array();
00284     float64_t* tmp = new float64_t[(dim1+num_new_feat)*dim2];   
00285     memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00286     for(int32_t j=0;j<seq_len;j++)
00287                 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00288             tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00289 
00290     m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2);
00291 
00292     /*for(int32_t j=0;j<5;j++)
00293     {
00294         for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++)
00295         {
00296             SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j));
00297         }
00298         SG_PRINT("\n");
00299     }
00300     m_lin_feat.get_array_size(dim1,dim2);
00301     SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/
00302 
00303     //SG_PRINT("resize_lin_feat: done\n");
00304 }
00305 //void CDynProg::precompute_tiling_plifs(CPlif** PEN, const INT* tiling_plif_ids, const INT num_tiling_plifs, const INT seq_len, const INT* pos)
00306 
00307 void CDynProg::precompute_tiling_plifs(
00308     CPlif** PEN, const int32_t* tiling_plif_ids,
00309     const int32_t num_tiling_plifs, const int32_t seq_len, const int32_t* pos)
00310 {
00311     SG_PRINT("precompute_tiling_plifs:%f num_tiling_plifs:%i\n",m_raw_intensities[0], num_tiling_plifs);
00312 
00313     /*int32_t tiling_plif_ids[num_svms];
00314     int32_t num = 0;
00315         for (int32_t i=0; i<num_penalties; i++)
00316     {
00317         CPlif * plif = PEN[i];
00318         if (plif->get_use_svm()>num_svms)
00319         {
00320             tiling_plif_ids[num] = i;
00321             num++;
00322         }
00323     }*/
00324     m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00325     //for (INT d=0;d<=m_num_raw_data;d++)
00326     //  SG_PRINT("lin_feat_plifs%i: %i\n",d,m_num_lin_feat_plifs_cum[d]);
00327     float64_t* tiling_plif = new float64_t[num_tiling_plifs];
00328     float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]];
00329     for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]; i++)
00330         svm_value[i]=0.0;
00331     int32_t tiling_rows[num_tiling_plifs];
00332     for (int32_t i=0; i<num_tiling_plifs; i++)
00333     {
00334         tiling_plif[i]=0.0;
00335         CPlif * plif = PEN[tiling_plif_ids[i]];
00336         tiling_rows[i] = plif->get_use_svm();
00337         //float64_t* limits = plif->get_plif_limits();
00338         //for(int32_t j=0;j<20;j++)
00339         //  SG_PRINT("%.2f, ",limits[j]);
00340         //SG_PRINT("\n ");
00341     //  SG_PRINT("tiling_rows[%i]:%i, tiling_plif_ids[%i]:%i  \n",i,tiling_rows[i],i,tiling_plif_ids[i]);
00342         ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00343     }
00344     resize_lin_feat(num_tiling_plifs, seq_len);
00345 
00346 
00347     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00348     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00349     int32_t num=m_num_probes_cum[m_num_raw_data-1];
00350 
00351     for (int32_t pos_idx=0;pos_idx<seq_len;pos_idx++)
00352     {
00353         //SG_PRINT("pos[%i]: %i  \n",pos_idx,pos[pos_idx]);
00354         //SG_PRINT("*p_tiling_pos: %i  \n",*p_tiling_pos);
00355         while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<pos[pos_idx])
00356         {
00357             //SG_PRINT("raw_intens: %f  \n",*p_tiling_data);
00358             for (int32_t i=0; i<num_tiling_plifs; i++)
00359             {
00360                 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00361                 //if (svm_value[m_num_lin_feat+i]>15||svm_value[m_num_lin_feat+i]<4)
00362                 //  SG_PRINT("uninitialized value, value:%f, i:%i, pos:%i \n", svm_value[m_num_lin_feat+i], i, pos[pos_idx]);
00363                 CPlif * plif = PEN[tiling_plif_ids[i]];
00364                 //SG_PRINT("m_num_lin_feat_plifs_cum[m_num_raw_data]:%i, i:%i, plif->get_use_svm():%i\n",m_num_lin_feat_plifs_cum[m_num_raw_data],i, plif->get_use_svm());
00365                 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00366                 plif->set_do_calc(true);
00367                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00368                 //SG_PRINT("true: plif->lookup_penalty: %f  tiling_plif[%i]:%f \n",plif->lookup_penalty(0,svm_value),i,tiling_plif[i]);
00369                 plif->set_do_calc(false);
00370                 //SG_PRINT("false: plif->lookup_penalty: %f  \n",plif->lookup_penalty(0,svm_value));
00371             }
00372             //SG_PRINT("p_tiling_data:%f\n",*p_tiling_data);
00373             p_tiling_data++;
00374             p_tiling_pos++;
00375             num++;
00376         }
00377         for (int32_t i=0; i<num_tiling_plifs; i++)
00378             m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00379     }
00380     delete[] svm_value;
00381     delete[] tiling_plif;
00382 }
00383 
00384 void CDynProg::create_word_string(
00385     const char* genestr, int32_t genestr_num, int32_t genestr_len,
00386     uint16_t*** wordstr)
00387 {
00388     for (int32_t k=0; k<genestr_num; k++)
00389     {
00390         wordstr[k]=new uint16_t*[num_degrees] ;
00391         for (int32_t j=0; j<num_degrees; j++)
00392         {
00393             wordstr[k][j]=NULL ;
00394             {
00395                 wordstr[k][j]=new uint16_t[genestr_len] ;
00396                 for (int32_t i=0; i<genestr_len; i++)
00397                     switch (genestr[i])
00398                     {
00399                     case 'A':
00400                     case 'a': wordstr[k][j][i]=0 ; break ;
00401                     case 'C':
00402                     case 'c': wordstr[k][j][i]=1 ; break ;
00403                     case 'G':
00404                     case 'g': wordstr[k][j][i]=2 ; break ;
00405                     case 'T':
00406                     case 't': wordstr[k][j][i]=3 ; break ;
00407                     default: ASSERT(0) ;
00408                     }
00409                 translate_from_single_order(wordstr[k][j], genestr_len, word_degree[j]-1, word_degree[j]) ;
00410             }
00411         }
00412     }
00413     //precompute_stop_codons(genestr, genestr_len);
00414 }
00415 
00416 void CDynProg::precompute_content_values(
00417     uint16_t*** wordstr, const int32_t *pos,const int32_t seq_len,
00418     const int32_t genestr_len,float64_t *dictionary_weights,int32_t dict_len)
00419 {
00420     //SG_PRINT("seq_len=%i, genestr_len=%i, dict_len=%i, num_svms=%i, num_degrees=%i\n",seq_len, genestr_len, dict_len, num_svms, num_degrees);
00421 
00422     dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
00423     dict_weights_array=dict_weights.get_array() ;
00424 
00425     //int32_t d1 = mod_words.get_dim1();
00426     //int32_t d2 = mod_words.get_dim2();
00427     //SG_PRINT("d1:%i, d2:%i \n",d1, d2);
00428     //for (int32_t p=0 ; p<d1 ; p++)
00429     //{
00430     //  for (int32_t q=0 ; q<d2 ; q++)
00431     //      SG_PRINT("%i ",mod_words.get_element(p,q));
00432     //  SG_PRINT("\n");
00433     //}
00434 
00435     for (int32_t p=0 ; p<seq_len-1 ; p++)
00436     {
00437         int32_t from_pos = pos[p];
00438         int32_t to_pos = pos[p+1];
00439         float64_t my_svm_values_unnormalized[num_svms] ;
00440         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos);
00441       
00442         ASSERT(from_pos<=genestr_len)   
00443         ASSERT(to_pos<=genestr_len) 
00444             
00445         for (int32_t s=0; s<num_svms; s++)
00446         {
00447             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00448         }
00449         for (int32_t i=from_pos; i<to_pos;i++)
00450         {
00451             for (int32_t j=0; j<num_degrees; j++)
00452             {
00453                 uint16_t word = wordstr[0][j][i] ;
00454                 for (int32_t s=0; s<num_svms; s++)
00455                 {
00456                     // check if this k-mere should be considered for this SVM
00457                     if (mod_words.get_element(s,0)==3 && i%3!=mod_words.get_element(s,1))
00458                         continue;
00459                     my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
00460                 }
00461             }
00462         }
00463         for (int32_t s=0; s<num_svms; s++)
00464         {
00465             float64_t prev = m_lin_feat.get_element(s,p);
00466             m_lin_feat.set_element(prev + my_svm_values_unnormalized[s],s,p+1);
00467 
00468             ASSERT(prev>-1e20);
00469         }
00470     }
00471     for (int32_t j=0; j<num_degrees; j++)
00472         delete[] wordstr[0][j] ;
00473     delete[] wordstr[0] ;
00474 }
00475 
00476 void CDynProg::set_p_vector(float64_t *p, int32_t p_N)
00477 {
00478     if (!(p_N==N))
00479         SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), p_N: %i\n",p_N, N);
00480     //m_orf_info.resize_array(p_N,2);
00481     //m_PEN.resize_array(p_N,p_N);
00482 
00483     initial_state_distribution_p.set_array(p, p_N, true, true);
00484 }
00485 
00486 void CDynProg::set_q_vector(float64_t *q, int32_t q_N)
00487 {
00488     if (!(q_N==N))
00489         SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), p_N: %i\n",q_N, N);
00490     end_state_distribution_q.set_array(q, q_N, true, true);
00491 }
00492 
00493 void CDynProg::set_a(float64_t *a, int32_t p_M, int32_t p_N)
00494 {
00495     ASSERT(p_N==N);
00496     ASSERT(p_M==p_N);
00497     transition_matrix_a.set_array(a, p_N, p_N, true, true);
00498     transition_matrix_a_deriv.resize_array(p_N, p_N);
00499 }
00500 
00501 void CDynProg::set_a_id(int32_t *a, int32_t p_M, int32_t p_N)
00502 {
00503     ASSERT(p_N==N);
00504     ASSERT(p_M==p_N);
00505     transition_matrix_a_id.set_array(a, p_N, p_N, true, true);
00506     max_a_id = 0;
00507     for (int32_t i=0; i<p_N; i++)
00508         for (int32_t j=0; j<p_N; j++)
00509             max_a_id=CMath::max(max_a_id, transition_matrix_a_id.element(i,j));
00510 }
00511 
00512 void CDynProg::set_a_trans_matrix(
00513     float64_t *a_trans, int32_t num_trans, int32_t p_N)
00514 {
00515     if (!((p_N==3) || (p_N==4)))
00516         SG_ERROR("!((p_N==3) || (p_N==4)), p_N: %i\n",p_N);
00517 
00518     delete[] trans_list_forward ;
00519     delete[] trans_list_forward_cnt ;
00520     delete[] trans_list_forward_val ;
00521     delete[] trans_list_forward_id ;
00522 
00523     trans_list_forward = NULL ;
00524     trans_list_forward_cnt = NULL ;
00525     trans_list_forward_val = NULL ;
00526     trans_list_len = 0 ;
00527 
00528     transition_matrix_a.zero() ;
00529     transition_matrix_a_id.zero() ;
00530 
00531     mem_initialized = true ;
00532 
00533     trans_list_forward_cnt=NULL ;
00534     trans_list_len = N ;
00535     trans_list_forward = new T_STATES*[N] ;
00536     trans_list_forward_cnt = new T_STATES[N] ;
00537     trans_list_forward_val = new float64_t*[N] ;
00538     trans_list_forward_id = new int32_t*[N] ;
00539     
00540     int32_t start_idx=0;
00541     for (int32_t j=0; j<N; j++)
00542     {
00543         int32_t old_start_idx=start_idx;
00544 
00545         while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00546         {
00547             start_idx++;
00548             
00549             if (start_idx>1 && start_idx<num_trans)
00550                 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00551         }
00552         
00553         if (start_idx>1 && start_idx<num_trans)
00554             ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00555         
00556         int32_t len=start_idx-old_start_idx;
00557         ASSERT(len>=0);
00558         
00559         trans_list_forward_cnt[j] = 0 ;
00560         
00561         if (len>0)
00562         {
00563             trans_list_forward[j]     = new T_STATES[len] ;
00564             trans_list_forward_val[j] = new float64_t[len] ;
00565             trans_list_forward_id[j] = new int32_t[len] ;
00566         }
00567         else
00568         {
00569             trans_list_forward[j]     = NULL;
00570             trans_list_forward_val[j] = NULL;
00571             trans_list_forward_id[j]  = NULL;
00572         }
00573     }
00574     
00575     for (int32_t i=0; i<num_trans; i++)
00576     {
00577         int32_t from_state   = (int32_t)a_trans[i] ;
00578         int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00579         float64_t val = a_trans[i+num_trans*2] ;
00580         int32_t id = 0 ;
00581         if (p_N==4)
00582             id = (int32_t)a_trans[i+num_trans*3] ;
00583         //SG_DEBUG( "id=%i\n", id) ;
00584             
00585         ASSERT(to_state>=0 && to_state<N);
00586         ASSERT(from_state>=0 && from_state<N);
00587         
00588         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00589         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00590         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00591         trans_list_forward_cnt[to_state]++ ;
00592         transition_matrix_a.element(from_state, to_state) = val ;
00593         transition_matrix_a_id.element(from_state, to_state) = id ;
00594         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,transition_matrix_a_id.element(from_state, to_state));
00595     } ;
00596 
00597     max_a_id = 0 ;
00598     for (int32_t i=0; i<N; i++)
00599         for (int32_t j=0; j<N; j++)
00600         {
00601             //if (transition_matrix_a_id.element(i,j))
00602             //SG_DEBUG( "(%i,%i)=%i\n", i,j, transition_matrix_a_id.element(i,j)) ;
00603             max_a_id = CMath::max(max_a_id, transition_matrix_a_id.element(i,j)) ;
00604         }
00605     //SG_DEBUG( "max_a_id=%i\n", max_a_id) ;
00606 }
00607 
00608 void CDynProg::init_svm_arrays(int32_t p_num_degrees, int32_t p_num_svms)
00609 {
00610     svm_arrays_clean=false ;
00611 
00612     word_degree.resize_array(num_degrees) ;
00613 
00614     cum_num_words.resize_array(num_degrees+1) ;
00615     cum_num_words_array=cum_num_words.get_array() ;
00616 
00617     num_words.resize_array(num_degrees) ;
00618     num_words_array=num_words.get_array() ;
00619     
00620     //svm_values_unnormalized.resize_array(num_degrees, num_svms) ;
00621     svm_pos_start.resize_array(num_degrees) ;
00622     num_unique_words.resize_array(num_degrees) ;
00623 } 
00624 
00625 
00626 void CDynProg::init_word_degree_array(
00627     int32_t * p_word_degree_array, int32_t num_elem)
00628 {
00629     svm_arrays_clean=false ;
00630 
00631     word_degree.resize_array(num_degrees) ;
00632     ASSERT(num_degrees==num_elem);
00633 
00634 
00635     for (int32_t i=0; i<num_degrees; i++)
00636         word_degree[i]=p_word_degree_array[i] ;
00637 
00638 } 
00639 
00640 void CDynProg::init_cum_num_words_array(
00641     int32_t * p_cum_num_words_array, int32_t num_elem)
00642 {
00643     svm_arrays_clean=false ;
00644 
00645     cum_num_words.resize_array(num_degrees+1) ;
00646     cum_num_words_array=cum_num_words.get_array() ;
00647     ASSERT(num_degrees+1==num_elem);
00648 
00649     for (int32_t i=0; i<num_degrees+1; i++)
00650         cum_num_words[i]=p_cum_num_words_array[i] ;
00651 } 
00652 
00653 void CDynProg::init_num_words_array(
00654     int32_t * p_num_words_array, int32_t num_elem)
00655 {
00656     svm_arrays_clean=false ;
00657 
00658     num_words.resize_array(num_degrees) ;
00659     num_words_array=num_words.get_array() ;
00660     ASSERT(num_degrees==num_elem);
00661 
00662     for (int32_t i=0; i<num_degrees; i++)
00663         num_words[i]=p_num_words_array[i] ;
00664 
00665     //word_used.resize_array(num_degrees, num_words[num_degrees-1], num_strings) ;
00666     //word_used_array=word_used.get_array() ;
00667 } 
00668 
00669 void CDynProg::init_mod_words_array(
00670     int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00671 {
00672     //for (int32_t i=0; i<num_columns; i++)
00673     //{
00674     //  for (int32_t j=0; j<num_elem; j++)
00675     //      SG_PRINT("%i ",mod_words_input[i*num_elem+j]);
00676     //  SG_PRINT("\n");
00677     //}
00678     svm_arrays_clean=false ;
00679 
00680     ASSERT(num_svms==num_elem);
00681     ASSERT(num_columns==2);
00682 
00683     mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00684     mod_words_array = mod_words.get_array() ;
00685     
00686     /*SG_DEBUG( "mod_words=[") ;
00687     for (int32_t i=0; i<num_elem; i++)
00688         SG_DEBUG( "%i, ", p_mod_words_array[i]) ;
00689         SG_DEBUG( "]\n") ;*/
00690 } 
00691 
00692 void CDynProg::init_sign_words_array(bool* p_sign_words_array, int32_t num_elem)
00693 {
00694     svm_arrays_clean=false ;
00695 
00696     ASSERT(num_svms==num_elem);
00697 
00698     sign_words.set_array(p_sign_words_array, num_elem, true, true) ;
00699     sign_words_array = sign_words.get_array() ;
00700 } 
00701 
00702 void CDynProg::init_string_words_array(
00703     int32_t* p_string_words_array, int32_t num_elem)
00704 {
00705     svm_arrays_clean=false ;
00706 
00707     ASSERT(num_svms==num_elem);
00708 
00709     string_words.set_array(p_string_words_array, num_elem, true, true) ;
00710     string_words_array = string_words.get_array() ;
00711 } 
00712 
00713 bool CDynProg::check_svm_arrays()
00714 {
00715     //SG_DEBUG( "wd_dim1=%d, cum_num_words=%d, num_words=%d, svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n num_degrees=%d, num_svms=%d, num_strings=%d", word_degree.get_dim1(), cum_num_words.get_dim1(), num_words.get_dim1(), svm_pos_start.get_dim1(), num_unique_words.get_dim1(), mod_words.get_dim1(), mod_words.get_dim2(), sign_words.get_dim1(), string_words.get_dim1(), num_degrees, num_svms, num_strings);
00716     if ((word_degree.get_dim1()==num_degrees) &&
00717             (cum_num_words.get_dim1()==num_degrees+1) &&
00718             (num_words.get_dim1()==num_degrees) &&
00719             //(word_used.get_dim1()==num_degrees) &&
00720             //(word_used.get_dim2()==num_words[num_degrees-1]) &&
00721             //(word_used.get_dim3()==num_strings) &&
00722             //      (svm_values_unnormalized.get_dim1()==num_degrees) &&
00723             //      (svm_values_unnormalized.get_dim2()==num_svms) &&
00724             (svm_pos_start.get_dim1()==num_degrees) &&
00725             (num_unique_words.get_dim1()==num_degrees) &&
00726             (mod_words.get_dim1()==num_svms) &&
00727             (mod_words.get_dim2()==2) && 
00728             (sign_words.get_dim1()==num_svms) &&
00729             (string_words.get_dim1()==num_svms))
00730     {
00731         svm_arrays_clean=true ;
00732         return true ;
00733     }
00734     else
00735     {
00736         if ((num_unique_words.get_dim1()==num_degrees) &&
00737             (mod_words.get_dim1()==num_svms) &&
00738             (mod_words.get_dim2()==2) &&
00739             (sign_words.get_dim1()==num_svms) &&
00740             (string_words.get_dim1()==num_svms))
00741             SG_PRINT("OK\n") ;
00742         else
00743             SG_PRINT("not OK\n") ;
00744 
00745         if (!(word_degree.get_dim1()==num_degrees))
00746             SG_WARNING("SVM array: word_degree.get_dim1()!=num_degrees") ;
00747         if (!(cum_num_words.get_dim1()==num_degrees+1))
00748             SG_WARNING("SVM array: cum_num_words.get_dim1()!=num_degrees+1") ;
00749         if (!(num_words.get_dim1()==num_degrees))
00750             SG_WARNING("SVM array: num_words.get_dim1()==num_degrees") ;
00751         if (!(svm_pos_start.get_dim1()==num_degrees))
00752             SG_WARNING("SVM array: svm_pos_start.get_dim1()!=num_degrees") ;
00753         if (!(num_unique_words.get_dim1()==num_degrees))
00754             SG_WARNING("SVM array: num_unique_words.get_dim1()!=num_degrees") ;
00755         if (!(mod_words.get_dim1()==num_svms))
00756             SG_WARNING("SVM array: mod_words.get_dim1()!=num_svms") ;
00757         if (!(mod_words.get_dim2()==2))
00758             SG_WARNING("SVM array: mod_words.get_dim2()!=2") ;
00759         if (!(sign_words.get_dim1()==num_svms))
00760             SG_WARNING("SVM array: sign_words.get_dim1()!=num_svms") ;
00761         if (!(string_words.get_dim1()==num_svms))
00762             SG_WARNING("SVM array: string_words.get_dim1()!=num_svms") ;
00763 
00764         svm_arrays_clean=false ;
00765         return false ;  
00766     }
00767 }
00768 
00769 void CDynProg::best_path_set_seq(float64_t *seq, int32_t p_N, int32_t seq_len)
00770 {
00771     if (!svm_arrays_clean)
00772     {
00773         SG_ERROR( "SVM arrays not clean") ;
00774         return ;
00775     } ;
00776 
00777     ASSERT(p_N==N);
00778     ASSERT(initial_state_distribution_p.get_dim1()==N);
00779     ASSERT(end_state_distribution_q.get_dim1()==N);
00780     
00781     m_seq.set_array(seq, N, seq_len, 1, true, true) ;
00782     this->N=N ;
00783 
00784     m_call=3 ;
00785     m_step=2 ;
00786 }
00787 
00788 void CDynProg::best_path_set_seq3d(
00789     float64_t *seq, int32_t p_N, int32_t seq_len, int32_t max_num_signals)
00790 {
00791     if (!svm_arrays_clean)
00792     {
00793         SG_ERROR( "SVM arrays not clean") ;
00794         return ;
00795     } ;
00796 
00797     ASSERT(p_N==N);
00798     ASSERT(initial_state_distribution_p.get_dim1()==N);
00799     ASSERT(end_state_distribution_q.get_dim1()==N);
00800     
00801     m_seq.set_array(seq, N, seq_len, max_num_signals, true, true) ;
00802     this->N=N ;
00803 
00804     m_call=3 ;
00805     m_step=2 ;
00806 }
00807 
00808 void CDynProg::best_path_set_pos(int32_t *pos, int32_t seq_len)  
00809 {
00810     if (m_step!=2)
00811         SG_ERROR( "please call best_path_set_seq first\n") ;
00812     
00813     if (seq_len!=m_seq.get_dim2())
00814         SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_seq.get_dim2()) ;
00815 
00816     m_pos.set_array(pos, seq_len, true, true) ;
00817 
00818     m_step=3 ;
00819 }
00820 
00821 void CDynProg::best_path_set_orf_info(int32_t *orf_info, int32_t m, int32_t n) 
00822 {
00823     //if (m_step!=3)
00824     //  SG_ERROR( "please call best_path_set_pos first\n") ;
00825         
00826     //if (m!=N)
00827     //  SG_ERROR( "orf_info size does not match previous info %i!=%i\n", m, N) ;
00828     if (n!=2)
00829         SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00830     m_orf_info.set_array(orf_info, m, n, true, true) ;
00831 
00832     m_call=1 ;
00833     m_step=4 ;
00834 }
00835 
00836 void CDynProg::best_path_set_segment_sum_weights(
00837     float64_t *segment_sum_weights, int32_t num_states, int32_t seq_len)
00838 {
00839     if (m_step!=3)
00840         SG_ERROR( "please call best_path_set_pos first\n") ;
00841         
00842     if (num_states!=N)
00843         SG_ERROR( "segment_sum_weights size does not match previous info %i!=%i\n", num_states, N) ;
00844     if (seq_len!=m_pos.get_dim1())
00845         SG_ERROR( "segment_sum_weights size incorrect %i!=%i\n", seq_len, m_pos.get_dim1()) ;
00846 
00847     m_segment_sum_weights.set_array(segment_sum_weights, num_states, seq_len, true, true) ;
00848     
00849     m_call=2 ;
00850     m_step=4 ;
00851 }
00852 
00853 void CDynProg::best_path_set_plif_list(CDynamicArray<CPlifBase*>* plifs)
00854 {
00855     ASSERT(plifs);
00856     CPlifBase** plif_list=plifs->get_array();
00857     int32_t num_plif=plifs->get_num_elements();
00858 
00859     if (m_step!=4)
00860         SG_ERROR( "please call best_path_set_orf_info or best_path_segment_sum_weights first\n") ;
00861 
00862     m_plif_list.set_array(plif_list, num_plif, true, true) ;
00863 
00864     m_step=5 ;
00865 }
00866 
00867 void CDynProg::best_path_set_plif_id_matrix(
00868     int32_t *plif_id_matrix, int32_t m, int32_t n)
00869 {
00870     if (m_step!=5)
00871         SG_ERROR( "please call best_path_set_plif_list first\n") ;
00872 
00873     if ((m!=N) || (n!=N))
00874         SG_ERROR( "plif_id_matrix size does not match previous info %i!=%i or %i!=%i\n", m, N, n, N) ;
00875 
00876     CArray2<int32_t> id_matrix(plif_id_matrix, N, N, false, false) ;
00877 #ifdef DYNPROG_DEBUG
00878     id_matrix.set_name("id_matrix");
00879     id_matrix.display_array();
00880 #endif //DYNPROG_DEBUG
00881     m_PEN.resize_array(N, N) ;
00882     for (int32_t i=0; i<N; i++)
00883         for (int32_t j=0; j<N; j++)
00884             if (id_matrix.element(i,j)>=0)
00885                 m_PEN.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00886             else
00887                 m_PEN.element(i,j)=NULL ;
00888 
00889     m_step=6 ;
00890 }
00891 
00892 void CDynProg::best_path_set_plif_state_signal_matrix(
00893     int32_t *plif_id_matrix, int32_t m, int32_t max_num_signals)
00894 {
00895     if (m_step!=6)
00896         SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00897     
00898     if (m!=N)
00899         SG_ERROR( "plif_state_signal_matrix size does not match previous info %i!=%i\n", m, N) ;
00900 
00901     if (m_seq.get_dim3() != max_num_signals)
00902         SG_ERROR( "size(plif_state_signal_matrix,2) does not match with size(m_seq,3): %i!=%i\nSorry, Soeren... interface changed\n", m_seq.get_dim3(), max_num_signals) ;
00903 
00904     CArray2<int32_t> id_matrix(plif_id_matrix, N, max_num_signals, false, false) ;
00905     m_PEN_state_signals.resize_array(N, max_num_signals) ;
00906     for (int32_t i=0; i<N; i++)
00907     {
00908         for (int32_t j=0; j<max_num_signals; j++)
00909         {
00910             if (id_matrix.element(i,j)>=0)
00911                 m_PEN_state_signals.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00912             else
00913                 m_PEN_state_signals.element(i,j)=NULL ;
00914         }
00915     }
00916 
00917     m_step=6 ;
00918 }
00919 
00920 void CDynProg::best_path_set_genestr(
00921     char* genestr, int32_t genestr_len, int32_t genestr_num)
00922 {
00923     if (m_step!=6)
00924         SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00925 
00926     ASSERT(genestr);
00927     ASSERT(genestr_len>0);
00928     ASSERT(genestr_num>0);
00929 
00930     m_genestr.set_array(genestr, genestr_len, genestr_num, true, true) ;
00931 
00932     m_step=7 ;
00933 }
00934 
00935 void CDynProg::best_path_set_my_state_seq(
00936     int32_t* my_state_seq, int32_t seq_len)
00937 {
00938     ASSERT(my_state_seq && seq_len>0);
00939     m_my_state_seq.resize_array(seq_len);
00940     for (int32_t i=0; i<seq_len; i++)
00941         m_my_state_seq[i]=my_state_seq[i];
00942 }
00943 
00944 void CDynProg::best_path_set_my_pos_seq(int32_t* my_pos_seq, int32_t seq_len)
00945 {
00946     ASSERT(my_pos_seq && seq_len>0);
00947     m_my_pos_seq.resize_array(seq_len);
00948     for (int32_t i=0; i<seq_len; i++)
00949         m_my_pos_seq[i]=my_pos_seq[i];
00950 }
00951 
00952 void CDynProg::best_path_set_dict_weights(
00953     float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00954 {
00955     if (m_step!=7)
00956         SG_ERROR( "please call best_path_set_genestr first\n") ;
00957 
00958     if (num_svms!=n)
00959         SG_ERROR( "dict_weights array does not match num_svms=%i!=%i\n", num_svms, n) ;
00960 
00961     m_dict_weights.set_array(dictionary_weights, dict_len, num_svms, true, true) ;
00962 
00963     // initialize, so it does not bother when not used
00964     m_segment_loss.resize_array(max_a_id+1, max_a_id+1, 2) ;
00965     m_segment_loss.zero() ;
00966     m_segment_ids.resize_array(m_seq.get_dim2()) ;
00967     m_segment_mask.resize_array(m_seq.get_dim2()) ;
00968     m_segment_ids.zero() ;
00969     m_segment_mask.zero() ;
00970 
00971     m_step=8 ;
00972 }
00973 
00974 void CDynProg::best_path_set_segment_loss(
00975     float64_t* segment_loss, int32_t m, int32_t n)
00976 {
00977     // here we need two matrices. Store it in one: 2N x N
00978     if (2*m!=n)
00979         SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00980 
00981     if (m!=max_a_id+1)
00982         SG_ERROR( "segment_loss size should match max_a_id: %i!=%i\n", m, max_a_id+1) ;
00983 
00984     m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00985     /*for (int32_t i=0; i<n; i++)
00986         for (int32_t j=0; j<n; j++)
00987         SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/
00988 }
00989 
00990 void CDynProg::best_path_set_segment_ids_mask(
00991     int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00992 {
00993     int32_t max_id = 0;
00994     for (int32_t i=1;i<m;i++)
00995         max_id = CMath::max(max_id,segment_ids[i]);
00996     SG_PRINT("max_id: %i, m:%i\n",max_id, m);   
00997     m_segment_ids.set_array(segment_ids, m, false, true) ;
00998     m_segment_ids.set_name("m_segment_ids");
00999     m_segment_mask.set_array(segment_mask, m, false, true) ;
01000     m_segment_mask.set_name("m_segment_mask");
01001 }
01002 
01003 
01004 void CDynProg::best_path_call(int32_t nbest, bool use_orf) 
01005 {
01006     if (m_step!=8)
01007         SG_ERROR( "please call best_path_set_dict_weights first\n") ;
01008     if (m_call!=1)
01009         SG_ERROR( "please call best_path_set_orf_info first\n") ;
01010     ASSERT(N==m_seq.get_dim1()) ;
01011     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
01012 
01013     m_scores.resize_array(nbest) ;
01014     m_states.resize_array(nbest, m_seq.get_dim2()) ;
01015     m_positions.resize_array(nbest, m_seq.get_dim2()) ;
01016 
01017     m_call=1 ;
01018 
01019     ASSERT(nbest==1||nbest==2) ;
01020     ASSERT(m_genestr.get_dim2()==1) ;
01021     if (nbest==1)
01022         best_path_trans<1,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
01023                                 m_orf_info.get_array(), m_PEN.get_array(),
01024                                 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
01025                                 m_genestr.get_dim2(), m_scores.get_array(), 
01026                                 m_states.get_array(), m_positions.get_array(),
01027                                 use_orf) ;
01028     else
01029         best_path_trans<2,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
01030                                 m_orf_info.get_array(), m_PEN.get_array(),
01031                                 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
01032                                 m_genestr.get_dim2(), m_scores.get_array(),
01033                                 m_states.get_array(), m_positions.get_array(),
01034                                 use_orf) ;
01035 
01036     m_step=9 ;
01037 }
01038 
01039 void CDynProg::best_path_deriv_call() 
01040 {
01041     //if (m_step!=8)
01042         //SG_ERROR( "please call best_path_set_dict_weights first\n") ;
01043     //if (m_call!=1)
01044         //SG_ERROR( "please call best_path_set_orf_info first\n") ;
01045     ASSERT(N==m_seq.get_dim1()) ;
01046     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
01047 
01048     m_call=5 ; // or so ...
01049 
01050     m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
01051     m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
01052 
01053     best_path_trans_deriv(m_my_state_seq.get_array(), m_my_pos_seq.get_array(), 
01054                           m_my_scores.get_array(), m_my_losses.get_array(), m_my_state_seq.get_array_size(),
01055                           m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
01056                           m_PEN.get_array(), m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(), m_genestr.get_dim2()) ;
01057 
01058     m_step=12 ;
01059 }
01060 
01061 
01062 void CDynProg::best_path_2struct_call(int32_t nbest) 
01063 {
01064     if (m_step!=8)
01065         SG_ERROR( "please call best_path_set_orf_dict_weights first\n") ;
01066     if (m_call!=2)
01067         SG_ERROR( "please call best_path_set_segment_sum_weights first\n") ;
01068     ASSERT(N==m_seq.get_dim1()) ;
01069     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
01070     
01071     m_scores.resize_array(nbest) ;
01072     m_states.resize_array(nbest, m_seq.get_dim2()) ;
01073     m_positions.resize_array(nbest, m_seq.get_dim2()) ;
01074 
01075     m_call=2 ;
01076 
01077     best_path_2struct(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
01078                       m_PEN.get_array(), 
01079                       m_genestr.get_array(), m_genestr.get_dim1(),
01080                       nbest, 
01081                       m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
01082                       m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(), 
01083                       m_segment_sum_weights.get_array()) ;
01084 
01085     m_step=9 ;
01086 }
01087 
01088 void CDynProg::best_path_simple_call(int32_t nbest) 
01089 {
01090     if (m_step!=2)
01091         SG_ERROR( "please call best_path_set_seq first\n") ;
01092     if (m_call!=3)
01093         SG_ERROR( "please call best_path_set_seq first\n") ;
01094     ASSERT(N==m_seq.get_dim1()) ;
01095 
01096     m_scores.resize_array(nbest) ;
01097     m_states.resize_array(nbest, m_seq.get_dim2()) ;
01098 
01099     m_call=3 ;
01100 
01101     best_path_trans_simple(m_seq.get_array(), m_seq.get_dim2(), 
01102                            nbest, 
01103                            m_scores.get_array(), m_states.get_array()) ;
01104     
01105     m_step=9 ;
01106 }
01107 
01108 void CDynProg::best_path_deriv_call(int32_t nbest)
01109 {
01110     if (!svm_arrays_clean)
01111     {
01112         SG_ERROR( "SVM arrays not clean") ;
01113         return ;
01114     } ;
01115 
01116     //FIXME
01117 }
01118 
01119 
01120 void CDynProg::best_path_get_scores(float64_t **scores, int32_t *m) 
01121 {
01122     if (m_step!=9 && m_step!=12)
01123         SG_ERROR( "please call best_path*_call first\n") ;
01124 
01125     if (m_step==9)
01126     {
01127         *scores=m_scores.get_array() ;
01128         *m=m_scores.get_dim1() ;
01129     } else
01130     {
01131         *scores=m_my_scores.get_array() ;
01132         *m=m_my_scores.get_dim1() ;
01133     }
01134 
01135     m_step=10 ;
01136 }
01137 
01138 void CDynProg::best_path_get_states(int32_t **states, int32_t *m, int32_t *n) 
01139 {
01140     if (m_step!=10)
01141         SG_ERROR( "please call best_path_get_score first\n") ;
01142     
01143     *states=m_states.get_array() ;
01144     *m=m_states.get_dim1() ;
01145     *n=m_states.get_dim2() ;
01146 
01147     m_step=11 ;
01148 }
01149 
01150 void CDynProg::best_path_get_positions(
01151     int32_t **positions, int32_t *m, int32_t *n)
01152 {
01153     if (m_step!=11)
01154         SG_ERROR( "please call best_path_get_positions first\n") ;
01155     if (m_call==3)
01156         SG_ERROR( "no position information for best_path_simple\n") ;
01157     
01158     *positions=m_positions.get_array() ;
01159     *m=m_positions.get_dim1() ;
01160     *n=m_positions.get_dim2() ;
01161 }
01162 
01163 void CDynProg::best_path_get_losses(float64_t** losses, int32_t* seq_len)
01164 {
01165     ASSERT(losses && seq_len);
01166     *losses=m_my_losses.get_array();
01167     *seq_len=m_my_losses.get_dim1();
01168 }
01169 
01170 
01172 
01173 float64_t CDynProg::best_path_no_b(
01174     int32_t max_iter, int32_t &best_iter, int32_t *my_path)
01175 {
01176     CArray2<T_STATES> psi(max_iter, N) ;
01177     CArray<float64_t>* delta = new CArray<float64_t>(N) ;
01178     CArray<float64_t>* delta_new = new CArray<float64_t>(N) ;
01179     
01180     { // initialization
01181         for (int32_t i=0; i<N; i++)
01182         {
01183             delta->element(i) = get_p(i) ;
01184             psi.element(0, i)= 0 ;
01185         }
01186     } 
01187     
01188     float64_t best_iter_prob = CMath::ALMOST_NEG_INFTY ;
01189     best_iter = 0 ;
01190     
01191     // recursion
01192     for (int32_t t=1; t<max_iter; t++)
01193     {
01194         CArray<float64_t>* dummy;
01195         int32_t NN=N ;
01196         for (int32_t j=0; j<NN; j++)
01197         {
01198             float64_t maxj = delta->element(0) + transition_matrix_a.element(0,j);
01199             int32_t argmax=0;
01200             
01201             for (int32_t i=1; i<NN; i++)
01202             {
01203                 float64_t temp = delta->element(i) + transition_matrix_a.element(i,j);
01204                 
01205                 if (temp>maxj)
01206                 {
01207                     maxj=temp;
01208                     argmax=i;
01209                 }
01210             }
01211             delta_new->element(j)=maxj ;
01212             psi.element(t, j)=argmax ;
01213         }
01214         
01215         dummy=delta;
01216         delta=delta_new;
01217         delta_new=dummy;    //switch delta/delta_new
01218         
01219         { //termination
01220             float64_t maxj=delta->element(0)+get_q(0);
01221             int32_t argmax=0;
01222             
01223             for (int32_t i=1; i<N; i++)
01224             {
01225                 float64_t temp=delta->element(i)+get_q(i);
01226                 
01227                 if (temp>maxj)
01228                 {
01229                     maxj=temp;
01230                     argmax=i;
01231                 }
01232             }
01233             //pat_prob=maxj;
01234             
01235             if (maxj>best_iter_prob)
01236             {
01237                 my_path[t]=argmax;
01238                 best_iter=t ;
01239                 best_iter_prob = maxj ;
01240             } ;
01241         } ;
01242     }
01243 
01244     
01245     { //state sequence backtracking
01246         for (int32_t t = best_iter; t>0; t--)
01247         {
01248             my_path[t-1]=psi.element(t, my_path[t]);
01249         }
01250     }
01251 
01252     delete delta ;
01253     delete delta_new ;
01254     
01255     return best_iter_prob ;
01256 }
01257 
01258 void CDynProg::best_path_no_b_trans(
01259     int32_t max_iter, int32_t &max_best_iter, int16_t nbest,
01260     float64_t *prob_nbest, int32_t *my_paths)
01261 {
01262     //T_STATES *psi=new T_STATES[max_iter*N*nbest] ;
01263     CArray3<T_STATES> psi(max_iter, N, nbest) ;
01264     CArray3<int16_t> ktable(max_iter, N, nbest) ;
01265     CArray2<int16_t> ktable_ends(max_iter, nbest) ;
01266 
01267     CArray<float64_t> tempvv(nbest*N) ;
01268     CArray<int32_t> tempii(nbest*N) ;
01269 
01270     CArray2<T_STATES> path_ends(max_iter, nbest) ;
01271     CArray2<float64_t> *delta=new CArray2<float64_t>(N, nbest) ;
01272     CArray2<float64_t> *delta_new=new CArray2<float64_t>(N, nbest) ;
01273     CArray2<float64_t> delta_end(max_iter, nbest) ;
01274 
01275     CArray2<int32_t> paths(max_iter, nbest) ;
01276     paths.set_array(my_paths, max_iter, nbest, false, false) ;
01277 
01278     { // initialization
01279         for (T_STATES i=0; i<N; i++)
01280         {
01281             delta->element(i,0) = get_p(i) ;
01282             for (int16_t k=1; k<nbest; k++)
01283             {
01284                 delta->element(i,k)=-CMath::INFTY ;
01285                 ktable.element(0,i,k)=0 ;
01286             }
01287         }
01288     }
01289     
01290     // recursion
01291     for (int32_t t=1; t<max_iter; t++)
01292     {
01293         CArray2<float64_t>* dummy=NULL;
01294 
01295         for (T_STATES j=0; j<N; j++)
01296         {
01297             const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01298             const T_STATES *elem_list = trans_list_forward[j] ;
01299             const float64_t *elem_val = trans_list_forward_val[j] ;
01300             
01301             int32_t list_len=0 ;
01302             for (int16_t diff=0; diff<nbest; diff++)
01303             {
01304                 for (int32_t i=0; i<num_elem; i++)
01305                 {
01306                     T_STATES ii = elem_list[i] ;
01307                     
01308                     tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01309                     tempii.element(list_len) = diff*N + ii ;
01310                     list_len++ ;
01311                 }
01312             }
01313             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01314             
01315             for (int16_t k=0; k<nbest; k++)
01316             {
01317                 if (k<list_len)
01318                 {
01319                     delta_new->element(j,k)  = -tempvv[k] ;
01320                     psi.element(t,j,k)      = (tempii[k]%N) ;
01321                     ktable.element(t,j,k)   = (tempii[k]-(tempii[k]%N))/N ;
01322                 }
01323                 else
01324                 {
01325                     delta_new->element(j,k)  = -CMath::INFTY ;
01326                     psi.element(t,j,k)      = 0 ;
01327                     ktable.element(t,j,k)   = 0 ;
01328                 }
01329             }
01330         }
01331         
01332         dummy=delta;
01333         delta=delta_new;
01334         delta_new=dummy;    //switch delta/delta_new
01335         
01336         { //termination
01337             int32_t list_len = 0 ;
01338             for (int16_t diff=0; diff<nbest; diff++)
01339             {
01340                 for (T_STATES i=0; i<N; i++)
01341                 {
01342                     tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01343                     tempii.element(list_len) = diff*N + i ;
01344                     list_len++ ;
01345                 }
01346             }
01347             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01348             
01349             for (int16_t k=0; k<nbest; k++)
01350             {
01351                 delta_end.element(t-1,k) = -tempvv[k] ;
01352                 path_ends.element(t-1,k) = (tempii[k]%N) ;
01353                 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%N))/N ;
01354             }
01355         }
01356     }
01357     
01358     { //state sequence backtracking
01359         max_best_iter=0 ;
01360         
01361         CArray<float64_t> sort_delta_end(max_iter*nbest) ;
01362         CArray<int16_t> sort_k(max_iter*nbest) ;
01363         CArray<int32_t> sort_t(max_iter*nbest) ;
01364         CArray<int32_t> sort_idx(max_iter*nbest) ;
01365         
01366         int32_t i=0 ;
01367         for (int32_t iter=0; iter<max_iter-1; iter++)
01368             for (int16_t k=0; k<nbest; k++)
01369             {
01370                 sort_delta_end[i]=-delta_end.element(iter,k) ;
01371                 sort_k[i]=k ;
01372                 sort_t[i]=iter+1 ;
01373                 sort_idx[i]=i ;
01374                 i++ ;
01375             }
01376         
01377         CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01378 
01379         for (int16_t n=0; n<nbest; n++)
01380         {
01381             int16_t k=sort_k[sort_idx[n]] ;
01382             int32_t iter=sort_t[sort_idx[n]] ;
01383             prob_nbest[n]=-sort_delta_end[n] ;
01384 
01385             if (iter>max_best_iter)
01386                 max_best_iter=iter ;
01387             
01388             ASSERT(k<nbest) ;
01389             ASSERT(iter<max_iter) ;
01390             
01391             paths.element(iter,n) = path_ends.element(iter-1, k) ;
01392             int16_t q   = ktable_ends.element(iter-1, k) ;
01393             
01394             for (int32_t t = iter; t>0; t--)
01395             {
01396                 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01397                 q = ktable.element(t, paths.element(t,n), q) ;
01398             }
01399         }
01400     }
01401 
01402     delete delta ;
01403     delete delta_new ;
01404 }
01405 
01406 
01407 void CDynProg::translate_from_single_order(
01408     uint16_t* obs, int32_t sequence_length, int32_t start, int32_t order,
01409     int32_t max_val)
01410 {
01411     int32_t i,j;
01412     uint16_t value=0;
01413     
01414     for (i=sequence_length-1; i>=order-1; i--) //convert interval of size T
01415     {
01416         value=0;
01417         for (j=i; j>=i-order+1; j--)
01418             value= (value >> max_val) | (obs[j] << (max_val * (order-1)));
01419         
01420         obs[i]=value;
01421     }
01422     
01423     for (i=order-2;i>=0;i--)
01424     {
01425         value=0;
01426         for (j=i; j>=i-order+1; j--)
01427         {
01428             value= (value >> max_val);
01429             if (j>=0)
01430                 value|=obs[j] << (max_val * (order-1));
01431         }
01432         obs[i]=value;
01433         //ASSERT(value<num_words) ;
01434     }
01435     if (start>0)
01436         for (i=start; i<sequence_length; i++)   
01437             obs[i-start]=obs[i];
01438 }
01439 
01440 void CDynProg::reset_svm_value(
01441     int32_t pos, int32_t & last_svm_pos, float64_t * svm_value)
01442 {
01443     for (int32_t i=0; i<num_words_single; i++)
01444         word_used_single[i]=false ;
01445     for (int32_t s=0; s<num_svms; s++)
01446         svm_value_unnormalized_single[s] = 0 ;
01447     for (int32_t s=0; s<num_svms; s++)
01448         svm_value[s] = 0 ;
01449     last_svm_pos = pos - 6+1 ;
01450     num_unique_words_single=0 ;
01451 }
01452 
01453 void CDynProg::extend_svm_value(
01454     uint16_t* wordstr, int32_t pos, int32_t &last_svm_pos, float64_t* svm_value)
01455 {
01456     bool did_something = false ;
01457     for (int32_t i=last_svm_pos-1; (i>=pos) && (i>=0); i--)
01458     {
01459         if (wordstr[i]>=num_words_single)
01460             SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[i]) ;
01461         
01462         if (!word_used_single[wordstr[i]])
01463         {
01464             for (int32_t s=0; s<num_svms_single; s++)
01465                 svm_value_unnormalized_single[s]+=dict_weights.element(wordstr[i],s) ;
01466             
01467             word_used_single[wordstr[i]]=true ;
01468             num_unique_words_single++ ;
01469             did_something=true ;
01470         }
01471     } ;
01472     if (num_unique_words_single>0)
01473     {
01474         last_svm_pos=pos ;
01475         if (did_something)
01476             for (int32_t s=0; s<num_svms; s++)
01477                 svm_value[s]= svm_value_unnormalized_single[s]/sqrt((float64_t)num_unique_words_single) ;  // full normalization
01478     }
01479     else
01480     {
01481         // what should I do?
01482         for (int32_t s=0; s<num_svms; s++)
01483             svm_value[s]=0 ;
01484     }
01485     
01486 }
01487 
01488 
01489 void CDynProg::reset_segment_sum_value(
01490     int32_t num_states, int32_t pos, int32_t & last_segment_sum_pos,
01491     float64_t * segment_sum_value)
01492 {
01493     for (int32_t s=0; s<num_states; s++)
01494         segment_sum_value[s] = 0 ;
01495     last_segment_sum_pos = pos ;
01496     //SG_DEBUG( "start: %i\n", pos) ;
01497 }
01498 
01499 void CDynProg::extend_segment_sum_value(
01500     float64_t *segment_sum_weights, int32_t seqlen, int32_t num_states,
01501     int32_t pos, int32_t &last_segment_sum_pos, float64_t* segment_sum_value)
01502 {
01503     for (int32_t i=last_segment_sum_pos-1; (i>=pos) && (i>=0); i--)
01504     {
01505         for (int32_t s=0; s<num_states; s++)
01506             segment_sum_value[s] += segment_sum_weights[i*num_states+s] ;
01507     } ;
01508     //SG_DEBUG( "extend %i: %f\n", pos, segment_sum_value[0]) ;
01509     last_segment_sum_pos = pos ;
01510 }
01511 
01512 
01513 void CDynProg::best_path_2struct(
01514     const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
01515     CPlifBase **Plif_matrix, const char *genestr, int32_t genestr_len,
01516     int16_t nbest, float64_t *prob_nbest, int32_t *my_state_seq,
01517     int32_t *my_pos_seq, float64_t *dictionary_weights, int32_t dict_len,
01518     float64_t *segment_sum_weights)
01519 {
01520     const int32_t default_look_back = 100 ;
01521     int32_t max_look_back = default_look_back ;
01522     bool use_svm = false ;
01523     ASSERT(dict_len==num_svms*num_words_single) ;
01524     dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
01525     dict_weights_array=dict_weights.get_array() ;
01526 
01527     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false) ;
01528     CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
01529     
01530     float64_t svm_value[num_svms] ;
01531     float64_t segment_sum_value[N] ;
01532     
01533     { // initialize svm_svalue
01534         for (int32_t s=0; s<num_svms; s++)
01535             svm_value[s]=0 ;
01536     }
01537     
01538     { // determine maximal length of look-back
01539         for (int32_t i=0; i<N; i++)
01540             for (int32_t j=0; j<N; j++)
01541             {
01542                 CPlifBase *penij=PEN.element(i,j) ;
01543                 if (penij==NULL)
01544                     continue ;
01545                 if (penij->get_max_value()>max_look_back)
01546                     max_look_back=(int32_t) CMath::ceil(penij->get_max_value());
01547                 if (penij->uses_svm_values())
01548                     use_svm=true ;
01549             }
01550     }
01551     max_look_back = CMath::min(genestr_len, max_look_back) ;
01552     //SG_DEBUG("use_svm=%i\n", use_svm) ;
01553     //SG_DEBUG("max_look_back=%i\n", max_look_back) ;
01554     
01555     const int32_t look_back_buflen = (max_look_back+1)*nbest*N ;
01556     //SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01557     const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01558                                 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01559                                 seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01560                                 genestr_len*sizeof(bool))/(1024*1024)
01561          ;
01562     bool is_big = (mem_use>200) || (seq_len>5000) ;
01563     
01564     if (is_big)
01565     {
01566         SG_DEBUG("calling best_path_2struct: seq_len=%i, N=%i, lookback=%i nbest=%i\n", 
01567                      seq_len, N, max_look_back, nbest) ;
01568         SG_DEBUG("allocating %1.2fMB of memory\n", 
01569                      mem_use) ;
01570     }
01571     ASSERT(nbest<32000) ;
01572         
01573     CArray3<float64_t> delta(max_look_back+1, N, nbest) ;
01574     CArray3<T_STATES> psi(seq_len,N,nbest) ;
01575     CArray3<int16_t> ktable(seq_len,N,nbest) ;
01576     CArray3<int32_t> ptable(seq_len,N,nbest) ;
01577 
01578     CArray<float64_t> delta_end(nbest) ;
01579     CArray<T_STATES> path_ends(nbest) ;
01580     CArray<int16_t> ktable_end(nbest) ;
01581 
01582     CArray<float64_t> tempvv(look_back_buflen) ;
01583     CArray<int32_t> tempii(look_back_buflen) ;
01584 
01585     CArray<T_STATES> state_seq(seq_len) ;
01586     CArray<int32_t> pos_seq(seq_len) ;
01587 
01588     // translate to words, if svm is used
01589     uint16_t* wordstr=NULL ;
01590     if (use_svm)
01591     {
01592         ASSERT(dictionary_weights!=NULL) ;
01593         wordstr=new uint16_t[genestr_len] ;
01594         for (int32_t i=0; i<genestr_len; i++)
01595             switch (genestr[i])
01596             {
01597             case 'A':
01598             case 'a': wordstr[i]=0 ; break ;
01599             case 'C':
01600             case 'c': wordstr[i]=1 ; break ;
01601             case 'G':
01602             case 'g': wordstr[i]=2 ; break ;
01603             case 'T':
01604             case 't': wordstr[i]=3 ; break ;
01605             default: ASSERT(0) ;
01606             }
01607         translate_from_single_order(wordstr, genestr_len, word_degree_single-1, word_degree_single) ;
01608     }
01609     
01610     
01611     { // initialization
01612         for (T_STATES i=0; i<N; i++)
01613         {
01614             delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
01615             psi.element(0,i,0)   = 0 ;
01616             ktable.element(0,i,0)  = 0 ;
01617             ptable.element(0,i,0)  = 0 ;
01618             for (int16_t k=1; k<nbest; k++)
01619             {
01620                 delta.element(0,i,k)    = -CMath::INFTY ;
01621                 psi.element(0,i,0)      = 0 ;
01622                 ktable.element(0,i,k)     = 0 ;
01623                 ptable.element(0,i,k)     = 0 ;
01624             }
01625         }
01626     }
01627 
01628     // recursion
01629     for (int32_t t=1; t<seq_len; t++)
01630     {
01631         if (is_big && t%(seq_len/10000)==1)
01632             SG_PROGRESS(t, 0, seq_len);
01633         //SG_PRINT("%i\n", t) ;
01634         
01635         for (T_STATES j=0; j<N; j++)
01636         {
01637             if (seq.element(j,t)<-1e20)
01638             { // if we cannot observe the symbol here, then we can omit the rest
01639                 for (int16_t k=0; k<nbest; k++)
01640                 {
01641                     delta.element(t%max_look_back,j,k)    = seq.element(j,t) ;
01642                     psi.element(t,j,k)      = 0 ;
01643                     ktable.element(t,j,k)     = 0 ;
01644                     ptable.element(t,j,k)     = 0 ;
01645                 }
01646             }
01647             else
01648             {
01649                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01650                 const T_STATES *elem_list = trans_list_forward[j] ;
01651                 const float64_t *elem_val      = trans_list_forward_val[j] ;
01652                 
01653                 int32_t list_len=0 ;
01654                 for (int32_t i=0; i<num_elem; i++)
01655                 {
01656                     T_STATES ii = elem_list[i] ;
01657                     //SG_DEBUG( "i=%i  ii=%i  num_elem=%i  PEN=%ld\n", i, ii, num_elem, PEN(j,ii)) ;
01658                     
01659                     const CPlifBase * penalty = PEN.element(j,ii) ;
01660                     int32_t look_back = default_look_back ;
01661                     if (penalty!=NULL)
01662                         look_back=(int32_t) (CMath::ceil(penalty->get_max_value()));
01663                     
01664                     int32_t last_svm_pos ;
01665                     if (use_svm)
01666                         reset_svm_value(pos[t], last_svm_pos, svm_value) ;
01667 
01668                     int32_t last_segment_sum_pos ;
01669                     reset_segment_sum_value(N, pos[t], last_segment_sum_pos, segment_sum_value) ;
01670 
01671                     for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
01672                     {
01673                         if (use_svm)
01674                             extend_svm_value(wordstr, pos[ts], last_svm_pos, svm_value) ;
01675 
01676                         extend_segment_sum_value(segment_sum_weights, seq_len, N, pos[ts], last_segment_sum_pos, segment_sum_value) ;
01677                         
01678                         float64_t pen_val = 0.0 ;
01679                         if (penalty)
01680                             pen_val=penalty->lookup_penalty(pos[t]-pos[ts], svm_value) + segment_sum_value[j] ;
01681                         for (int16_t diff=0; diff<nbest; diff++)
01682                         {
01683                             float64_t  val        = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
01684                             val             += pen_val ;
01685                             
01686                             tempvv[list_len] = -val ;
01687                             tempii[list_len] =  ii + diff*N + ts*N*nbest;
01688                             //SG_DEBUG( "%i (%i,%i,%i, %i, %i) ", list_len, diff, ts, i, pos[t]-pos[ts], look_back) ;
01689                             list_len++ ;
01690                         }
01691                     }
01692                 }
01693                 CMath::nmin<int32_t>(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01694                 
01695                 for (int16_t k=0; k<nbest; k++)
01696                 {
01697                     if (k<list_len)
01698                     {
01699                         delta.element(t%max_look_back,j,k)    = -tempvv[k] + seq.element(j,t);
01700                         psi.element(t,j,k)      = (tempii[k]%N) ;
01701                         ktable.element(t,j,k)     = (tempii[k]%(N*nbest)-psi.element(t,j,k))/N ;
01702                         ptable.element(t,j,k)     = (tempii[k]-(tempii[k]%(N*nbest)))/(N*nbest) ;
01703                     }
01704                     else
01705                     {
01706                         delta.element(t%max_look_back,j,k)    = -CMath::INFTY ;
01707                         psi.element(t,j,k)      = 0 ;
01708                         ktable.element(t,j,k)     = 0 ;
01709                         ptable.element(t,j,k)     = 0 ;
01710                     }
01711                 }
01712             }
01713         }
01714     }
01715     
01716     { //termination
01717         int32_t list_len = 0 ;
01718         for (int16_t diff=0; diff<nbest; diff++)
01719         {
01720             for (T_STATES i=0; i<N; i++)
01721             {
01722                 tempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
01723                 tempii[list_len] = i + diff*N ;
01724                 list_len++ ;
01725             }
01726         }
01727         
01728         CMath::nmin(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01729         
01730         for (int16_t k=0; k<nbest; k++)
01731         {
01732             delta_end.element(k) = -tempvv[k] ;
01733             path_ends.element(k) = (tempii[k]%N) ;
01734             ktable_end.element(k) = (tempii[k]-path_ends.element(k))/N ;
01735         }
01736     }
01737     
01738     { //state sequence backtracking     
01739         for (int16_t k=0; k<nbest; k++)
01740         {
01741             prob_nbest[k]= delta_end.element(k) ;
01742             
01743             int32_t i         = 0 ;
01744             state_seq[i]  = path_ends.element(k) ;
01745             int16_t q   = ktable_end.element(k) ;
01746             pos_seq[i]    = seq_len-1 ;
01747 
01748             while (pos_seq[i]>0)
01749             {
01750                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
01751                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
01752                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
01753                 q              = ktable.element(pos_seq[i], state_seq[i], q) ;
01754                 i++ ;
01755             }
01756             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
01757             int32_t num_states = i+1 ;
01758             for (i=0; i<num_states;i++)
01759             {
01760                 my_state_seq[i+k*(seq_len+1)] = state_seq[num_states-i-1] ;
01761                 my_pos_seq[i+k*(seq_len+1)]   = pos_seq[num_states-i-1] ;
01762             }
01763             my_state_seq[num_states+k*(seq_len+1)]=-1 ;
01764             my_pos_seq[num_states+k*(seq_len+1)]=-1 ;
01765         }
01766     }
01767     if (is_big)
01768         SG_PRINT( "DONE.     \n") ;
01769 }
01770 
01771 /*void CDynProg::reset_svm_values(int32_t pos, int32_t * last_svm_pos, float64_t * svm_value) 
01772 {
01773     for (int32_t j=0; j<num_degrees; j++)
01774     {
01775         for (int32_t i=0; i<num_words_array[j]; i++)
01776             word_used.element(word_used_array, j, i, num_degrees)=false ;
01777         for (int32_t s=0; s<num_svms; s++)
01778             svm_values_unnormalized.element(j,s) = 0 ;
01779         num_unique_words[j]=0 ;
01780         last_svm_pos[j] = pos - word_degree[j]+1 ;
01781         svm_pos_start[j] = pos - word_degree[j] ;
01782     }
01783     for (int32_t s=0; s<num_svms; s++)
01784         svm_value[s] = 0 ;
01785 }
01786 
01787 void CDynProg::extend_svm_values(uint16_t** wordstr, int32_t pos, int32_t *last_svm_pos, float64_t* svm_value) 
01788 {
01789     bool did_something = false ;
01790     for (int32_t j=0; j<num_degrees; j++)
01791     {
01792         for (int32_t i=last_svm_pos[j]-1; (i>=pos) && (i>=0); i--)
01793         {
01794             if (wordstr[j][i]>=num_words_array[j])
01795                 SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[j][i]) ;
01796 
01797             ASSERT(wordstr[j][i]<num_words_array[j]) ;
01798             if (!word_used.element(word_used_array, j, wordstr[j][i], num_degrees))
01799             {
01800                 for (int32_t s=0; s<num_svms; s++)
01801                     svm_values_unnormalized.element(j,s)+=dict_weights_array[wordstr[j][i]+cum_num_words_array[j]+s*cum_num_words_array[num_degrees]] ;
01802                     //svm_values_unnormalized.element(j,s)+=dict_weights.element(wordstr[j][i]+cum_num_words_array[j],s) ;
01803                 
01804                 //word_used.element(j,wordstr[j][i])=true ;
01805                 word_used.element(word_used_array, j, wordstr[j][i], num_degrees)=true ;
01806                 num_unique_words[j]++ ;
01807                 did_something=true ;
01808             } ;
01809         } ;
01810         if (num_unique_words[j]>0)
01811             last_svm_pos[j]=pos ;
01812     } ;
01813     
01814     if (did_something)
01815         for (int32_t s=0; s<num_svms; s++)
01816         {
01817             svm_value[s]=0.0 ;
01818             for (int32_t j=0; j<num_degrees; j++)
01819                 if (num_unique_words[j]>0)
01820                     svm_value[s]+= svm_values_unnormalized.element(j,s)/sqrt((float64_t)num_unique_words[j]) ;  // full normalization
01821         }
01822 }
01823 */
01824 
01825 void CDynProg::init_segment_loss(
01826     struct segment_loss_struct & loss, int32_t seqlen, int32_t howmuchlookback)
01827 {
01828 #ifdef DYNPROG_TIMING
01829     MyTime.start() ;
01830 #endif
01831     int32_t clear_size = CMath::min(howmuchlookback,seqlen) ;
01832     
01833     if (!loss.num_segment_id)
01834     {
01835         loss.segments_changed       = new int32_t[seqlen] ;
01836         loss.num_segment_id         = new float64_t[(max_a_id+1)*seqlen] ;
01837         loss.length_segment_id      = new int32_t[(max_a_id+1)*seqlen] ;
01838 
01839         clear_size = seqlen ;
01840     }
01841     
01842     for (int32_t j=0; j<clear_size; j++)
01843     {
01844         loss.segments_changed[j]=0 ;
01845         for (int32_t i=0; i<max_a_id+1; i++)       
01846         {
01847             loss.num_segment_id[i*seqlen+j] = 0;
01848             loss.length_segment_id[i*seqlen+j] = 0;
01849         }
01850     }
01851 
01852     loss.maxlookback = howmuchlookback ;
01853     loss.seqlen = seqlen;
01854 
01855 #ifdef DYNPROG_TIMING
01856     MyTime.stop() ;
01857     segment_init_time += MyTime.time_diff_sec() ;
01858 #endif
01859 }
01860 
01861 void CDynProg::clear_segment_loss(struct segment_loss_struct & loss) 
01862 {
01863 #ifdef DYNPROG_TIMING
01864     MyTime.start() ;
01865 #endif
01866     
01867     if (loss.num_segment_id != NULL)
01868     {
01869         delete[] loss.segments_changed ;
01870         delete[] loss.num_segment_id ;
01871         delete[] loss.length_segment_id ;
01872         loss.segments_changed = NULL ;
01873         loss.num_segment_id = NULL ;
01874         loss.length_segment_id = NULL ;
01875     }
01876 #ifdef DYNPROG_TIMING
01877     MyTime.stop() ;
01878     segment_clean_time += MyTime.time_diff_sec() ;
01879 #endif
01880 }
01881 
01882 float64_t CDynProg::extend_segment_loss(
01883     struct segment_loss_struct & loss, const int32_t * pos_array,
01884     int32_t segment_id, int32_t pos, int32_t & last_pos, float64_t &last_value)
01885 {
01886 #ifdef DYNPROG_TIMING
01887     MyTime.start() ;
01888 #endif
01889     
01890     if (pos==last_pos)
01891         return last_value ;
01892     ASSERT(pos<last_pos) ;
01893 
01894     last_pos-- ;
01895     bool changed = false ;
01896     while (last_pos>=pos)
01897     {
01898         if (loss.segments_changed[last_pos])
01899         {
01900             changed=true ;
01901             break ;
01902         }
01903         last_pos-- ;
01904     }
01905     if (last_pos<pos)
01906         last_pos = pos ;
01907     
01908     if (!changed)
01909     {
01910         ASSERT(last_pos>=0) ;
01911         ASSERT(last_pos<loss.seqlen) ;
01912         float64_t length_contrib = (pos_array[last_pos]-pos_array[pos])*m_segment_loss.element(m_segment_ids.element(pos), segment_id, 1) ;
01913         float64_t ret = last_value + length_contrib ;
01914         last_pos = pos ;
01915         return ret ;
01916     }
01917 
01918     CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01919     CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01920     float64_t ret = 0.0 ;
01921     for (int32_t i=0; i<max_a_id+1; i++)
01922     {
01923         //SG_DEBUG( "%i: %i, %i, %f (%f), %f (%f)\n", pos, num_segment_id.element(pos, i), length_segment_id.element(pos, i), num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id,0), m_segment_loss.element(i, segment_id, 0), length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1), m_segment_loss.element(i, segment_id,1)) ;
01924 
01925         if (num_segment_id.element(pos, i)!=0)
01926         {
01927             ret += num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 0) ;
01928         //  SG_PRINT("ret:%f pos:%i i:%i segment_id:%i \n",ret,pos,i,segment_id);
01929         //  if (ret>0)
01930         //  {
01931         //      for (int32_t g=0; g<max_a_id+1; g++)
01932         //          SG_PRINT("g:%i sid(pos, g):%i    ",g,num_segment_id.element(pos, g));
01933         //      SG_PRINT("\n");
01934         //  }
01935         }
01936         if (length_segment_id.element(pos, i)!=0)
01937             ret += length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1) ;
01938     }
01939     last_pos = pos ;
01940     last_value = ret ;
01941 
01942 #ifdef DYNPROG_TIMING
01943     MyTime.stop() ;
01944     segment_extend_time += MyTime.time_diff_sec() ;
01945 #endif
01946     return ret ;
01947 }
01948 
01949 void CDynProg::find_segment_loss_till_pos(
01950     const int32_t * pos, int32_t t_end, CArray<int32_t>& segment_ids,
01951     CArray<float64_t>& segment_mask, struct segment_loss_struct & loss)
01952 {
01953 #ifdef DYNPROG_TIMING
01954     MyTime.start() ;
01955 #endif
01956     CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01957     CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01958     
01959     for (int32_t i=0; i<max_a_id+1; i++)
01960     {
01961         num_segment_id.element(t_end, i) = 0 ;
01962         length_segment_id.element(t_end, i) = 0 ;
01963     }
01964     int32_t wobble_pos_segment_id_switch = 0 ;
01965     int32_t last_segment_id = -1 ;
01966     int32_t ts = t_end-1 ;       
01967     while ((ts>=0) && (pos[t_end] - pos[ts] <= loss.maxlookback))
01968     {
01969         int32_t cur_segment_id = segment_ids.element(ts) ;
01970         // allow at most one wobble
01971         bool wobble_pos = (CMath::abs(segment_mask.element(ts))<1e-7) && (wobble_pos_segment_id_switch==0) ;
01972         if (!(cur_segment_id<=max_a_id))
01973             SG_ERROR("(cur_segment_id<=max_a_id), cur_segment_id:%i max_a_id:%i\n",cur_segment_id,max_a_id);
01974         ASSERT(cur_segment_id>=0) ;
01975         
01976         for (int32_t i=0; i<max_a_id+1; i++)
01977         {
01978             num_segment_id.element(ts, i) = num_segment_id.element(ts+1, i) ;
01979             length_segment_id.element(ts, i) = length_segment_id.element(ts+1, i) ;
01980         }
01981         
01982         if (cur_segment_id!=last_segment_id)
01983         {
01984             if (wobble_pos)
01985             {
01986                 //SG_DEBUG( "no change at %i: %i, %i\n", ts, last_segment_id, cur_segment_id) ;
01987                 wobble_pos_segment_id_switch++ ;
01988                 //ASSERT(wobble_pos_segment_id_switch<=1) ;
01989             }
01990             else
01991             {
01992                 //SG_DEBUG( "change at %i: %i, %i\n", ts, last_segment_id, cur_segment_id) ;
01993                 loss.segments_changed[ts] = true ;
01994                 num_segment_id.element(ts, cur_segment_id) += segment_mask.element(ts);
01995                 length_segment_id.element(ts, cur_segment_id) += (int32_t)((pos[ts+1]-pos[ts])*segment_mask.element(ts));
01996                 wobble_pos_segment_id_switch = 0 ;
01997             }
01998             last_segment_id = cur_segment_id ;
01999         } 
02000         else
02001             if (!wobble_pos)
02002                 length_segment_id.element(ts, cur_segment_id) += pos[ts+1] - pos[ts] ;
02003 
02004         ts--;
02005     }
02006 #ifdef DYNPROG_TIMING
02007     MyTime.stop() ;
02008     segment_pos_time += MyTime.time_diff_sec() ;
02009 #endif
02010 }
02011 
02012 void CDynProg::init_svm_values(
02013     struct svm_values_struct & svs, int32_t start_pos, int32_t seqlen,
02014     int32_t maxlookback)
02015 {
02016 #ifdef DYNPROG_TIMING
02017     MyTime.start() ;
02018 #endif
02019     /*
02020       See find_svm_values_till_pos for comments
02021       
02022       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02023       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02024       
02025       where t_end is the end of all segments we are currently looking at
02026     */
02027     int32_t clear_size = CMath::min(maxlookback,seqlen) ;
02028     
02029     if (!svs.svm_values)
02030     {
02031         svs.svm_values              = new float64_t[seqlen*num_svms] ;
02032         svs.num_unique_words        = new int32_t*[num_degrees] ;
02033         svs.svm_values_unnormalized = new float64_t*[num_degrees] ;
02034         svs.word_used               = new bool**[num_degrees] ;
02035         for (int32_t j=0; j<num_degrees; j++)
02036         {
02037             svs.word_used[j]               = new bool*[num_svms] ;
02038             for (int32_t s=0; s<num_svms; s++)
02039                 svs.word_used[j][s]               = new bool[num_words_array[j]] ;
02040         }
02041         for (int32_t j=0; j<num_degrees; j++)
02042         {
02043             svs.svm_values_unnormalized[j] = new float64_t[num_svms] ;
02044             svs.num_unique_words[j]        = new int32_t[num_svms] ;
02045         }
02046         svs.start_pos               = new int32_t[num_svms] ;
02047         clear_size = seqlen ;
02048     }
02049     
02050     //for (int32_t i=0; i<maxlookback*num_svms; i++)       // initializing this for safety, though we should be able to live without it
02051     //  svs.svm_values[i] = 0;
02052     memset(svs.svm_values, 0, clear_size*num_svms*sizeof(float64_t)) ;
02053 
02054     for (int32_t j=0; j<num_degrees; j++)
02055     {       
02056         //for (int32_t s=0; s<num_svms; s++)
02057         //  svs.svm_values_unnormalized[j][s] = 0 ;
02058         memset(svs.svm_values_unnormalized[j], 0, num_svms*sizeof(float64_t)) ;
02059 
02060         //for (int32_t s=0; s<num_svms; s++)
02061         //  svs.num_unique_words[j][s] = 0 ;
02062         memset(svs.num_unique_words[j], 0, num_svms*sizeof(int32_t)) ;
02063     }
02064     
02065     for (int32_t j=0; j<num_degrees; j++)
02066         for (int32_t s=0; s<num_svms; s++)
02067         {
02068             //for (int32_t i=0; i<num_words_array[j]; i++)
02069             //  svs.word_used[j][s][i] = false ;
02070             memset(svs.word_used[j][s], 0, num_words_array[j]*sizeof(bool)) ;
02071         }
02072     
02073     for (int32_t s=0; s<num_svms; s++)
02074         svs.start_pos[s] = start_pos - mod_words.element(s,1) ;
02075     
02076     svs.maxlookback = maxlookback ;
02077     svs.seqlen = seqlen ;
02078 
02079 #ifdef DYNPROG_TIMING
02080     MyTime.stop() ;
02081     svm_init_time += MyTime.time_diff_sec() ;
02082 #endif
02083 }
02084 
02085 void CDynProg::clear_svm_values(struct svm_values_struct & svs) 
02086 {
02087 #ifdef DYNPROG_TIMING
02088     MyTime.start() ;
02089 #endif  
02090     if (NULL != svs.svm_values)
02091     {
02092         for (int32_t j=0; j<num_degrees; j++)
02093         {
02094             for (int32_t s=0; s<num_svms; s++)
02095                 delete[] svs.word_used[j][s] ;
02096             delete[] svs.word_used[j];
02097         }
02098         delete[] svs.word_used;
02099         
02100         for (int32_t j=0; j<num_degrees; j++)
02101             delete[] svs.svm_values_unnormalized[j] ;
02102         for (int32_t j=0; j<num_degrees; j++)
02103             delete[] svs.num_unique_words[j] ;
02104         
02105         delete[] svs.svm_values_unnormalized;
02106         delete[] svs.svm_values;
02107         delete[] svs.num_unique_words ;
02108         
02109         svs.word_used=NULL ;
02110         svs.svm_values=NULL ;
02111         svs.svm_values_unnormalized=NULL ;
02112     }
02113 
02114 #ifdef DYNPROG_TIMING
02115     MyTime.stop() ;
02116     svm_clean_time += MyTime.time_diff_sec() ;
02117 #endif
02118 }
02119 
02120 
02121 void CDynProg::find_svm_values_till_pos(
02122     uint16_t*** wordstr,  const int32_t *pos,  int32_t t_end,
02123     struct svm_values_struct &svs)
02124 {
02125 #ifdef DYNPROG_TIMING
02126     MyTime.start() ;
02127 #endif
02128     
02129     /*
02130       wordstr is a vector of L n-gram indices, with wordstr(i) representing a number betweeen 0 and 4095 
02131       corresponding to the 6-mer in genestr(i-5:i) 
02132       pos is a vector of candidate transition positions (it is input to best_path_trans)
02133       t_end is some index in pos
02134       
02135       svs has been initialized by init_svm_values
02136       
02137       At the end of this procedure, 
02138       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02139       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02140       
02141       The SVM weights are precomputed in dict_weights
02142     */
02143     
02144     for (int32_t j=0; j<num_degrees; j++)
02145     //for (int32_t j=0; j<1; j++)
02146     {
02147         int32_t plen = 1;
02148         int32_t ts = t_end-1;        // index in pos; pos(ts) and pos(t) are indices of wordstr
02149         int32_t offset;
02150         
02151         int32_t posprev = pos[t_end]-word_degree[j]+1;
02152         int32_t poscurrent = pos[ts];
02153         
02154         //SG_DEBUG( "j=%i seqlen=%i posprev = %i, poscurrent = %i", j, svs.seqlen, posprev, poscurrent) ;
02155 
02156         if (poscurrent<0)
02157             poscurrent = 0;
02158         float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02159         int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02160         bool ** my_word_used = svs.word_used[j] ;
02161         
02162         int32_t len = pos[t_end] - poscurrent;
02163         while ((ts>=0) && (len <= svs.maxlookback))
02164         {
02165             for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02166             {
02167                 //SG_PRINT("string_words_array[0]=%i (%ld), j=%i (%ld)  i=%i\n", string_words_array[0], wordstr[string_words_array[0]], j, wordstr[string_words_array[0]][j], i) ;
02168                 
02169                 uint16_t word = wordstr[string_words_array[0]][j][i] ;
02170                 int32_t last_string = string_words_array[0] ;
02171                 for (int32_t s=0; s<num_svms; s++)
02172                 {
02173                 //sign_words_array[s]=false;
02174                     // try to avoid memory accesses
02175                     if (last_string != string_words_array[s])
02176                     {
02177                         last_string = string_words_array[s] ;
02178                         word = wordstr[last_string][j][i] ;
02179                     }
02180 
02181                     // do not consider k-mer, if seen before and in signum mode
02182                     if (sign_words_array[s] && my_word_used[s][word])
02183                         continue ;
02184                     
02185                     // only count k-mer if in frame (if applicable)
02186                     //if ((svs.start_pos[s]-i>0) && ((svs.start_pos[s]-i)%mod_words_array[s]==0))
02187                     {
02188                         my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02189                         //svs.svm_values_unnormalized[j][s]+=dict_weights.element(word+cum_num_words_array[j], s) ;
02190                         my_num_unique_words[s]++ ;
02191                         if (sign_words_array[s])
02192                             my_word_used[s][word]=true ;
02193                     }
02194                 }
02195             }
02196             offset = plen*num_svms ;
02197             for (int32_t s=0; s<num_svms; s++)
02198             {
02199                 float64_t normalization_factor = 1.0;
02200                 if (my_num_unique_words[s] > 0)
02201                 {
02202                     if (sign_words_array[s])
02203                         normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02204                     else
02205                         normalization_factor = (float64_t)my_num_unique_words[s];
02206                 }
02207 
02208                 if (j==0)
02209                     svs.svm_values[offset+s]=0 ;
02210                 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02211             }
02212             
02213             if (posprev > poscurrent)         // remember posprev initially set to pos[t_end]-word_degree+1... pos[ts] could be e.g. pos[t_end]-2
02214                 posprev = poscurrent;           
02215             
02216             ts--;
02217             plen++;
02218             
02219             if (ts>=0)
02220             {
02221                 poscurrent=pos[ts];
02222                 if (poscurrent<0)
02223                     poscurrent = 0;
02224                 len = pos[t_end] - poscurrent;
02225             }
02226         }
02227     }
02228 
02229 #ifdef DYNPROG_TIMING
02230     MyTime.stop() ;
02231     svm_pos_time += MyTime.time_diff_sec() ;
02232 #endif
02233 }
02234 
02235 
02236 void CDynProg::find_svm_values_till_pos(
02237     uint16_t** wordstr,  const int32_t *pos,  int32_t t_end,
02238     struct svm_values_struct &svs)
02239 {
02240 #ifdef DYNPROG_TIMING
02241     MyTime.start() ;
02242 #endif  
02243     /*
02244       wordstr is a vector of L n-gram indices, with wordstr(i) representing a number betweeen 0 and 4095 
02245       corresponding to the 6-mer in genestr(i-5:i) 
02246       pos is a vector of candidate transition positions (it is input to best_path_trans)
02247       t_end is some index in pos
02248       
02249       svs has been initialized by init_svm_values
02250       
02251       At the end of this procedure, 
02252       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02253       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02254       
02255       The SVM weights are precomputed in dict_weights
02256     */
02257     
02258     for (int32_t j=0; j<num_degrees; j++)
02259     //for (int32_t j=0; j<1; j++)
02260     {
02261         int32_t plen = 1;
02262         int32_t ts = t_end-1;        // index in pos; pos(ts) and pos(t) are indices of wordstr
02263         int32_t offset;
02264         
02265         int32_t posprev = pos[t_end]-word_degree[j]+1;
02266         int32_t poscurrent = pos[ts];
02267         
02268         //SG_DEBUG( "j=%i seqlen=%i posprev = %i, poscurrent = %i", j, svs.seqlen, posprev, poscurrent) ;
02269 
02270         if (poscurrent<0)
02271             poscurrent = 0;
02272         float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02273         int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02274         bool ** my_word_used = svs.word_used[j] ;
02275         
02276         int32_t len = pos[t_end] - poscurrent;
02277         while ((ts>=0) && (len <= svs.maxlookback))
02278         {
02279             for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02280             {
02281                 //SG_PRINT("string_words_array[0]=%i (%ld), j=%i (%ld)  i=%i\n", string_words_array[0], wordstr[string_words_array[0]], j, wordstr[string_words_array[0]][j], i) ;
02282                 
02283                 uint16_t word = wordstr[j][i] ;
02284                 for (int32_t s=0; s<num_svms; s++)
02285                 {
02286                     //sign_words_array[s]=false;
02287                     // do not consider k-mer, if seen before and in signum mode
02288                     if (sign_words_array[s] && my_word_used[s][word])
02289                         continue ;
02290                     
02291                     // only count k-mer if in frame (if applicable)
02292                     //if ((svs.start_pos[s]-i>0) && ((svs.start_pos[s]-i)%mod_words_array[s]==0))
02293                     {
02294                         my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02295                         //svs.svm_values_unnormalized[j][s]+=dict_weights.element(word+cum_num_words_array[j], s) ;
02296                         my_num_unique_words[s]++ ;
02297                         if (sign_words_array[s])
02298                             my_word_used[s][word]=true ;
02299                     }
02300                 }
02301             }
02302             offset = plen*num_svms ;
02303             for (int32_t s=0; s<num_svms; s++)
02304             {
02305                 float64_t normalization_factor = 1.0;
02306                 if (my_num_unique_words[s] > 0)
02307                 {
02308                     if (sign_words_array[s])
02309                         normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02310                     else
02311                         normalization_factor = (float64_t)my_num_unique_words[s];
02312                 }
02313 
02314                 if (j==0)
02315                     svs.svm_values[offset+s]=0 ;
02316                 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02317             }
02318             
02319             if (posprev > poscurrent)         // remember posprev initially set to pos[t_end]-word_degree+1... pos[ts] could be e.g. pos[t_end]-2
02320                 posprev = poscurrent;           
02321             
02322             ts--;
02323             plen++;
02324             
02325             if (ts>=0)
02326             {
02327                 poscurrent=pos[ts];
02328                 if (poscurrent<0)
02329                     poscurrent = 0;
02330                 len = pos[t_end] - poscurrent;
02331             }
02332         }
02333     }
02334 
02335 #ifdef DYNPROG_TIMING
02336     MyTime.stop() ;
02337     svm_pos_time += MyTime.time_diff_sec() ;
02338 #endif
02339 }
02340 
02341 bool CDynProg::extend_orf(
02342     int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
02343     int32_t to)
02344 {
02345 #ifdef DYNPROG_TIMING
02346     MyTime.start() ;
02347 #endif
02348     
02349     if (start<0) 
02350         start=0 ;
02351     if (to<0)
02352         to=0 ;
02353     
02354     int32_t orf_target = orf_to-orf_from ;
02355     if (orf_target<0) orf_target+=3 ;
02356     
02357     int32_t pos ;
02358     if (last_pos==to)
02359         pos = to-orf_to-3 ;
02360     else
02361         pos=last_pos ;
02362 
02363     if (pos<0)
02364         return true ;
02365     
02366     for (; pos>=start; pos-=3)
02367         if (m_genestr_stop[pos])
02368             return false ;
02369     
02370     last_pos = CMath::min(pos+3,to-orf_to-3) ;
02371 
02372 #ifdef DYNPROG_TIMING
02373     MyTime.stop() ;
02374     orf_time += MyTime.time_diff_sec() ;
02375 #endif
02376     return true ;
02377 }
02378 
02379 template <int16_t nbest, bool with_loss, bool with_multiple_sequences>
02380 void CDynProg::best_path_trans(
02381     const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
02382     const int32_t *orf_info_array, CPlifBase **Plif_matrix,
02383     CPlifBase **Plif_state_signals, int32_t max_num_signals,
02384     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
02385     int32_t *my_pos_seq, bool use_orf)
02386 {
02387 #ifdef DYNPROG_TIMING
02388     segment_init_time = 0.0 ;
02389     segment_pos_time = 0.0 ;
02390     segment_extend_time = 0.0 ;
02391     segment_clean_time = 0.0 ;
02392     orf_time = 0.0 ;
02393     svm_init_time = 0.0 ;
02394     svm_pos_time = 0.0 ;
02395     svm_clean_time = 0.0 ;
02396     MyTime2.start() ;
02397 #endif
02398     
02399     //SG_PRINT( "best_path_trans:%x\n", seq_array);
02400     if (!svm_arrays_clean)
02401     {
02402         SG_ERROR( "SVM arrays not clean") ;
02403         return ;
02404     }
02405     
02406 #ifdef DYNPROG_DEBUG
02407     transition_matrix_a.set_name("transition_matrix");
02408     transition_matrix_a.display_array();
02409     mod_words.display_array() ;
02410     sign_words.display_array() ;
02411     string_words.display_array() ;
02412     SG_PRINT("use_orf = %i\n", use_orf) ;
02413 #endif
02414     
02415     int32_t max_look_back = 20000 ;
02416     bool use_svm = false ;
02417     //ASSERT(dict_len==num_svms*cum_num_words_array[num_degrees]) ;
02418     //dict_weights.set_array(dictionary_weights, cum_num_words_array[num_degrees], num_svms, false, false) ;
02419     //dict_weights_array=dict_weights.get_array() ;
02420     
02421     SG_PRINT("N:%i, seq_len:%i, max_num_signals:%i\n",N, seq_len, max_num_signals) ;
02422 
02423 //  for (int32_t i=0;i<N*seq_len*max_num_signals;i++)
02424 //      SG_PRINT("(%i)%0.2f ",i,seq_array[i]);
02425 
02426     //CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
02427     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, true) ;
02428     PEN.set_name("PEN");
02429     //CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
02430     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, true) ;
02431     PEN_state_signals.set_name("state_signals");
02432     //CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02433     CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02434     seq_input.set_name("seq_input") ;
02435     //seq_input.display_array() ;
02436     CArray2<float64_t> seq(N, seq_len) ;
02437     seq.set_name("seq") ;
02438     seq.zero() ;
02439 
02440     CArray2<int32_t> orf_info(orf_info_array, N, 2) ;
02441     orf_info.set_name("orf_info") ;
02442     //g_orf_info = orf_info ;
02443     //orf_info.display_array() ;
02444 
02445     float64_t svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data]] ;
02446     { // initialize svm_svalue
02447         for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
02448             svm_value[s]=0 ;
02449     }
02450 
02451     { // convert seq_input to seq
02452       // this is independent of the svm values 
02453         for (int32_t i=0; i<N; i++)
02454             for (int32_t j=0; j<seq_len; j++)
02455                 seq.element(i,j) = 0 ;
02456 
02457         for (int32_t i=0; i<N; i++)
02458             for (int32_t j=0; j<seq_len; j++)
02459                 for (int32_t k=0; k<max_num_signals; k++)
02460                 {
02461                     if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
02462                     {
02463                         // no plif
02464                         seq.element(i,j) = seq_input.element(i,j,k) ;
02465                         break ;
02466                     }
02467                     if (PEN_state_signals.element(i,k)!=NULL)
02468                     {
02469                         // just one plif
02470                         if (CMath::is_finite(seq_input.element(i,j,k)))
02471                             seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input.element(i,j,k), svm_value) ;
02472                         else
02473                             // keep infinity values
02474                             seq.element(i,j) = seq_input.element(i, j, k) ;
02475                     } 
02476                     else
02477                         break ;
02478                 }
02479     }
02480 
02481 
02482     { // determine maximal length of look-back
02483         for (int32_t i=0; i<N; i++)
02484         {
02485             // only consider transitions that are actually allowed
02486             const T_STATES num_elem   = trans_list_forward_cnt[i] ;
02487             const T_STATES *elem_list = trans_list_forward[i] ;
02488                 
02489             for (int32_t jj=0; jj<num_elem; jj++)
02490             {
02491                 T_STATES j = elem_list[jj] ;
02492 
02493                 CPlifBase *penij=PEN.element(i,j) ;
02494                 if (penij==NULL)
02495                     continue ;
02496                 if (penij->get_max_value()>max_look_back)
02497                 {
02498                     SG_DEBUG( "%d %d -> value: %f\n", i,j,penij->get_max_value());
02499                     //penij->print() ;
02500                     max_look_back=(int32_t) (CMath::ceil(penij->get_max_value()));
02501                 }
02502                 if (penij->uses_svm_values())
02503                     use_svm=true ;
02504             }
02505         }
02506     }
02507 
02508     //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr_len) ;
02509     max_look_back = CMath::min(m_genestr_len, max_look_back) ;
02510     SG_DEBUG("use_svm=%i\n", use_svm) ;
02511     
02512     SG_DEBUG("maxlook: %d N: %d nbest: %d \n", max_look_back, N, nbest);
02513     const int32_t look_back_buflen = (max_look_back*N+1)*nbest ;
02514     SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
02515     const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
02516                                   look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
02517                                   seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
02518                                   m_genestr_len*sizeof(bool))/(1024*1024);
02519 
02520     bool is_big = (mem_use>200) || (seq_len>5000) ;
02521 
02522     if (1)//(is_big)
02523     {
02524         SG_DEBUG("calling best_path_trans: seq_len=%i, N=%i, lookback=%i nbest=%i\n", 
02525                      seq_len, N, max_look_back, nbest) ;
02526         SG_DEBUG("allocating %1.2fMB of memory\n", 
02527                      mem_use) ;
02528     }
02529     ASSERT(nbest<32000) ;
02530     
02531 
02532     
02533     CArray3<float64_t> delta(seq_len, N, nbest) ;
02534     delta.set_name("delta");
02535     float64_t* delta_array = delta.get_array() ;
02536     //delta.zero() ;
02537     
02538     CArray3<T_STATES> psi(seq_len, N, nbest) ;
02539     psi.set_name("psi");
02540     //psi.zero() ;
02541     
02542     CArray3<int16_t> ktable(seq_len, N, nbest) ;
02543     ktable.set_name("ktable");
02544     //ktable.zero() ;
02545     
02546     CArray3<int32_t> ptable(seq_len, N, nbest) ;    
02547     ptable.set_name("ptable");
02548     //ptable.zero() ;
02549 
02550     CArray<float64_t> delta_end(nbest) ;
02551     delta_end.set_name("delta_end");
02552     //delta_end.zero() ;
02553     
02554     CArray<T_STATES> path_ends(nbest) ;
02555     path_ends.set_name("path_ends");
02556     //path_ends.zero() ;
02557     
02558     CArray<int16_t> ktable_end(nbest) ;
02559     ktable_end.set_name("ktable_end");
02560     //ktable_end.zero() ;
02561 
02562     float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
02563     memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
02564     int32_t * fixedtempii=new int32_t[look_back_buflen] ;
02565     memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
02566 
02567     CArray<float64_t> oldtempvv(look_back_buflen) ;
02568     oldtempvv.set_name("oldtempvv");
02569     CArray<float64_t> oldtempvv2(look_back_buflen) ;
02570     oldtempvv2.set_name("oldtempvv2");
02571     //oldtempvv.zero() ;
02572     //oldtempvv.display_size() ;
02573     
02574     CArray<int32_t> oldtempii(look_back_buflen) ;
02575     oldtempii.set_name("oldtempii");
02576     CArray<int32_t> oldtempii2(look_back_buflen) ;
02577     oldtempii2.set_name("oldtempii2");
02578     //oldtempii.zero() ;
02579 
02580     CArray<T_STATES> state_seq(seq_len) ;
02581     state_seq.set_name("state_seq");
02582     //state_seq.zero() ;
02583     
02584     CArray<int32_t> pos_seq(seq_len) ;
02585     pos_seq.set_name("pos_seq");
02586     //pos_seq.zero() ;
02587 
02588     
02589     //dict_weights.set_name("dict_weights") ;
02590     word_degree.set_name("word_degree") ;
02591     cum_num_words.set_name("cum_num_words") ;
02592     num_words.set_name("num_words") ;
02593     //word_used.set_name("word_used") ;
02594     //svm_values_unnormalized.set_name("svm_values_unnormalized") ;
02595     svm_pos_start.set_name("svm_pos_start") ;
02596     num_unique_words.set_name("num_unique_words") ;
02597 
02598     PEN.set_name("PEN") ;
02599     seq.set_name("seq") ;
02600     orf_info.set_name("orf_info") ;
02601     
02602     delta.set_name("delta") ;
02603     psi.set_name("psi") ;
02604     ktable.set_name("ktable") ;
02605     ptable.set_name("ptable") ;
02606     delta_end.set_name("delta_end") ;
02607     path_ends.set_name("path_ends") ;
02608     ktable_end.set_name("ktable_end") ;
02609 
02610 #ifdef USE_TMP_ARRAYCLASS
02611     fixedtempvv.set_name("fixedtempvv") ;
02612     fixedtempii.set_name("fixedtempvv") ;
02613 #endif
02614 
02615     oldtempvv.set_name("oldtempvv") ;
02616     oldtempvv2.set_name("oldtempvv2") ;
02617     oldtempii.set_name("oldtempii") ;
02618     oldtempii2.set_name("oldtempii2") ;
02619 
02620 
02622 
02623 #ifdef DYNPROG_DEBUG
02624     state_seq.display_size() ;
02625     pos_seq.display_size() ;
02626 
02627     dict_weights.display_size() ;
02628     word_degree.display_array() ;
02629     cum_num_words.display_array() ;
02630     num_words.display_array() ;
02631     //word_used.display_size() ;
02632     //svm_values_unnormalized.display_size() ;
02633     svm_pos_start.display_array() ;
02634     num_unique_words.display_array() ;
02635 
02636     PEN.display_size() ;
02637     PEN_state_signals.display_size() ;
02638     seq.display_size() ;
02639     orf_info.display_size() ;
02640     
02641     //m_genestr_stop.display_size() ;
02642     delta.display_size() ;
02643     psi.display_size() ;
02644     ktable.display_size() ;
02645     ptable.display_size() ;
02646     delta_end.display_size() ;
02647     path_ends.display_size() ;
02648     ktable_end.display_size() ;
02649 
02650 #ifdef USE_TMP_ARRAYCLASS
02651     fixedtempvv.display_size() ;
02652     fixedtempii.display_size() ;
02653 #endif
02654 
02655     //oldtempvv.display_size() ;
02656     //oldtempii.display_size() ;
02657 
02658     state_seq.display_size() ;
02659     pos_seq.display_size() ;
02660 
02661     CArray<int32_t>pp = CArray<int32_t>(pos, seq_len) ;
02662     pp.set_name("pp");
02663     pp.display_array() ;
02664     
02665     //seq.zero() ;
02666     //seq_input.display_array() ;
02667 
02668 #endif //DYNPROG_DEBUG
02669 
02671 
02672 
02673 
02674     {
02675         for (int32_t s=0; s<num_svms; s++)
02676             ASSERT(string_words_array[s]<genestr_num)  ;
02677     }
02678 
02679     
02680         //CArray2<int32_t*> trans_matrix_svms(N,N);
02681     //CArray2<int32_t> trans_matrix_num_svms(N,N);
02682 
02683     { // initialization
02684 
02685         for (T_STATES i=0; i<N; i++)
02686         {
02687             //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
02688             delta.element(delta_array, 0, i, 0, seq_len, N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
02689             psi.element(0,i,0)   = 0 ;
02690             if (nbest>1)
02691                 ktable.element(0,i,0)  = 0 ;
02692             ptable.element(0,i,0)  = 0 ;
02693             for (int16_t k=1; k<nbest; k++)
02694             {
02695                 int32_t dim1, dim2, dim3 ;
02696                 delta.get_array_size(dim1, dim2, dim3) ;
02697                 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ;
02698                 //delta.element(0, i, k)    = -CMath::INFTY ;
02699                 delta.element(delta_array, 0, i, k, seq_len, N)    = -CMath::INFTY ;
02700                 psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
02701                 if (nbest>1)
02702                     ktable.element(0,i,k)     = 0 ;
02703                 ptable.element(0,i,k)     = 0 ;
02704             }
02705 /*
02706             for (T_STATES j=0; j<N; j++)
02707             {
02708                 CPlifBase * penalty = PEN.element(i,j) ;
02709                 int32_t num_current_svms=0;
02710                 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02711                 if (penalty)
02712                 {
02713                     SG_PRINT("trans %i -> %i \n",i,j);
02714                     penalty->get_used_svms(&num_current_svms, svm_ids);
02715                     trans_matrix_svms.set_element(svm_ids,i,j);
02716                     for (int32_t l=0;l<num_current_svms;l++)
02717                         SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]);
02718                     trans_matrix_num_svms.set_element(num_current_svms,i,j);
02719                 }
02720             }
02721 */
02722 
02723         }
02724     }
02725 
02726     /*struct svm_values_struct svs;
02727     svs.num_unique_words = NULL;
02728     svs.svm_values = NULL;
02729     svs.svm_values_unnormalized = NULL;
02730     svs.word_used = NULL;*/
02731 
02732     struct segment_loss_struct loss;
02733     loss.segments_changed = NULL;
02734     loss.num_segment_id = NULL;
02735 
02736     SG_DEBUG("START_RECURSION \n\n");
02737 
02738     // recursion
02739     for (int32_t t=1; t<seq_len; t++)
02740     {
02741         if (is_big && t%(1+(seq_len/1000))==1)
02742             SG_PROGRESS(t, 0, seq_len);
02743         //SG_PRINT("%i\n", t) ;
02744 
02745         if (with_loss)
02746         {
02747             init_segment_loss(loss, seq_len, max_look_back);
02748             find_segment_loss_till_pos(pos, t, m_segment_ids, m_segment_mask, loss);  
02749         }
02750         
02751         for (T_STATES j=0; j<N; j++)
02752         {
02753             if (seq.element(j,t)<=-1e20)
02754             { // if we cannot observe the symbol here, then we can omit the rest
02755                 for (int16_t k=0; k<nbest; k++)
02756                 {
02757                     delta.element(delta_array, t, j, k, seq_len, N)    = seq.element(j,t) ;
02758                     psi.element(t,j,k)      = 0 ;
02759                     if (nbest>1)
02760                         ktable.element(t,j,k)     = 0 ;
02761                     ptable.element(t,j,k)     = 0 ;
02762                 }
02763             }
02764             else
02765             {
02766                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
02767                 const T_STATES *elem_list = trans_list_forward[j] ;
02768                 const float64_t *elem_val      = trans_list_forward_val[j] ;
02769                 const int32_t *elem_id      = trans_list_forward_id[j] ;
02770                 
02771                 int32_t fixed_list_len = 0 ;
02772                 float64_t fixedtempvv_ = CMath::INFTY ;
02773                 int32_t fixedtempii_ = 0 ;
02774                 
02775             
02776                 for (int32_t i=0; i<num_elem; i++)
02777                 {
02778                     T_STATES ii = elem_list[i] ;
02779                     
02780                     const CPlifBase * penalty = PEN.element(j,ii) ;
02781                     int32_t look_back = max_look_back ;
02782                     { // find lookback length
02783                         CPlifBase *pen = (CPlifBase*) penalty ;
02784                         if (pen!=NULL)
02785                             look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
02786                         if (look_back>=1e6)
02787                             SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
02788                         ASSERT(look_back<1e6);
02789                     }
02790                     //int32_t num_current_svms = trans_matrix_num_svms.element(j,ii);
02791                     //int32_t* svm_ids = trans_matrix_svms.element(j,ii);
02792                     //int32_t* svm_ids[num_current_svms];
02793                     //for (int32_t id=0;id<num_current_svms;id++)
02794                     //  svm_ids[id]=*(p_svm_ids+id);
02795                 
02796                     int32_t orf_from = orf_info.element(ii,0) ;
02797                     int32_t orf_to   = orf_info.element(j,1) ;
02798                     if((orf_from!=-1)!=(orf_to!=-1))
02799                         SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02800                     ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02801                     
02802                     int32_t orf_target = -1 ;
02803                     if (orf_from!=-1)
02804                     {
02805                         orf_target=orf_to-orf_from ;
02806                         if (orf_target<0) 
02807                             orf_target+=3 ;
02808                         ASSERT(orf_target>=0 && orf_target<3) ;
02809                     }
02810                     
02811                     int32_t orf_last_pos = pos[t] ;
02812                     int32_t loss_last_pos = t ;
02813                     float64_t last_loss = 0.0 ;
02814 
02815                     for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
02816                     {
02817                         bool ok ;
02818                         //int32_t plen=t-ts;
02819 
02820                         /*for (int32_t s=0; s<num_svms; s++)
02821                             if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
02822                                 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
02823                             {
02824                                 SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]);
02825                                 }*/
02826                         
02827                         if (orf_target==-1)
02828                             ok=true ;
02829                         else if (pos[ts]!=-1 && (pos[t]-pos[ts])%3==orf_target)
02830                             ok=(!use_orf) || extend_orf(orf_from, orf_to, pos[ts], orf_last_pos, pos[t]) ;
02831                         else
02832                             ok=false ;
02833                         
02834                         if (ok)
02835                         {
02836                             
02837                             float64_t segment_loss = 0.0 ;
02838                             if (with_loss)
02839                                 segment_loss = extend_segment_loss(loss, pos, elem_id[i], ts, loss_last_pos, last_loss) ;
02840 
02842                             // BEST_PATH_TRANS
02844                             int32_t frame = orf_info.element(ii,0);
02845                             lookup_content_svm_values(ts, t, pos[ts], pos[t], svm_value, frame);
02846 
02847                             //int32_t offset = plen*num_svms ;
02848                             //for (int32_t ss=0; ss<num_svms; ss++)
02849                             //{
02850                             //  //svm_value[ss]=svs.svm_values[offset+ss];
02851                             //  svm_value[ss]=new_svm_value[ss];
02852                             //  //if (CMath::abs(new_svm_value[ss]-svm_value[ss])>1e-5)
02853                             //  //  SG_PRINT("ts: %i t: %i  precomp: %f old: %f diff: %f \n",ts, t,new_svm_value[ss],svm_value[ss], CMath::abs(new_svm_value[ss]-svm_value[ss]));
02854                             //}
02855 
02856                             float64_t pen_val = 0.0 ;
02857                             if (penalty)
02858                                 pen_val = penalty->lookup_penalty(pos[t]-pos[ts], svm_value) ;
02859                             if (nbest==1)
02860                             {
02861                                 float64_t  val        = elem_val[i] + pen_val ;
02862                                 if (with_loss)
02863                                     val              += segment_loss ;
02864                                 
02865                                 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, seq_len, N)) ;
02866                                 if (mval<fixedtempvv_)
02867                                 {
02868                                     fixedtempvv_ = mval ;
02869                                     fixedtempii_ = ii + ts*N;
02870                                     fixed_list_len = 1 ;
02871                                 }
02872                             }
02873                             else
02874                             {
02875                                 for (int16_t diff=0; diff<nbest; diff++)
02876                                 {
02877                                     float64_t  val        = elem_val[i]  ;
02878                                     val              += pen_val ;
02879                                     if (with_loss)
02880                                         val              += segment_loss ;
02881                                     
02882                                     float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, seq_len, N)) ;
02883                                     
02884                                     /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
02885                                     /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
02886                                     /* fixed_list_len has the number of elements in fixedtempvv */
02887                                     
02888                                     if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
02889                                     {
02890                                         if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
02891                                         {
02892                                             fixedtempvv[fixed_list_len] = mval ;
02893                                             fixedtempii[fixed_list_len] = ii + diff*N + ts*N*nbest;
02894                                             fixed_list_len++ ;
02895                                         }
02896                                         else  // must have mval < fixedtempvv[fixed_list_len-1]
02897                                         {
02898                                             int32_t addhere = fixed_list_len;
02899                                             while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
02900                                                 addhere--;
02901                                             
02902                                             // move everything from addhere+1 one forward 
02903                                             for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
02904                                             {
02905                                                 fixedtempvv[jj] = fixedtempvv[jj-1];
02906                                                 fixedtempii[jj] = fixedtempii[jj-1];
02907                                             }
02908                                             
02909                                             fixedtempvv[addhere] = mval;
02910                                             fixedtempii[addhere] = ii + diff*N + ts*N*nbest;
02911                                             
02912                                             if (fixed_list_len < nbest)
02913                                                 fixed_list_len++;
02914                                         }
02915                                     }
02916                                 }
02917                             }
02918                         }
02919                     }
02920                 }
02921                 
02922                 
02923                 int32_t numEnt = fixed_list_len;
02924                 
02925                 float64_t minusscore;
02926                 int64_t fromtjk;
02927                 
02928                 for (int16_t k=0; k<nbest; k++)
02929                 {
02930                     if (k<numEnt)
02931                     {
02932                         if (nbest==1)
02933                         {
02934                             minusscore = fixedtempvv_ ;
02935                             fromtjk = fixedtempii_ ;
02936                         }
02937                         else
02938                         {
02939                             minusscore = fixedtempvv[k];
02940                             fromtjk = fixedtempii[k];
02941                         }
02942                         
02943                         delta.element(delta_array, t, j, k, seq_len, N)    = -minusscore + seq.element(j,t);
02944                         psi.element(t,j,k)      = (fromtjk%N) ;
02945                         if (nbest>1)
02946                             ktable.element(t,j,k)   = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
02947                         ptable.element(t,j,k)   = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
02948                     }
02949                     else
02950                     {
02951                         delta.element(delta_array, t, j, k, seq_len, N)    = -CMath::INFTY ;
02952                         psi.element(t,j,k)      = 0 ;
02953                         if (nbest>1)
02954                             ktable.element(t,j,k)     = 0 ;
02955                         ptable.element(t,j,k)     = 0 ;
02956                     }
02957                 }
02958             }
02959         }
02960     }
02961 
02962     //clear_svm_values(svs);
02963     if (with_loss)
02964         clear_segment_loss(loss);
02965 
02966     { //termination
02967         int32_t list_len = 0 ;
02968         for (int16_t diff=0; diff<nbest; diff++)
02969         {
02970             for (T_STATES i=0; i<N; i++)
02971             {
02972                 oldtempvv[list_len] = -(delta.element(delta_array, (seq_len-1), i, diff, seq_len, N)+get_q(i)) ;
02973                 oldtempii[list_len] = i + diff*N ;
02974                 list_len++ ;
02975             }
02976         }
02977         
02978         CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02979         
02980         for (int16_t k=0; k<nbest; k++)
02981         {
02982             delta_end.element(k) = -oldtempvv[k] ;
02983             path_ends.element(k) = (oldtempii[k]%N) ;
02984             if (nbest>1)
02985                 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
02986         }
02987     }
02988     
02989     { //state sequence backtracking     
02990         for (int16_t k=0; k<nbest; k++)
02991         {
02992             prob_nbest[k]= delta_end.element(k) ;
02993             
02994             int32_t i         = 0 ;
02995             state_seq[i]  = path_ends.element(k) ;
02996             int16_t q   = 0 ;
02997             if (nbest>1)
02998                 q=ktable_end.element(k) ;
02999             pos_seq[i]    = seq_len-1 ;
03000 
03001             while (pos_seq[i]>0)
03002             {
03003                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03004                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03005                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
03006                 if (nbest>1)
03007                     q              = ktable.element(pos_seq[i], state_seq[i], q) ;
03008                 i++ ;
03009             }
03010             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03011             int32_t num_states = i+1 ;
03012             for (i=0; i<num_states;i++)
03013             {
03014                 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03015                 my_pos_seq[i+k*seq_len]   = pos_seq[num_states-i-1] ;
03016             }
03017             my_state_seq[num_states+k*seq_len]=-1 ;
03018             my_pos_seq[num_states+k*seq_len]=-1 ;
03019         }
03020     }
03021     
03022     if (is_big)
03023         SG_PRINT( "DONE.     \n") ;
03024 
03025 
03026 #ifdef DYNPROG_TIMING
03027     MyTime2.stop() ;
03028     
03029     if (is_big)
03030         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, MyTime2.time_diff_sec()) ;
03031 #endif
03032 
03033     delete[] fixedtempvv ;
03034     delete[] fixedtempii ;
03035 }
03036 
03037 void CDynProg::best_path_trans_deriv(
03038     int32_t *my_state_seq, int32_t *my_pos_seq, float64_t *my_scores,
03039     float64_t* my_losses,int32_t my_seq_len, const float64_t *seq_array,
03040     int32_t seq_len, const int32_t *pos, CPlifBase **Plif_matrix,
03041     CPlifBase **Plif_state_signals, int32_t max_num_signals,int32_t genestr_num)
03042 {   
03043     if (!svm_arrays_clean)
03044     {
03045         SG_ERROR( "SVM arrays not clean") ;
03046         return ;
03047     } ;
03048     //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ;
03049     //mod_words.display() ;
03050     //sign_words.display() ;
03051     //string_words.display() ;
03052 
03053     bool use_svm = false ;
03054     //ASSERT(dict_len==num_svms*cum_num_words_array[num_degrees]) ;
03055     //dict_weights.set_array(dictionary_weights, cum_num_words_array[num_degrees], num_svms, false, false) ;
03056     //dict_weights_array=dict_weights.get_array() ;
03057     
03058     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
03059     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
03060     CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
03061     
03062     { // determine whether to use svm outputs and clear derivatives
03063         for (int32_t i=0; i<N; i++)
03064             for (int32_t j=0; j<N; j++)
03065             {
03066                 CPlifBase *penij=PEN.element(i,j) ;
03067                 if (penij==NULL)
03068                     continue ;
03069                 
03070                 if (penij->uses_svm_values())
03071                     use_svm=true ;
03072                 penij->penalty_clear_derivative() ;
03073             }
03074         for (int32_t i=0; i<N; i++)
03075             for (int32_t j=0; j<max_num_signals; j++)
03076             {
03077                 CPlifBase *penij=PEN_state_signals.element(i,j) ;
03078                 if (penij==NULL)
03079                     continue ;
03080                 if (penij->uses_svm_values())
03081                     use_svm=true ;
03082                 penij->penalty_clear_derivative() ;
03083             }
03084     }
03085 
03086     { // set derivatives of p, q and a to zero
03087         for (int32_t i=0; i<N; i++)
03088         {
03089             initial_state_distribution_p_deriv.element(i)=0 ;
03090             end_state_distribution_q_deriv.element(i)=0 ;
03091             for (int32_t j=0; j<N; j++)
03092                 transition_matrix_a_deriv.element(i,j)=0 ;
03093         }
03094     }
03095     
03096     { // clear score vector
03097         for (int32_t i=0; i<my_seq_len; i++)
03098         {
03099             my_scores[i]=0.0 ;
03100             my_losses[i]=0.0 ;
03101         }
03102     }
03103 
03104     //int32_t total_len = 0 ;
03105     
03106     //transition_matrix_a.display_array() ;
03107     //transition_matrix_a_id.display_array() ;
03108     
03109     { // compute derivatives for given path
03110         float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]];
03111         for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
03112             svm_value[s]=0 ;
03113         
03114         for (int32_t i=0; i<my_seq_len; i++)
03115         {
03116             my_scores[i]=0.0 ;
03117             my_losses[i]=0.0 ;
03118         }
03119         
03120 //#ifdef DYNPROG_DEBUG
03121         float64_t total_score = 0.0 ;
03122         float64_t total_loss = 0.0 ;
03123 //#endif        
03124 
03125         ASSERT(my_state_seq[0]>=0) ;
03126         initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
03127         my_scores[0] += initial_state_distribution_p.element(my_state_seq[0]) ;
03128 
03129         ASSERT(my_state_seq[my_seq_len-1]>=0) ;
03130         end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
03131         my_scores[my_seq_len-1] += end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
03132 
03133 //#ifdef DYNPROG_DEBUG
03134         total_score += my_scores[0] + my_scores[my_seq_len-1] ;
03135 //#endif        
03136         struct svm_values_struct svs;
03137         svs.num_unique_words = NULL;
03138         svs.svm_values = NULL;
03139         svs.svm_values_unnormalized = NULL;
03140         svs.word_used = NULL;
03141         //init_svm_values(svs, my_state_seq[0], seq_len, 100);
03142 
03143         struct segment_loss_struct loss;
03144         loss.segments_changed = NULL;
03145         loss.num_segment_id = NULL;
03146         //SG_DEBUG( "seq_len=%i\n", my_seq_len) ;
03147         for (int32_t i=0; i<my_seq_len-1; i++)
03148         {
03149             if (my_state_seq[i+1]==-1)
03150                 break ;
03151             int32_t from_state = my_state_seq[i] ;
03152             int32_t to_state   = my_state_seq[i+1] ;
03153             int32_t from_pos   = my_pos_seq[i] ;
03154             int32_t to_pos     = my_pos_seq[i+1] ;
03155 
03156             // compute loss relative to another segmentation using the segment_loss function
03157             init_segment_loss(loss, seq_len, pos[to_pos]-pos[from_pos]+10);
03158             find_segment_loss_till_pos(pos, to_pos,m_segment_ids, m_segment_mask, loss);  
03159 
03160             int32_t loss_last_pos = to_pos ;
03161             float64_t last_loss = 0.0 ;
03162             int32_t elem_id = transition_matrix_a_id.element(from_state, to_state) ;
03163             my_losses[i] = extend_segment_loss(loss, pos, elem_id, from_pos, loss_last_pos, last_loss) ;
03164 #ifdef DYNPROG_DEBUG
03165             io->set_loglevel(M_DEBUG) ;
03166             SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
03167 #endif
03168             // increase usage of this transition
03169             transition_matrix_a_deriv.element(from_state, to_state)++ ;
03170             my_scores[i] += transition_matrix_a.element(from_state, to_state) ;
03171             //SG_PRINT("transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, transition_matrix_a.element(from_state, to_state));
03172 #ifdef DYNPROG_DEBUG
03173             SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03174 #endif
03175             
03176             /*int32_t last_svm_pos[num_degrees] ;
03177             for (int32_t qq=0; qq<num_degrees; qq++)
03178             last_svm_pos[qq]=-1 ;*/
03179             
03180             if (use_svm)
03181             {
03182                 //SG_PRINT("from_pos: %i; to_pos: %i; pos[to_pos]-pos[from_pos]: %i \n",from_pos, to_pos, pos[to_pos]-pos[from_pos]); 
03183                 int32_t frame = m_orf_info.element(from_state,0);
03184                 if (false)//(frame>=0)
03185                 {
03186                     int32_t num_current_svms=0;
03187                     int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03188                     SG_PRINT("penalties(%i, %i), frame:%i  ", from_state, to_state, frame);
03189                     PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
03190                     SG_PRINT("\n");
03191                 }
03192 
03193 
03194                 lookup_content_svm_values(from_pos, to_pos, pos[from_pos],pos[to_pos], svm_value, frame);
03195                 if (false)//(frame>=0)
03196                     SG_PRINT("svm_values: %f, %f, %f \n", svm_value[4], svm_value[5], svm_value[6]);
03197                 //SG_PRINT("svm_values: %f, %f, %f, %f \n", svm_value[8], svm_value[9], svm_value[10], svm_value[11]);
03198     
03199                 //#ifdef DYNPROG_DEBUG
03200                 //                  SG_DEBUG( "svm[%i]: %f\n", ss, svm_value[ss]) ;
03201                 //#endif
03202             }
03203             
03204             if (PEN.element(to_state, from_state)!=NULL)
03205             {
03206                 float64_t nscore = PEN.element(to_state, from_state)->lookup_penalty(pos[to_pos]-pos[from_pos], svm_value) ;
03207                 my_scores[i] += nscore ;
03208 
03209                 for (int32_t s=num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data];s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
03210                     svm_value[s]=-CMath::INFTY;
03211             
03212 #ifdef DYNPROG_DEBUG
03213                 //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, pos[from_pos], to_pos, pos[to_pos], pos[to_pos]-pos[from_pos]) ;
03214 
03215                 /*
03216                   int32_t orf_from = g_orf_info.element(from_state,0) ;
03217                   int32_t orf_to   = g_orf_info.element(to_state,1) ;
03218                   ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
03219                   if (orf_from != -1)
03220                   total_len = total_len + pos[to_pos]-pos[from_pos] ;
03221                   
03222                   SG_DEBUG( "%i. orf_info: from_orf=%i to_orf=%i orf_diff=%i, len=%i, lenmod3=%i, total_len=%i, total_lenmod3=%i\n", i, orf_from, orf_to, (orf_to-orf_from)%3, pos[to_pos]-pos[from_pos], (pos[to_pos]-pos[from_pos])%3, total_len, total_len%3) ;
03223                 */
03224 #endif
03225                 PEN.element(to_state, from_state)->penalty_add_derivative(pos[to_pos]-pos[from_pos], svm_value) ;
03226                 for (int32_t d=1;d<=m_num_raw_data;d++) 
03227                 {
03228                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data];s++)
03229                         svm_value[s]=-CMath::INFTY;
03230                     float64_t* intensities = new float64_t[m_num_probes_cum[d]];
03231                     int32_t num_intensities = raw_intensities_interval_query(pos[from_pos], pos[to_pos],intensities, d);
03232                     //SG_PRINT("pos[from_pos]:%i, pos[to_pos]:%i, num_intensities:%i\n",pos[from_pos],pos[to_pos], num_intensities);
03233                     for (int32_t k=0;k<num_intensities;k++)
03234                     {
03235                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
03236                             svm_value[j]=intensities[k];
03237                         
03238                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value) ;   
03239                         
03240                     }
03241                     delete[] intensities;
03242                 }
03243             }
03244 #ifdef DYNPROG_DEBUG
03245             SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03246 #endif
03247 
03248             //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ;
03249             for (int32_t k=0; k<max_num_signals; k++)
03250             {
03251                 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
03252                 {
03253 #ifdef DYNPROG_DEBUG
03254                     SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
03255 #endif
03256                     my_scores[i] += seq_input.element(to_state, to_pos, k) ;
03257                     //if (seq_input.element(to_state, to_pos, k) !=0)
03258                     //  SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k));
03259                     break ;
03260                 }
03261                 if (PEN_state_signals.element(to_state, k)!=NULL)
03262                 {
03263                     float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
03264                     my_scores[i] += nscore ;
03265                     //break ;
03266                     //int32_t num_current_svms=0;
03267                     //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03268                     //SG_PRINT("PEN_state_signals->id: ");
03269                     //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
03270                     //SG_PRINT("\n");
03271                     //if (nscore != 0)
03272                         //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03273 #ifdef DYNPROG_DEBUG
03274                     SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03275 #endif
03276                     PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value) ;
03277                 } else
03278                     break ;
03279             }
03280             
03281 //#ifdef DYNPROG_DEBUG
03282             //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ;
03283             //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ;
03284             total_score += my_scores[i] ;
03285             total_loss += my_losses[i] ;
03286 //#endif
03287         }
03288 //#ifdef DYNPROG_DEBUG
03289         //SG_PRINT( "total score = %f \n", total_score) ;
03290         SG_PRINT( "total loss = %f \n", total_loss) ;
03291 //#endif
03292         clear_svm_values(svs);
03293         clear_segment_loss(loss);
03294         delete[] svm_value;
03295     }
03296 
03297     
03298 }
03299 
03300 
03301 void CDynProg::best_path_trans_simple(
03302     const float64_t *seq_array, int32_t seq_len, int16_t nbest,
03303     float64_t *prob_nbest, int32_t *my_state_seq)
03304 {
03305     if (!svm_arrays_clean)
03306     {
03307         SG_ERROR( "SVM arrays not clean") ;
03308         return ;
03309     } ;
03310 
03311     int32_t max_look_back = 2 ;
03312     const int32_t look_back_buflen = max_look_back*nbest*N ;
03313     ASSERT(nbest<32000) ;
03314         
03315     CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
03316 
03317     CArray3<float64_t> delta(max_look_back, N, nbest) ;
03318     CArray3<T_STATES> psi(seq_len, N, nbest) ;
03319     CArray3<int16_t> ktable(seq_len,N,nbest) ;
03320     CArray3<int32_t> ptable(seq_len,N,nbest) ;
03321 
03322     CArray<float64_t> delta_end(nbest) ;
03323     CArray<T_STATES> path_ends(nbest) ;
03324     CArray<int16_t> ktable_end(nbest) ;
03325 
03326     CArray<float64_t> oldtempvv(look_back_buflen) ;
03327     CArray<int32_t> oldtempii(look_back_buflen) ;
03328 
03329     CArray<T_STATES> state_seq(seq_len) ;
03330     CArray<int32_t> pos_seq(seq_len) ;
03331 
03332     { // initialization
03333 
03334         for (T_STATES i=0; i<N; i++)
03335         {
03336             delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
03337             psi.element(0,i,0)   = 0 ;
03338             ktable.element(0,i,0)  = 0 ;
03339             ptable.element(0,i,0)  = 0 ;
03340             for (int16_t k=1; k<nbest; k++)
03341             {
03342                 delta.element(0,i,k)    = -CMath::INFTY ;
03343                 psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
03344                 ktable.element(0,i,k)     = 0 ;
03345                 ptable.element(0,i,k)     = 0 ;
03346             }
03347         }
03348     }
03349 
03350     // recursion
03351     for (int32_t t=1; t<seq_len; t++)
03352     {
03353         for (T_STATES j=0; j<N; j++)
03354         {
03355             if (seq.element(j,t)<-1e20)
03356             { // if we cannot observe the symbol here, then we can omit the rest
03357                 for (int16_t k=0; k<nbest; k++)
03358                 {
03359                     delta.element(t%max_look_back,j,k)    = seq.element(j,t) ;
03360                     psi.element(t,j,k)      = 0 ;
03361                     ktable.element(t,j,k)     = 0 ;
03362                     ptable.element(t,j,k)     = 0 ;
03363                 }
03364             }
03365             else
03366             {
03367                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
03368                 const T_STATES *elem_list = trans_list_forward[j] ;
03369                 const float64_t *elem_val      = trans_list_forward_val[j] ;
03370                 
03371                 int32_t old_list_len = 0 ;
03372                 
03373                 for (int32_t i=0; i<num_elem; i++)
03374                 {
03375                     T_STATES ii = elem_list[i] ;
03376 
03377                     int32_t ts=t-1; 
03378                     if (ts>=0)
03379                     {
03380                         bool ok=true ;
03381                         
03382                         if (ok)
03383                         {
03384 
03385                           
03386                           for (int16_t diff=0; diff<nbest; diff++)
03387                             {
03388                               float64_t  val        = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
03389                               float64_t mval = -val;
03390 
03391                               oldtempvv[old_list_len] = mval ;
03392                               oldtempii[old_list_len] = ii + diff*N + ts*N*nbest;
03393                               old_list_len++ ;
03394                             }
03395                         }
03396                     }
03397                 }
03398                 
03399                 CMath::nmin<int32_t>(oldtempvv.get_array(), oldtempii.get_array(), old_list_len, nbest) ;
03400 
03401                 int32_t numEnt = 0;
03402                 numEnt = old_list_len;
03403 
03404                 float64_t minusscore;
03405                 int64_t fromtjk;
03406 
03407                 for (int16_t k=0; k<nbest; k++)
03408                 {
03409                     if (k<numEnt)
03410                     {
03411                         minusscore = oldtempvv[k];
03412                         fromtjk = oldtempii[k];
03413                         
03414                         delta.element(t%max_look_back,j,k)    = -minusscore + seq.element(j,t);
03415                         psi.element(t,j,k)      = (fromtjk%N) ;
03416                         ktable.element(t,j,k)     = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
03417                         ptable.element(t,j,k)     = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
03418                     }
03419                     else
03420                     {
03421                         delta.element(t%max_look_back,j,k)    = -CMath::INFTY ;
03422                         psi.element(t,j,k)      = 0 ;
03423                         ktable.element(t,j,k)     = 0 ;
03424                         ptable.element(t,j,k)     = 0 ;
03425                     }
03426                 }
03427                 
03428             }
03429         }
03430     }
03431 
03432     
03433     { //termination
03434         int32_t list_len = 0 ;
03435         for (int16_t diff=0; diff<nbest; diff++)
03436         {
03437             for (T_STATES i=0; i<N; i++)
03438             {
03439                 oldtempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
03440                 oldtempii[list_len] = i + diff*N ;
03441                 list_len++ ;
03442             }
03443         }
03444         
03445         CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
03446         
03447         for (int16_t k=0; k<nbest; k++)
03448         {
03449             delta_end.element(k) = -oldtempvv[k] ;
03450             path_ends.element(k) = (oldtempii[k]%N) ;
03451             ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
03452         }
03453     }
03454     
03455     { //state sequence backtracking     
03456         for (int16_t k=0; k<nbest; k++)
03457         {
03458             prob_nbest[k]= delta_end.element(k) ;
03459             
03460             int32_t i         = 0 ;
03461             state_seq[i]  = path_ends.element(k) ;
03462             int16_t q   = ktable_end.element(k) ;
03463             pos_seq[i]    = seq_len-1 ;
03464 
03465             while (pos_seq[i]>0)
03466             {
03467                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03468                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03469                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
03470                 q              = ktable.element(pos_seq[i], state_seq[i], q) ;
03471                 i++ ;
03472             }
03473             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03474             int32_t num_states = i+1 ;
03475             for (i=0; i<num_states;i++)
03476             {
03477                 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03478             }
03479             //my_state_seq[num_states+k*seq_len]=-1 ;
03480         }
03481 
03482     }
03483 }
03484 

SHOGUN Machine Learning Toolbox - Documentation