Trie.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2009 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014 
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/DynamicArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021 
00022 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00023 
00024 // sentinel is 0xFFFFFFFC or float -2
00025 #define NO_CHILD ((int32_t)-1073741824)
00026 
00027 #define WEIGHTS_IN_TRIE 
00028 //#define TRIE_CHECK_EVERYTHING
00029 
00030 #ifdef TRIE_CHECK_EVERYTHING
00031 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00032 #else
00033 #define TRIE_ASSERT_EVERYTHING(x) 
00034 #endif
00035 
00036 //#define TRIE_ASSERT(x) ASSERT(x)
00037 #define TRIE_ASSERT(x) 
00038 
00039 #define TRIE_TERMINAL_CHARACTER  7
00040 
00042 struct ConsensusEntry
00043 {
00045     uint64_t string;
00047     float32_t score;
00049     int32_t bt;
00050 };
00051 
00053 struct POIMTrie
00054 {
00056     float64_t weight;
00057 #ifdef TRIE_CHECK_EVERYTHING
00058 
00059     bool has_seq;
00061     bool has_floats;
00062 #endif      
00063     union
00064     {
00066         float32_t child_weights[4];
00068         int32_t children[4];
00070         uint8_t seq[16] ;
00071     }; 
00072 
00074     float64_t S;
00076     float64_t L;
00078     float64_t R;
00079 };
00080 
00082 struct DNATrie
00083 {
00085     float64_t weight;
00086 #ifdef TRIE_CHECK_EVERYTHING
00087 
00088     bool has_seq;
00090     bool has_floats;
00091 #endif
00092     union
00093     {
00095         float32_t child_weights[4];
00097         int32_t children[4];
00099         uint8_t seq[16] ;
00100     }; 
00101 };
00102 
00104 struct TreeParseInfo {
00106     int32_t num_sym;
00108     int32_t num_feat;
00110     int32_t p;
00112     int32_t k;
00114     int32_t* nofsKmers;
00116     float64_t* margFactors;
00118     int32_t* x;
00120     int32_t* substrs;
00122     int32_t y0;
00124     float64_t* C_k;
00126     float64_t* L_k;
00128     float64_t* R_k;
00129 };
00130 
00131 #endif // DOXYGEN_SHOULD_SKIP_THIS
00132 
00133 template <class Trie> class CTrie;
00134 
00153 template <class Trie> class CTrie : public CSGObject
00154 {
00155     public:
00162         CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00163 
00165         CTrie(const CTrie & to_copy);
00166         virtual ~CTrie();
00167 
00169         const CTrie & operator=(const CTrie & to_copy);
00170 
00178         bool compare_traverse(
00179             int32_t node, const CTrie & other, int32_t other_node);
00180 
00186         bool compare(const CTrie & other);
00187 
00194         bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00195 
00202         int32_t find_deepest_node(
00203             int32_t start_node, int32_t &deepest_node) const;
00204 
00209         void display_node(int32_t node) const;
00210 
00212         void destroy();
00213 
00218         void set_degree(int32_t d);
00219 
00226         void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00227 
00233         void delete_trees(bool p_use_compact_terminal_nodes=true);
00234 
00245         void add_to_trie(
00246             int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00247             float64_t *weights, bool degree_times_position_weights);
00248 
00255         float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00256 
00262         float64_t* compute_abs_weights(int32_t &len);
00263 
00276         float64_t compute_by_tree_helper(
00277             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00278             int32_t weight_pos, float64_t * weights,
00279             bool degree_times_position_weights) ;
00280 
00295         void compute_by_tree_helper(
00296             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00297             int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00298             int32_t mkl_stepsize, float64_t * weights,
00299             bool degree_times_position_weights);
00300 
00315         void compute_scoring_helper(
00316             int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00317             int32_t max_degree, int32_t num_feat, int32_t num_sym,
00318             int32_t sym_offset, int32_t offs, float64_t* result);
00319 
00332         void add_example_to_tree_mismatch_recursion(
00333             int32_t tree,  int32_t i, float64_t alpha, int32_t *vec,
00334             int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00335             int32_t max_mismatch, float64_t * weights);
00336 
00346         void traverse(
00347             int32_t tree, const int32_t p, struct TreeParseInfo info,
00348             const int32_t depth, int32_t* const x, const int32_t k);
00349 
00359         void count(
00360             const float64_t w, const int32_t depth,
00361             const struct TreeParseInfo info, const int32_t p, int32_t* x,
00362             const int32_t k);
00363 
00370         int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00371 
00380         float64_t get_cumulative_score(
00381             int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00382 
00392         void fill_backtracking_table_recursion(
00393             Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00394             CDynamicArray<ConsensusEntry>* table, float64_t* weights);
00395 
00404         void fill_backtracking_table(
00405             int32_t pos, CDynamicArray<ConsensusEntry>* prev,
00406             CDynamicArray<ConsensusEntry>* cur, bool cumulative,
00407             float64_t* weights);
00408 
00414         void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00415 
00420         void POIMs_precalc_SLR(const float64_t* const distrib);
00421 
00431         void POIMs_get_SLR(
00432             const int32_t parentIdx, const int32_t sym, const int32_t depth,
00433             float64_t* S, float64_t* L, float64_t* R);
00434 
00441         void POIMs_add_SLR(
00442             float64_t* const* const poims, const int32_t K,
00443             const int32_t debug);
00444 
00449         inline bool get_use_compact_terminal_nodes()
00450         {
00451             return use_compact_terminal_nodes ;
00452         }
00453 
00459         inline void set_use_compact_terminal_nodes(
00460             bool p_use_compact_terminal_nodes)
00461         {
00462             use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00463         }
00464 
00469         inline int32_t get_num_used_nodes()
00470         {
00471             return TreeMemPtr;
00472         }
00473 
00478         inline void set_position_weights(const float64_t * p_position_weights)
00479         {
00480             position_weights=p_position_weights;
00481         }
00482 
00487         inline int32_t get_node(bool last_node=false)
00488         {
00489             int32_t ret = TreeMemPtr++;
00490             check_treemem() ;
00491 
00492             if (last_node)
00493             {
00494                 for (int32_t q=0; q<4; q++)
00495                     TreeMem[ret].child_weights[q]=0.0;
00496             }
00497             else
00498             {
00499                 for (int32_t q=0; q<4; q++)
00500                     TreeMem[ret].children[q]=NO_CHILD;
00501             }
00502 #ifdef TRIE_CHECK_EVERYTHING
00503             TreeMem[ret].has_seq=false ;
00504             TreeMem[ret].has_floats=false ;
00505 #endif
00506             TreeMem[ret].weight=0.0;
00507             return ret ;
00508         }
00509 
00511         inline void check_treemem()
00512         {
00513             if (TreeMemPtr+10 < TreeMemPtrMax)
00514                 return;
00515             SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00516                     TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00517             TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00518             TreeMem = (Trie*) realloc(TreeMem,
00519                     TreeMemPtrMax*sizeof(Trie));
00520             if (!TreeMem)
00521                 SG_ERROR( "out of memory\n");
00522         }
00523 
00528         inline void set_weights_in_tree(bool weights_in_tree_)
00529         {
00530             weights_in_tree = weights_in_tree_;
00531         }
00532 
00537         inline bool get_weights_in_tree()
00538         {
00539             return weights_in_tree;
00540         }
00541 
00551         void POIMs_extract_W_helper(
00552             const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00553             const int32_t y0, float64_t* const* const W, const int32_t K);
00554 
00567         void POIMs_calc_SLR_helper1(
00568             const float64_t* const distrib, const int32_t i,
00569             const int32_t nodeIdx, int32_t left_tries_idx[4],
00570             const int32_t depth, int32_t const lastSym, float64_t* S,
00571             float64_t* L, float64_t* R);
00572 
00573 
00584         void POIMs_calc_SLR_helper2(
00585             const float64_t* const distrib, const int32_t i,
00586             const int32_t nodeIdx, int32_t left_tries_idx[4],
00587             const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00588 
00599         void POIMs_add_SLR_helper1(
00600             const int32_t nodeIdx, const int32_t depth,const int32_t i,
00601             const int32_t y0, float64_t* const* const poims, const int32_t K,
00602             const int32_t debug);
00603 
00617         void POIMs_add_SLR_helper2(
00618             float64_t* const* const poims, const int32_t K, const int32_t k,
00619             const int32_t i, const int32_t y, const float64_t valW,
00620             const float64_t valS, const float64_t valL, const float64_t valR,
00621             const int32_t debug);
00622 
00624         inline virtual const char* get_name() const { return "Trie"; }
00625 
00626     public:
00628         int32_t NUM_SYMS;
00629 
00630     protected:
00632         int32_t length;
00634         int32_t * trees;
00635 
00637         int32_t degree;
00639         float64_t const *  position_weights;
00640 
00642         Trie* TreeMem;
00644         int32_t TreeMemPtr;
00646         int32_t TreeMemPtrMax;
00648         bool use_compact_terminal_nodes;
00649 
00651         bool weights_in_tree;
00652 
00654         int32_t* nofsKmers;
00655 };
00656 
00657     template <class Trie>
00658     CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00659     : CSGObject(), degree(d), position_weights(NULL),
00660         use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00661         weights_in_tree(true)
00662     {
00663         TreeMemPtrMax=1024*1024/sizeof(Trie);
00664         TreeMemPtr=0;
00665         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00666 
00667         length=0;
00668         trees=NULL;
00669 
00670         NUM_SYMS=4;
00671     }
00672 
00673     template <class Trie>
00674     CTrie<Trie>::CTrie(const CTrie & to_copy)
00675     : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00676         use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00677     {
00678         if (to_copy.position_weights!=NULL)
00679         {
00680             position_weights = to_copy.position_weights;
00681             /*new float64_t[to_copy.length];
00682               for (int32_t i=0; i<to_copy.length; i++)
00683               position_weights[i]=to_copy.position_weights[i]; */
00684         }
00685         else
00686             position_weights=NULL;
00687 
00688         TreeMemPtrMax=to_copy.TreeMemPtrMax;
00689         TreeMemPtr=to_copy.TreeMemPtr;
00690         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00691         memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00692 
00693         length=to_copy.length;
00694         trees=new int32_t[length];
00695         for (int32_t i=0; i<length; i++)
00696             trees[i]=to_copy.trees[i];
00697 
00698         NUM_SYMS=4;
00699     }
00700 
00701     template <class Trie>
00702 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00703 {
00704     degree=to_copy.degree ;
00705     use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00706 
00707     delete[] position_weights ;
00708     position_weights=NULL ;
00709     if (to_copy.position_weights!=NULL)
00710     {
00711         position_weights=to_copy.position_weights ;
00712         /*position_weights = new float64_t[to_copy.length] ;
00713           for (int32_t i=0; i<to_copy.length; i++)
00714           position_weights[i]=to_copy.position_weights[i] ;*/
00715     }
00716     else
00717         position_weights=NULL ;
00718 
00719     TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00720     TreeMemPtr=to_copy.TreeMemPtr ;
00721     free(TreeMem) ;
00722     TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00723     memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00724 
00725     length = to_copy.length ;
00726     if (trees)
00727         delete[] trees ;
00728     trees=new int32_t[length] ;     
00729     for (int32_t i=0; i<length; i++)
00730         trees[i]=to_copy.trees[i] ;
00731 
00732     return *this ;
00733 }
00734 
00735 template <class Trie>
00736 int32_t CTrie<Trie>::find_deepest_node(
00737     int32_t start_node, int32_t& deepest_node) const
00738 {
00739 #ifdef TRIE_CHECK_EVERYTHING
00740     int32_t ret=0 ;
00741     SG_DEBUG("start_node=%i\n", start_node) ;
00742 
00743     if (start_node==NO_CHILD) 
00744     {
00745         for (int32_t i=0; i<length; i++)
00746         {
00747             int32_t my_deepest_node ;
00748             int32_t depth=find_deepest_node(i, my_deepest_node) ;
00749             SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00750             if (depth>ret)
00751             {
00752                 deepest_node=my_deepest_node ;
00753                 ret=depth ;
00754             }
00755         }
00756         return ret ;
00757     }
00758 
00759     if (TreeMem[start_node].has_seq)
00760     {
00761         for (int32_t q=0; q<16; q++)
00762             if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00763                 ret++ ;
00764         deepest_node=start_node ;
00765         return ret ;
00766     }
00767     if (TreeMem[start_node].has_floats)
00768     {
00769         deepest_node=start_node ;
00770         return 1 ;
00771     }
00772 
00773     for (int32_t q=0; q<4; q++)
00774     {
00775         int32_t my_deepest_node ;
00776         if (TreeMem[start_node].children[q]==NO_CHILD)
00777             continue ;
00778         int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00779         if (depth>ret)
00780         {
00781             deepest_node=my_deepest_node ;
00782             ret=depth ;
00783         }
00784     }
00785     return ret ;
00786 #else
00787     SG_ERROR( "not implemented\n") ;
00788     return 0 ;
00789 #endif
00790 }
00791 
00792     template <class Trie>
00793 int32_t CTrie<Trie>::compact_nodes(
00794     int32_t start_node, int32_t depth, float64_t * weights) 
00795 {
00796     SG_ERROR( "code buggy\n") ;
00797 
00798     int32_t ret=0 ;
00799 
00800     if (start_node==NO_CHILD) 
00801     {
00802         for (int32_t i=0; i<length; i++)
00803             compact_nodes(i,1, weights) ;
00804         return 0 ;
00805     }
00806     if (start_node<0)
00807         return -1 ;
00808 
00809     if (depth==degree-1)
00810     {
00811         TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00812         int32_t num_used=0 ;
00813         for (int32_t q=0; q<4; q++)
00814             if (TreeMem[start_node].child_weights[q]!=0.0)
00815                 num_used++ ;
00816         if (num_used>1)
00817             return -1 ;
00818         return 1 ;
00819     }
00820     TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00821 
00822     int32_t num_used = 0 ;
00823     int32_t q_used=-1 ;
00824 
00825     for (int32_t q=0; q<4; q++)
00826     {
00827         if (TreeMem[start_node].children[q]==NO_CHILD)
00828             continue ;
00829         num_used++ ;
00830         q_used=q ;
00831     }
00832     if (num_used>1)
00833     {
00834         if (depth>=degree-2)
00835             return -1 ;
00836         for (int32_t q=0; q<4; q++)
00837         {
00838             if (TreeMem[start_node].children[q]==NO_CHILD)
00839                 continue ;
00840             int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00841             if (num<=2)
00842                 continue ;
00843             int32_t node=get_node() ;
00844 
00845             int32_t last_node=TreeMem[start_node].children[q] ;
00846             if (weights_in_tree)
00847             {
00848                 ASSERT(weights[depth]!=0.0) ;
00849                 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00850             }
00851             else
00852                 TreeMem[node].weight=TreeMem[last_node].weight ;
00853 
00854 #ifdef TRIE_CHECK_EVERYTHING
00855             TreeMem[node].has_seq=true ;
00856 #endif
00857             memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00858             for (int32_t n=0; n<num; n++)
00859             {
00860                 ASSERT(depth+n+1<=degree-1) ;
00861                 ASSERT(last_node!=NO_CHILD) ;
00862                 if (depth+n+1==degree-1)
00863                 {
00864                     TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00865                     int32_t  k ;
00866                     for (k=0; k<4; k++)
00867                         if (TreeMem[last_node].child_weights[k]!=0.0)
00868                             break ;
00869                     if (k==4)
00870                         break ;
00871                     TreeMem[node].seq[n]=k ;
00872                     break ;
00873                 }
00874                 else
00875                 {
00876                     TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00877                     int32_t k ;
00878                     for (k=0; k<4; k++)
00879                         if (TreeMem[last_node].children[k]!=NO_CHILD)
00880                             break ;
00881                     if (k==4)
00882                         break ;
00883                     TreeMem[node].seq[n]=k ;
00884                     last_node=TreeMem[last_node].children[k] ;
00885                 }
00886             }
00887             TreeMem[start_node].children[q]=-node ;
00888         }
00889         return -1 ;
00890     }
00891     if (num_used==0)
00892         return 0 ;
00893 
00894     ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00895     if (ret<0)
00896         return ret ;
00897     return ret+1 ;
00898 }
00899 
00900 
00901     template <class Trie>
00902 bool CTrie<Trie>::compare_traverse(
00903     int32_t node, const CTrie<Trie> & other, int32_t other_node)
00904 {
00905     SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00906     if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00907     {
00908         SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00909         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00910         display_node(node) ;
00911         SG_DEBUG( "============================================================\n") ;           
00912         other.display_node(other_node) ;
00913         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00914         return false ;
00915     }
00916 
00917 #ifdef TRIE_CHECK_EVERYTHING
00918     if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00919     {
00920         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00921         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00922         display_node(node) ;
00923         SG_DEBUG( "============================================================\n") ;           
00924         other.display_node(other_node) ;
00925         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00926         return false ;
00927     }
00928     if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00929     {
00930         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00931         return false ;
00932     }
00933     if (other.TreeMem[other_node].has_floats)
00934     {
00935         for (int32_t q=0; q<4; q++)
00936             if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00937             {
00938                 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00939                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00940                 display_node(node) ;
00941                 SG_DEBUG( "============================================================\n") ;           
00942                 other.display_node(other_node) ;
00943                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00944                 return false ;
00945             }
00946     }
00947     if (other.TreeMem[other_node].has_seq)
00948     {
00949         for (int32_t q=0; q<16; q++)
00950             if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00951             {
00952                 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00953                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00954                 display_node(node) ;
00955                 SG_DEBUG( "============================================================\n") ;           
00956                 other.display_node(other_node) ;
00957                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00958                 return false ;
00959             }
00960     }
00961     if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00962     {
00963         for (int32_t q=0; q<4; q++)
00964         {
00965             if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00966                 continue ;
00967             if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00968             {
00969                 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00970                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00971                 display_node(node) ;
00972                 SG_DEBUG( "============================================================\n") ;           
00973                 other.display_node(other_node) ;
00974                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00975                 return false ;
00976             }
00977             if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00978                 return false ;
00979         }
00980     }
00981 #else
00982     SG_ERROR( "not implemented\n") ;
00983 #endif
00984 
00985     return true ;
00986 }
00987 
00988     template <class Trie>
00989 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
00990 {
00991     bool ret=true ;
00992     for (int32_t i=0; i<length; i++)
00993         if (!compare_traverse(trees[i], other, other.trees[i]))
00994             return false ;
00995         else
00996             SG_DEBUG("two tries at %i identical\n", i) ;
00997 
00998     return ret ;
00999 }
01000 
01001 template <class Trie>
01002 bool CTrie<Trie>::find_node(
01003     int32_t node, int32_t * trace, int32_t& trace_len) const 
01004 {
01005 #ifdef TRIE_CHECK_EVERYTHING
01006     ASSERT(trace_len-1>=0) ;
01007     ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01008         if (TreeMem[trace[trace_len-1]].has_seq)
01009             return false ;
01010     if (TreeMem[trace[trace_len-1]].has_floats)
01011         return false ;
01012 
01013     for (int32_t q=0; q<4; q++)
01014     {
01015         if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01016             continue ;
01017         int32_t tl=trace_len+1 ;
01018         if (TreeMem[trace[trace_len-1]].children[q]>=0)
01019             trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01020         else
01021             trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01022 
01023         if (trace[trace_len]==node)
01024         {
01025             trace_len=tl ;
01026             return true ;
01027         }
01028         if (find_node(node, trace, tl))
01029         {
01030             trace_len=tl ;
01031             return true ;
01032         }
01033     }
01034     trace_len=0 ;
01035     return false ;
01036 #else
01037     SG_ERROR( "not implemented\n") ;
01038     return false ;
01039 #endif
01040 }
01041 
01042 template <class Trie>
01043 void CTrie<Trie>::display_node(int32_t node) const
01044 {
01045 #ifdef TRIE_CHECK_EVERYTHING
01046     int32_t * trace=new int32_t[2*degree] ;
01047     int32_t trace_len=-1 ;
01048     bool found = false ;
01049     int32_t tree=-1 ;
01050     for (tree=0; tree<length; tree++)
01051     {
01052         trace[0]=trees[tree] ;
01053         trace_len=1 ;
01054         found=find_node(node, trace, trace_len) ;
01055         if (found)
01056             break ;
01057     }
01058     ASSERT(found) ;
01059     SG_PRINT( "position %i  trace: ", tree) ;
01060 
01061     for (int32_t i=0; i<trace_len-1; i++)
01062     {
01063         int32_t branch=-1 ;
01064         for (int32_t q=0; q<4; q++)
01065             if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01066             {
01067                 branch=q;
01068                 break ;
01069             }
01070         ASSERT(branch!=-1) ;
01071         char acgt[5]="ACGT" ;
01072         SG_PRINT( "%c", acgt[branch]) ;
01073     }
01074     SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01075     if (TreeMem[node].has_floats)
01076     {
01077         for (int32_t q=0; q<4; q++)
01078             SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01079     }
01080     if (TreeMem[node].has_seq)
01081     {
01082         for (int32_t q=0; q<16; q++)
01083             SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01084     }
01085     if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01086     {
01087         for (int32_t q=0; q<4; q++)
01088         {
01089             if (TreeMem[node].children[q]!=NO_CHILD)
01090             {
01091                 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01092                 display_node(abs(TreeMem[node].children[q])) ;
01093             }
01094             else
01095                 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01096         }
01097 
01098     }
01099 
01100     delete[] trace ;
01101 #else
01102     SG_ERROR( "not implemented\n") ;
01103 #endif
01104 }
01105 
01106 
01107 template <class Trie> CTrie<Trie>::~CTrie()
01108 {
01109     destroy() ;
01110 
01111     free(TreeMem) ;
01112 }
01113 
01114 template <class Trie> void CTrie<Trie>::destroy()
01115 {
01116     if (trees!=NULL)
01117     {
01118         delete_trees();
01119         for (int32_t i=0; i<length; i++)
01120             trees[i] = NO_CHILD;
01121         delete[] trees;
01122 
01123         TreeMemPtr=0;
01124         length=0;
01125         trees=NULL;
01126     }
01127 }
01128 
01129 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01130 {
01131     delete_trees(get_use_compact_terminal_nodes());
01132     degree=d;
01133 }
01134 
01135 template <class Trie> void CTrie<Trie>::create(
01136     int32_t len, bool p_use_compact_terminal_nodes)
01137 {
01138     destroy();
01139 
01140     trees=new int32_t[len] ;        
01141     TreeMemPtr=0 ;
01142     for (int32_t i=0; i<len; i++)
01143         trees[i]=get_node(degree==1);
01144     length = len ;
01145 
01146     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01147 }
01148 
01149 
01150 template <class Trie> void CTrie<Trie>::delete_trees(
01151     bool p_use_compact_terminal_nodes)
01152 {
01153     if (trees==NULL)
01154         return;
01155 
01156     TreeMemPtr=0 ;
01157     for (int32_t i=0; i<length; i++)
01158         trees[i]=get_node(degree==1);
01159 
01160     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01161 } 
01162 
01163     template <class Trie>
01164 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01165 {
01166     float64_t ret=0 ;
01167 
01168     if (tree==NO_CHILD)
01169         return 0 ;
01170     TRIE_ASSERT(tree>=0) ;
01171 
01172     if (depth==degree-2)
01173     {
01174         ret+=(TreeMem[tree].weight) ;
01175 
01176         for (int32_t k=0; k<4; k++)
01177             ret+=(TreeMem[tree].child_weights[k]) ;
01178 
01179         return ret ;
01180     }
01181 
01182     ret+=(TreeMem[tree].weight) ;
01183 
01184     for (int32_t i=0; i<4; i++)
01185         if (TreeMem[tree].children[i]!=NO_CHILD)
01186             ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1)  ;
01187 
01188     return ret ;
01189 }
01190 
01191 
01192     template <class Trie>
01193 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01194 {
01195     float64_t * sum=new float64_t[length*4] ;
01196     for (int32_t i=0; i<length*4; i++)
01197         sum[i]=0 ;
01198     len=length ;
01199 
01200     for (int32_t i=0; i<length; i++)
01201     {
01202         TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01203         for (int32_t k=0; k<4; k++)
01204         {
01205             sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01206         }
01207     }
01208 
01209     return sum ;
01210 }
01211 
01212     template <class Trie>
01213 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01214     int32_t tree,  int32_t i, float64_t alpha,
01215         int32_t *vec, int32_t len_rem, 
01216         int32_t degree_rec, int32_t mismatch_rec, 
01217         int32_t max_mismatch, float64_t * weights) 
01218 {
01219     if (tree==NO_CHILD)
01220         tree=trees[i] ;
01221     TRIE_ASSERT(tree!=NO_CHILD) ;
01222 
01223     if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01224         return ;
01225     const int32_t other[4][3] = {   {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01226 
01227     int32_t subtree = NO_CHILD ;
01228 
01229     if (degree_rec==degree-1)
01230     {
01231         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01232         if (weights_in_tree)
01233             TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01234         else
01235             if (weights[degree_rec]!=0.0)
01236                 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01237         if (mismatch_rec+1<=max_mismatch)
01238             for (int32_t o=0; o<3; o++)
01239             {
01240                 if (weights_in_tree)
01241                     TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01242                 else
01243                     if (weights[degree_rec]!=0.0)
01244                         TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01245             }
01246         return ;
01247     }
01248     else
01249     {
01250         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01251         if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01252         {
01253             subtree=TreeMem[tree].children[vec[0]] ;
01254             if (weights_in_tree)
01255                 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01256             else
01257                 if (weights[degree_rec]!=0.0)
01258                     TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01259         }
01260         else 
01261         {
01262             int32_t tmp = get_node(degree_rec==degree-2);
01263             ASSERT(tmp>=0) ;
01264             TreeMem[tree].children[vec[0]]=tmp ;
01265             subtree=tmp ;
01266 #ifdef TRIE_CHECK_EVERYTHING
01267             if (degree_rec==degree-2)
01268                 TreeMem[subtree].has_floats=true ;
01269 #endif
01270             if (weights_in_tree)
01271                 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01272             else
01273                 if (weights[degree_rec]!=0.0)
01274                     TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01275                 else
01276                     TreeMem[subtree].weight = 0.0 ;
01277         }
01278         add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01279                 &vec[1], len_rem-1, 
01280                 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01281 
01282         if (mismatch_rec+1<=max_mismatch)
01283         {
01284             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01285             for (int32_t o=0; o<3; o++)
01286             {
01287                 int32_t ot = other[vec[0]][o] ;
01288                 if (TreeMem[tree].children[ot]!=NO_CHILD)
01289                 {
01290                     subtree=TreeMem[tree].children[ot] ;
01291                     if (weights_in_tree)
01292                         TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01293                     else
01294                         if (weights[degree_rec]!=0.0)
01295                             TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01296                 }
01297                 else 
01298                 {
01299                     int32_t tmp = get_node(degree_rec==degree-2);
01300                     ASSERT(tmp>=0) ;
01301                     TreeMem[tree].children[ot]=tmp ;
01302                     subtree=tmp ;
01303 #ifdef TRIE_CHECK_EVERYTHING
01304                     if (degree_rec==degree-2)
01305                         TreeMem[subtree].has_floats=true ;
01306 #endif
01307 
01308                     if (weights_in_tree)
01309                         TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01310                     else
01311                         if (weights[degree_rec]!=0.0)
01312                             TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01313                         else
01314                             TreeMem[subtree].weight = 0.0 ;
01315                 }
01316 
01317                 add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01318                         &vec[1], len_rem-1, 
01319                         degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01320             }
01321         }
01322     }
01323 }
01324 
01325     template <class Trie>
01326 void CTrie<Trie>::compute_scoring_helper(
01327     int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01328     int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01329     int32_t offs, float64_t* result)
01330 {
01331     if (i+j<num_feat)
01332     {
01333         float64_t decay=1.0; //no decay by default
01334         //if (j>d)
01335         //  decay=pow(0.5,j); //marginalize out lower order matches
01336 
01337         if (j<degree-1)
01338         {
01339             for (int32_t k=0; k<num_sym; k++)
01340             {
01341                 if (TreeMem[tree].children[k]!=NO_CHILD)
01342                 {
01343                     int32_t child=TreeMem[tree].children[k];
01344                     //continue recursion if not yet at max_degree, else add to result
01345                     if (d<max_degree-1)
01346                         compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01347                     else
01348                         result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01349 
01351                     if (d==0)
01352                         compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01353                 }
01354             }
01355         }
01356         else if (j==degree-1)
01357         {
01358             for (int32_t k=0; k<num_sym; k++)
01359             {
01360                 //continue recursion if not yet at max_degree, else add to result
01361                 if (d<max_degree-1 && i<num_feat-1)
01362                     compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01363                 else
01364                     result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01365             }
01366         }
01367     }
01368 }
01369 
01370     template <class Trie>
01371 void CTrie<Trie>::traverse(
01372     int32_t tree, const int32_t p, struct TreeParseInfo info,
01373     const int32_t depth, int32_t* const x, const int32_t k)
01374 {
01375     const int32_t num_sym = info.num_sym;
01376     const int32_t y0 = info.y0;
01377     const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01378     //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] );
01379     //if( !( info.y0 == temp ) ) {
01380     //  printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth );
01381     //}
01382     //ASSERT( info.y0 == temp );
01383     int32_t sym;
01384     ASSERT( depth < degree );
01385     //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] );
01386     if (depth<degree-1)
01387     {
01388         for( sym=0; sym<num_sym; ++sym ) {
01389             const int32_t childNum = TreeMem[tree].children[ sym ];
01390             if( childNum != NO_CHILD ) {
01391                 int32_t child = childNum ;
01392                 x[depth] = sym;
01393                 info.substrs[depth+1] = y0 + sym;
01394                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01395                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01396                 count( TreeMem[child].weight, depth, info, p, x, k );
01397                 traverse( child, p, info, depth+1, x, k );
01398                 x[depth] = -1;
01399             }
01400         }
01401     }
01402     else if( depth == degree-1 )
01403     {
01404         for( sym=0; sym<num_sym; ++sym ) {
01405             const float64_t w = TreeMem[tree].child_weights[ sym ];
01406             if( w != 0.0 ) {
01407                 x[depth] = sym;
01408                 info.substrs[depth+1] = y0 + sym;
01409                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01410                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01411                 count( w, depth, info, p, x, k );
01412                 x[depth] = -1;
01413             }
01414         }
01415     }
01416     //info.substrs[depth+1] = -1;
01417     //info.y0 = temp;
01418 }
01419 
01420     template <class Trie>
01421 void CTrie<Trie>::count(
01422     const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01423     const int32_t p, int32_t* x, const int32_t k)
01424 {
01425     ASSERT( fabs(w) < 1e10 );
01426     ASSERT( x[depth] >= 0 );
01427     ASSERT( x[depth+1] < 0 );
01428     if ( depth < k ) {
01429         return;
01430     }
01431     //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) );
01432     const int32_t nofKmers = info.nofsKmers[k];
01433     const float64_t margWeight =  w * info.margFactors[ depth-k ];
01434     const int32_t m_a = depth - k + 1;
01435     const int32_t m_b = info.num_feat - p;
01436     const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01437     // all proper k-substrings
01438     const int32_t offset0 = nofKmers * p;
01439     register int32_t i;
01440     register int32_t offset;
01441     offset = offset0;
01442     for( i = 0; i < m; ++i ) {
01443         const int32_t y = info.substrs[i+k+1];
01444         info.C_k[ y + offset ] += margWeight;
01445         offset += nofKmers;
01446     }
01447     if( depth > k ) {
01448         // k-prefix
01449         const int32_t offsR = info.substrs[k+1] + offset0;
01450         info.R_k[offsR] += margWeight;
01451         // k-suffix
01452         if( p+depth-k < info.num_feat ) {
01453             const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01454             info.L_k[offsL] += margWeight; 
01455         }
01456     }
01457     //    # N.x = substring represented by N
01458     //    # N.d = length of N.x
01459     //    # N.s = starting position of N.x
01460     //    # N.w = weight for feature represented by N
01461     //    if( N.d >= k )
01462     //      margContrib = w / 4^(N.d-k)
01463     //      for i = 1 to (N.d-k+1)
01464     //        y = N.x[i:(i+k-1)]  # overlapped k-mer
01465     //        C_k[ N.s+i-1, y ] += margContrib
01466     //      end;
01467     //      if( N.d > k )
01468     //        L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib  # j-suffix of N.x
01469     //        R_k[ N.s,       N.x[1:k]         ] += margContrib  # j-prefix of N.x
01470     //      end;
01471     //    end;
01472 }
01473 
01474     template <class Trie>
01475 void CTrie<Trie>::add_to_trie(
01476     int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01477     float64_t *weights, bool degree_times_position_weights)
01478 {
01479     int32_t tree = trees[i] ;
01480     //ASSERT(seq_offset==0) ;
01481 
01482     int32_t max_depth = 0 ;
01483     float64_t* weights_column ;
01484     if (degree_times_position_weights)
01485         weights_column = &weights[(i+seq_offset)*degree] ;
01486     else
01487         weights_column = weights ;
01488 
01489     if (weights_in_tree)
01490     {
01491         for (int32_t j=0; (j<degree) && (i+j<length); j++)
01492             if (CMath::abs(weights_column[j]*alpha)>0)
01493                 max_depth = j+1 ;
01494     }
01495     else
01496         // don't use the weights
01497         max_depth=degree ;
01498 
01499     for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01500     {
01501         TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01502         if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01503         {
01504             if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01505             {
01506                 // special treatment of the next nodes
01507                 TRIE_ASSERT(j >= degree-16) ;
01508                 // get the right element
01509                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01510                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01511                 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01512 
01513                 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01514                 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01515                 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01516 
01517                 // check whether the same string is stored
01518                 int32_t mismatch_pos = -1 ;
01519                 {
01520                     int32_t k ;
01521                     for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01522                     {
01523                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01524                         // ###
01525                         if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01526                             fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01527                         TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01528                         TRIE_ASSERT(k<16) ;
01529                         if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01530                         {
01531                             mismatch_pos=k ;
01532                             break ;
01533                         }
01534                     }
01535                 }
01536 
01537                 // what happens when the .seq sequence is longer than vec? should we branch???
01538 
01539                 if (mismatch_pos==-1)
01540                     // if so, then just increase the weight by alpha and stop
01541                     TreeMem[node].weight+=alpha ;
01542                 else
01543                     // otherwise
01544                     // 1. replace current node with new node
01545                     // 2. create new nodes until mismatching positon
01546                     // 2. add a branch with old string (old node) and the new string (new node)
01547                 {
01548                     // replace old node
01549                     int32_t last_node=tree ;
01550 
01551                     // create new nodes until mismatch
01552                     int32_t k ;
01553                     for (k=0; k<mismatch_pos; k++)
01554                     {
01555                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01556                         TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01557 
01558                         int32_t tmp=get_node();
01559                         TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01560                         last_node=tmp ;
01561                         if (weights_in_tree)
01562                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01563                         else
01564                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01565                         TRIE_ASSERT(j+k!=degree-1) ;
01566                     }
01567                     if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01568                         fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01569                     ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01570                     TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01571 
01572                     if (j+k==degree-1)
01573                     {
01574                         // init child weights with zero if after dropping out
01575                         // of the k<mismatch_pos loop we are one level below degree
01576                         // (keep this even after get_node() change!)
01577                         for (int32_t q=0; q<4; q++)
01578                             TreeMem[last_node].child_weights[q]=0.0 ;
01579                         if (weights_in_tree)
01580                         {
01581                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01582                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01583                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01584                         }
01585                         else
01586                         {
01587                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01588                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01589                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01590                         }
01591 
01592 #ifdef TRIE_CHECK_EVERYTHING
01593                         TreeMem[last_node].has_floats=true ;
01594 #endif
01595                     }
01596                     else
01597                     {
01598                         // the branch for the existing string
01599                         if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01600                         {
01601                             TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01602 
01603                             // move string by mismatch_pos positions
01604                             for (int32_t q=0; q<16; q++)
01605                             {
01606                                 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01607                                     TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01608                                 else
01609                                     TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01610                             }
01611 #ifdef TRIE_CHECK_EVERYTHING
01612                             TreeMem[node].has_seq=true ;
01613 #endif
01614                         }
01615 
01616                         // the new branch
01617                         TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01618                         int32_t tmp = get_node() ;
01619                         TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01620                         last_node=tmp ;
01621 
01622                         TreeMem[last_node].weight = alpha ;
01623 #ifdef TRIE_CHECK_EVERYTHING
01624                         TreeMem[last_node].has_seq = true ;
01625 #endif
01626                         memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01627                         for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01628                             TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01629                     }
01630                 }
01631                 break ;
01632             } 
01633             else
01634             {
01635                 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01636                 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01637                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01638                 if (weights_in_tree)
01639                     TreeMem[tree].weight += alpha*weights_column[j];
01640                 else
01641                     TreeMem[tree].weight += alpha ;
01642             }
01643         }
01644         else if (j==degree-1)
01645         {
01646             // special treatment of the last node
01647             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01648             TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01649             if (weights_in_tree)
01650                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01651             else
01652                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01653 
01654             break;
01655         }
01656         else
01657         {
01658             bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01659             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01660             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01661 
01662             int32_t tmp = get_node((j==degree-2) && (!use_seq));
01663             if (use_seq)
01664                 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01665             else
01666                 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01667             tree=tmp ;
01668 
01669             TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01670 #ifdef TRIE_CHECK_EVERYTHING
01671             TreeMem[tree].has_seq = use_seq ;
01672 #endif
01673             if (use_seq)
01674             {
01675                 TreeMem[tree].weight = alpha ;
01676                 // important to have the terminal characters (see ###)
01677                 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01678                 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01679                 {
01680                     TRIE_ASSERT(q<16) ;
01681                     TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01682                 }
01683                 break ;
01684             }
01685             else
01686             {
01687                 if (weights_in_tree)
01688                     TreeMem[tree].weight = alpha*weights_column[j] ;
01689                 else
01690                     TreeMem[tree].weight = alpha ;
01691 #ifdef TRIE_CHECK_EVERYTHING
01692                 if (j==degree-2)
01693                     TreeMem[tree].has_floats = true ;
01694 #endif
01695             }
01696         }
01697     }
01698 }
01699 
01700     template <class Trie>
01701 float64_t CTrie<Trie>::compute_by_tree_helper(
01702     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01703         int32_t weight_pos, float64_t* weights, 
01704         bool degree_times_position_weights)
01705 {
01706     int32_t tree = trees[tree_pos] ;
01707 
01708     if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01709         return 0.0;
01710 
01711     float64_t *weights_column=NULL ;
01712     if (degree_times_position_weights)
01713         weights_column=&weights[weight_pos*degree] ;
01714     else // weights is a vector (1 x degree)
01715         weights_column=weights ;
01716 
01717     float64_t sum=0 ;
01718     for (int32_t j=0; seq_pos+j < len; j++)
01719     {
01720         TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01721 
01722         if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01723         {
01724             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01725             if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01726             {
01727                 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01728                 TRIE_ASSERT(tree>=0) ;
01729                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01730                 float64_t this_weight=0.0 ;
01731                 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01732                 {
01733                     TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01734                     if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01735                         break ;
01736                     this_weight += weights_column[j+k] ;
01737                 }
01738                 sum += TreeMem[tree].weight * this_weight ;
01739                 break ;
01740             }
01741             else
01742             {
01743                 tree=TreeMem[tree].children[vec[seq_pos+j]];
01744                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01745                 if (weights_in_tree)
01746                     sum += TreeMem[tree].weight ;
01747                 else
01748                     sum += TreeMem[tree].weight * weights_column[j] ;
01749             } ;
01750         }
01751         else
01752         {
01753             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01754             if (j==degree-1)
01755             {
01756                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01757                 if (weights_in_tree)
01758                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01759                 else
01760                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01761             }
01762             else
01763                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01764 
01765             break;
01766         }
01767     } 
01768 
01769     if (position_weights!=NULL)
01770         return sum*position_weights[weight_pos] ;
01771     else
01772         return sum ;
01773 }
01774 
01775     template <class Trie>
01776 void CTrie<Trie>::compute_by_tree_helper(
01777     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01778     int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01779     int32_t mkl_stepsize, float64_t * weights,
01780     bool degree_times_position_weights)
01781 {
01782     int32_t tree = trees[tree_pos] ;
01783     if (factor==0)
01784         return ;
01785 
01786     if (position_weights!=NULL)
01787     {
01788         factor *= position_weights[weight_pos] ;
01789         if (factor==0)
01790             return ;
01791         if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree)
01792         {
01793             for (int32_t j=0; seq_pos+j<len; j++)
01794             {
01795                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01796                 {
01797                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01798                     {
01799                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01800                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01801                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01802                         {
01803                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01804                                 break ;
01805                             if (weights_in_tree)
01806                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01807                             else
01808                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01809                         }
01810                         break ;
01811                     }
01812                     else
01813                     {
01814                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01815                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01816                         if (weights_in_tree)
01817                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01818                         else
01819                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01820                     }
01821                 } 
01822                 else
01823                 {
01824                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01825                     if (j==degree-1)
01826                     {
01827                         if (weights_in_tree)
01828                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01829                         else
01830                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01831                     }
01832                 }
01833             }
01834         } 
01835         else // with position_weigths, weights is a matrix (len x degree)
01836         {
01837             for (int32_t j=0; seq_pos+j<len; j++)
01838             {
01839                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01840                 {
01841                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01842                     {
01843                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01844                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01845                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01846                         {
01847                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01848                                 break ;
01849                             if (weights_in_tree)
01850                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01851                             else
01852                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01853                         }
01854                         break ;
01855                     }
01856                     else
01857                     {
01858                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01859                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01860                         if (weights_in_tree)
01861                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01862                         else
01863                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01864                     }
01865                 } 
01866                 else
01867                 {
01868                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01869                     if (j==degree-1)
01870                     {
01871                         if (weights_in_tree)
01872                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01873                         else
01874                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01875                     }         
01876                     break ;
01877                 }
01878             }
01879         } 
01880     }
01881     else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree)
01882     {
01883         for (int32_t j=0; seq_pos+j<len; j++)
01884         {
01885             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01886             {
01887                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01888                 {
01889                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01890                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01891                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01892                     {
01893                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01894                             break ;
01895                         if (weights_in_tree)
01896                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01897                         else
01898                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01899                     }
01900                     break ;
01901                 }
01902                 else
01903                 {
01904                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01905                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01906                     if (weights_in_tree)
01907                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01908                     else
01909                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01910                 }
01911             }
01912             else
01913             {
01914                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01915                 if (j==degree-1)
01916                 {
01917                     if (weights_in_tree)
01918                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01919                     else
01920                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01921                 }
01922                 break ;
01923             }
01924         } 
01925     } 
01926     else // no position_weigths, weights is a matrix (len x degree)
01927     {
01928         /*if (!position_mask)
01929           {     
01930           position_mask = new bool[len] ;
01931           for (int32_t i=0; i<len; i++)
01932           {
01933           position_mask[i]=false ;
01934 
01935           for (int32_t j=0; j<degree; j++)
01936           if (weights[i*degree+j]!=0.0)
01937           {
01938           position_mask[i]=true ;
01939           break ;
01940           }
01941           }
01942           }
01943           if (position_mask[weight_pos]==0)
01944           return ;*/
01945 
01946         for (int32_t j=0; seq_pos+j<len; j++)
01947         {
01948             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01949             {
01950                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01951                 {
01952                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01953                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01954                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01955                     {
01956                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01957                             break ;
01958                         if (weights_in_tree)
01959                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01960                         else
01961                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01962                     }
01963                     break ;
01964                 }
01965                 else
01966                 {
01967                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01968                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01969                     if (weights_in_tree)
01970                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01971                     else
01972                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01973                 } 
01974             }
01975             else
01976             {
01977                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01978                 if (j==degree-1)
01979                 {
01980                     if (weights_in_tree)
01981                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01982                     else
01983                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
01984                 }
01985                 break ;
01986             }
01987         } 
01988     }
01989 }
01990 
01991     template <class Trie>
01992 void CTrie<Trie>::fill_backtracking_table_recursion(
01993     Trie* tree, int32_t depth, uint64_t seq, float64_t value,
01994     CDynamicArray<ConsensusEntry>* table, float64_t* weights)
01995 {
01996     float64_t w=1.0;
01997 
01998     if (weights_in_tree || depth==0)
01999         value+=tree->weight;
02000     else
02001     {
02002         w=weights[degree-1];
02003         value+=weights[depth-1]*tree->weight;
02004     }
02005 
02006     if (degree-1==depth)
02007     {
02008         for (int32_t sym=0; sym<4; sym++)
02009         {
02010             float64_t v=w*tree->child_weights[sym];
02011             if (v!=0.0)
02012             {
02013                 ConsensusEntry entry;
02014                 entry.bt=-1;
02015                 entry.score=value+v;
02016                 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02017 
02018                 table->append_element(entry);
02019             }
02020         }
02021     }
02022     else
02023     {
02024         for (int32_t sym=0; sym<4; sym++)
02025         {
02026             uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02027             if (tree->children[sym] != NO_CHILD)
02028                 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02029         }
02030     }
02031 }
02032 
02033     template <class Trie>
02034 float64_t CTrie<Trie>::get_cumulative_score(
02035     int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02036 {
02037     float64_t result=0.0;
02038 
02039     //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq);
02040 
02041     for (int32_t i=pos; i<pos+deg && i<length; i++)
02042     {
02043         //SG_PRINT("loop %d\n", i);
02044         Trie* tree = &TreeMem[trees[i]];
02045 
02046         for (int32_t d=0; d<deg-i+pos; d++)
02047         {
02048             //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos)));
02049             ASSERT(d-1<degree);
02050             int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02051 
02052             float64_t w=1.0;
02053             if (!weights_in_tree)
02054                 w=weights[d];
02055 
02056             ASSERT(tree->children[sym] != NO_CHILD);
02057             tree=&TreeMem[tree->children[sym]];
02058             result+=w*tree->weight;
02059         }
02060     }
02061     //SG_PRINT("cum: %f\n", result);
02062     return result;
02063 }
02064 
02065     template <class Trie>
02066 void CTrie<Trie>::fill_backtracking_table(
02067     int32_t pos, CDynamicArray<ConsensusEntry>* prev,
02068     CDynamicArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02069 {
02070     ASSERT(pos>=0 && pos<length);
02071     ASSERT(!use_compact_terminal_nodes);
02072 
02073     Trie* t = &TreeMem[trees[pos]];
02074 
02075     fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02076 
02077 
02078     if (cumulative)
02079     {
02080         int32_t num_cur=cur->get_num_elements();
02081         for (int32_t i=0; i<num_cur; i++)
02082         {
02083             ConsensusEntry entry=cur->get_element(i);
02084             entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02085             cur->set_element(entry,i);
02086             //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt);
02087         }
02088     }
02089 
02090     //if previous tree exists find maximum scoring path
02091     //for each element in cur and update bt table
02092     if (prev)
02093     {
02094         int32_t num_cur=cur->get_num_elements();
02095         int32_t num_prev=prev->get_num_elements();
02096 
02097         for (int32_t i=0; i<num_cur; i++)
02098         {
02099             //uint64_t str_cur_old= cur->get_element(i).string;
02100             uint64_t str_cur= cur->get_element(i).string >> 2;
02101             //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur);
02102 
02103             int32_t bt=-1;
02104             float64_t max_score=0.0;
02105 
02106             for (int32_t j=0; j<num_prev; j++)
02107             {
02108                 //uint64_t str_prev_old= prev->get_element(j).string;
02109                 uint64_t mask=
02110                     ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02111                 uint64_t str_prev=  mask & prev->get_element(j).string;
02112                 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask);
02113 
02114                 if (str_cur == str_prev)
02115                 {
02116                     float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02117                     if (bt==-1 || sc>max_score)
02118                     {
02119                         bt=j;
02120                         max_score=sc;
02121 
02122                         //SG_PRINT("new max[%i,%i] = %f\n", j,i, max_score);
02123                     }
02124                 }
02125             }
02126 
02127             ASSERT(bt!=-1);
02128             ConsensusEntry entry;
02129             entry.bt=bt;
02130             entry.score=max_score;
02131             entry.string=cur->get_element(i).string;
02132             cur->set_element(entry, i);
02133             //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt);
02134         }
02135     }
02136 }
02137 #endif // _TRIE_H___

SHOGUN Machine Learning Toolbox - Documentation