Trie.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg, Gunnar Raetsch
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "lib/Trie.h"
00014 #include "lib/Mathematics.h"
00015 
00016 template <>
00017 void CTrie<POIMTrie>::POIMs_extract_W_helper(
00018     const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00019     const int32_t y0, float64_t* const* const W, const int32_t K )
00020 {
00021     ASSERT(nodeIdx!=NO_CHILD);
00022     ASSERT(depth<K);
00023     float64_t* const W_kiy = & W[ depth ][ offset + y0 ];
00024     POIMTrie* const node = &TreeMem[ nodeIdx ];
00025     int32_t sym;
00026 
00027     if( depth < degree-1 )
00028     {
00029         const int32_t offset1 = offset * NUM_SYMS;
00030         for( sym = 0; sym < NUM_SYMS; ++sym )
00031         {
00032             ASSERT(W_kiy[sym]==0);
00033             const int32_t childIdx = node->children[ sym ];
00034             if( childIdx != NO_CHILD )
00035             {
00036                 W_kiy[ sym ] = TreeMem[ childIdx ].weight;
00037 
00038                 if (depth < K-1)
00039                 {
00040                     const int32_t y1 = ( y0 + sym ) * NUM_SYMS;
00041                     POIMs_extract_W_helper( childIdx, depth+1, offset1, y1, W, K );
00042                 }
00043             }
00044         }
00045     }
00046     else
00047     {
00048         ASSERT(depth==degree-1);
00049         for( sym = 0; sym < NUM_SYMS; ++sym )
00050         {
00051             ASSERT(W_kiy[sym]==0);
00052             W_kiy[ sym ] = node->child_weights[ sym ];
00053         }
00054     }
00055 }
00056 
00057 template <>
00058 void CTrie<POIMTrie>::POIMs_extract_W(
00059     float64_t* const* const W, const int32_t K)
00060 {
00061   ASSERT(degree>=1);
00062   ASSERT(K>=1);
00063   const int32_t N = length;
00064   int32_t i;
00065   for( i = 0; i < N; ++i ) {
00066     //SG_PRINT( "W_helper( %d )\n", i );
00067     POIMs_extract_W_helper( trees[i], 0, i*NUM_SYMS, 0*NUM_SYMS, W, K );
00068   }
00069 }
00070 
00071 template <>
00072 void CTrie<POIMTrie>::POIMs_calc_SLR_helper1(
00073     const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
00074     int32_t left_tries_idx[4], const int32_t depth, int32_t const lastSym,
00075     float64_t* S, float64_t* L, float64_t* R )
00076 {
00077   ASSERT(depth==degree-1);
00078   ASSERT(nodeIdx!=NO_CHILD);
00079 
00080   const float64_t* const distribLeft  = & distrib[ (i-1)     * NUM_SYMS ];
00081   POIMTrie* const node = &TreeMem[ nodeIdx ];
00082   int32_t symRight;
00083   int32_t symLeft;
00084 
00085   // --- init
00086   node->S = 0;
00087   node->L = 0;
00088   node->R = 0;
00089 
00090   if (i+depth<length)
00091   {
00092       const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00093 
00094       // --- go thru direct children
00095       for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
00096           const float64_t w1 = node->child_weights[ symRight ];
00097           const float64_t pRight = distribRight[ symRight ];
00098           const float64_t incr1 = pRight * w1;
00099           node->S += incr1;
00100           node->R += incr1;
00101       }
00102   }
00103 
00104   // --- collect precalced values from left neighbor tree
00105   for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00106   {
00107       if (left_tries_idx[symLeft] != NO_CHILD)
00108       {
00109           POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00110 
00111           ASSERT(nodeLeft);
00112           const float64_t w2 = nodeLeft->child_weights[ lastSym ];
00113           const float64_t pLeft = distribLeft[ symLeft ];
00114           const float64_t incr2 = pLeft * w2;
00115           node->S += incr2;
00116           node->L += incr2;
00117       }
00118   }
00119 
00120   // --- add w and return results
00121   const float64_t w0 = node->weight;
00122   node->S += w0;
00123   node->L += w0;
00124   node->R += w0;
00125   *S = node->S;
00126   *L = node->L;
00127   *R = node->R;
00128 }
00129 
00130 
00131 template <>
00132 void CTrie<POIMTrie>::POIMs_calc_SLR_helper2(
00133     const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
00134     int32_t left_tries_idx[4], const int32_t depth, float64_t* S, float64_t* L,
00135     float64_t* R )
00136 {
00137   ASSERT(0<=depth && depth<=degree-2);
00138   ASSERT(nodeIdx!=NO_CHILD);
00139 
00140   const float64_t* const distribLeft  = & distrib[ (i-1)     * NUM_SYMS ];
00141   POIMTrie* const node = &TreeMem[ nodeIdx ];
00142   float64_t dummy;
00143   float64_t auxS;
00144   float64_t auxR;
00145   int32_t symRight;
00146   int32_t symLeft;
00147 
00148   // --- init
00149   node->S = 0;
00150   node->L = 0;
00151   node->R = 0;
00152 
00153   // --- recurse thru direct children
00154   for( symRight = 0; symRight < NUM_SYMS; ++symRight )
00155   {
00156       const int32_t childIdx = node->children[ symRight ];
00157       if( childIdx != NO_CHILD )
00158       {
00159           if( depth < degree-2 ) 
00160           {
00161               int32_t new_left_tries_idx[4];
00162 
00163               for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00164               {
00165                   new_left_tries_idx[symLeft]=NO_CHILD;
00166 
00167                   if (left_tries_idx[symLeft] != NO_CHILD)
00168                   {
00169                       POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00170                       ASSERT(nodeLeft);
00171                       new_left_tries_idx[ symLeft ]=nodeLeft->children[ symRight ];
00172                   }
00173               }
00174               POIMs_calc_SLR_helper2( distrib, i, childIdx, new_left_tries_idx, depth+1, &auxS, &dummy, &auxR );
00175           }
00176           else 
00177               POIMs_calc_SLR_helper1( distrib, i, childIdx, left_tries_idx, depth+1, symRight, &auxS, &dummy, &auxR );
00178 
00179           if (i+depth<length)
00180           {
00181               const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00182               const float64_t pRight = distribRight[ symRight ];
00183               node->S += pRight * auxS;
00184               node->R += pRight * auxR;
00185           }
00186       }
00187   }
00188 
00189   // --- collect precalced values from left neighbor tree
00190   for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00191   {
00192       if (left_tries_idx[symLeft] != NO_CHILD)
00193       {
00194           const POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00195           ASSERT(nodeLeft);
00196           const float64_t pLeft = distribLeft[ symLeft ];
00197 
00198           node->S += pLeft * nodeLeft->S;
00199           node->L += pLeft * nodeLeft->L;
00200 
00201           if (i+depth<length)
00202           {
00203               const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00204               // - second order correction for S
00205               auxS = 0;
00206               if( depth < degree-2 )
00207               {
00208                   for( symRight = 0; symRight < NUM_SYMS; ++symRight )
00209                   {
00210                       const int32_t childIdxLeft = nodeLeft->children[ symRight ];
00211                       if( childIdxLeft != NO_CHILD )
00212                       {
00213                           const POIMTrie* const childLeft = &TreeMem[ childIdxLeft ];
00214                           auxS += distribRight[symRight] * childLeft->S;
00215                       }
00216                   }
00217               }
00218               else
00219               {
00220                   for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
00221                       auxS += distribRight[symRight] * nodeLeft->child_weights[ symRight ];
00222                   }
00223               }
00224               node->S -= pLeft* auxS;
00225           }
00226       }
00227   }
00228 
00229   // --- add w and return results
00230   const float64_t w0 = node->weight;
00231   //SG_PRINT( "  d=%d, node=%d, dS=%.3f, w=%.3f\n", depth, nodeIdx, node->S, w0 );
00232   node->S += w0;
00233   node->L += w0;
00234   node->R += w0;
00235   *S = node->S;
00236   *L = node->L;
00237   *R = node->R;
00238 }
00239 
00240 
00241 
00242 template <>
00243 void CTrie<POIMTrie>::POIMs_precalc_SLR( const float64_t* const distrib )
00244 {
00245     if( degree == 1 ) {
00246         return;
00247     }
00248 
00249     ASSERT(degree>=2);
00250     const int32_t N = length;
00251     float64_t dummy;
00252     int32_t symLeft;
00253     int32_t leftSubtrees[4];
00254     for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00255         leftSubtrees[ symLeft ] = NO_CHILD;
00256 
00257     for(int32_t i = 0; i < N; ++i )
00258     {
00259         POIMs_calc_SLR_helper2( distrib, i, trees[i], leftSubtrees, 0, &dummy, &dummy, &dummy );
00260 
00261         const POIMTrie* const node = &TreeMem[ trees[i] ];
00262         ASSERT(trees[i]!=NO_CHILD);
00263 
00264         for(symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00265             leftSubtrees[ symLeft ] = node->children[ symLeft ];
00266     }
00267 }
00268 
00269 template <>
00270 void CTrie<POIMTrie>::POIMs_get_SLR(
00271     const int32_t parentIdx, const int32_t sym, const int32_t depth,
00272     float64_t* S, float64_t* L, float64_t* R )
00273 {
00274   ASSERT(parentIdx!=NO_CHILD);
00275   const POIMTrie* const parent = &TreeMem[ parentIdx ];
00276   if( depth < degree ) {
00277     const int32_t nodeIdx = parent->children[ sym ];
00278     const POIMTrie* const node = &TreeMem[ nodeIdx ];
00279     *S = node->S;
00280     *L = node->L;
00281     *R = node->R;
00282   } else {
00283     ASSERT(depth==degree);
00284     const float64_t w = parent->child_weights[ sym ];
00285     *S = w;
00286     *L = w;
00287     *R = w;
00288   }
00289 }
00290 
00291 template <>
00292 void CTrie<POIMTrie>::POIMs_add_SLR_helper2(
00293     float64_t* const* const poims, const int32_t K, const int32_t k,
00294     const int32_t i, const int32_t y, const float64_t valW,
00295     const float64_t valS, const float64_t valL, const float64_t valR,
00296     const int32_t debug)
00297 {
00298     //SG_PRINT( "i=%d, d=%d, y=%d:  w=%.3f \n", i, k, y, valW );
00299     const int32_t nk = nofsKmers[ k ];
00300     ASSERT(1<=k && k<=K);
00301     ASSERT(0<=y && y<nk);
00302     int32_t z;
00303     int32_t j;
00304 
00305     // --- add superstring score; subtract "w", as it was counted twice
00306     if( debug==0 || debug==2 )
00307     {
00308         poims[ k-1 ][ i*nk + y ] += valS - valW;
00309     }
00310 
00311     // --- left partial overlaps
00312     if( debug==0 || debug==3 )
00313     {
00314         int32_t r;
00315         for( r = 1; k+r <= K; ++r )
00316         {
00317             const int32_t nr = nofsKmers[ r ];
00318             const int32_t nz = nofsKmers[ k+r ];
00319             float64_t* const poim = & poims[ k+r-1 ][ i*nz ];
00320             z = y * nr;
00321             for( j = 0; j < nr; ++j )
00322             {
00323                 if( !( 0 <= z && z < nz ) ) {
00324                     SG_PRINT( "k=%d, nk=%d,  r=%d, nr=%d,  nz=%d \n", k, nk, r, nr, nz );
00325                     SG_PRINT( "  j=%d, y=%d, z=%d \n", j, y, z );
00326                 }
00327                 ASSERT(0<=z && z<nz);
00328                 poim[ z ] += valL - valW;
00329                 ++z;
00330             }
00331         }
00332     }
00333     // --- right partial overlaps
00334     if( debug==0 || debug==4 )
00335     {
00336         int32_t l;
00337         for( l = 1; k+l <= K && l <= i; ++l )
00338         {
00339             const int32_t nl = nofsKmers[ l ];
00340             const int32_t nz = nofsKmers[ k+l ];
00341             float64_t* const poim = & poims[ k+l-1 ][ (i-l)*nz ];
00342             z = y;
00343             for( j = 0; j < nl; ++j ) {
00344                 ASSERT(0<=z && z<nz);
00345                 poim[ z ] += valR - valW;
00346                 z += nk;
00347             }
00348         }
00349     }
00350 }
00351 
00352 template <>
00353 void CTrie<POIMTrie>::POIMs_add_SLR_helper1(
00354     const int32_t nodeIdx, const int32_t depth, const int32_t i,
00355     const int32_t y0, float64_t* const* const poims, const int32_t K,
00356     const int32_t debug)
00357 {
00358     ASSERT(nodeIdx!=NO_CHILD);
00359     ASSERT(depth<K);
00360     POIMTrie* const node = &TreeMem[ nodeIdx ];
00361     int32_t sym;
00362     if( depth < degree-1 )
00363     {
00364         if( depth < K-1 )
00365         {
00366             for( sym = 0; sym < NUM_SYMS; ++sym )
00367             {
00368                 const int32_t childIdx = node->children[ sym ];
00369                 if( childIdx != NO_CHILD )
00370                 {
00371                     POIMTrie* const child = &TreeMem[ childIdx ];
00372                     const int32_t y = y0 + sym;
00373                     const int32_t y1 = y * NUM_SYMS;
00374                     const float64_t w = child->weight;
00375                     POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
00376                     POIMs_add_SLR_helper1( childIdx, depth+1, i, y1, poims, K, debug );
00377                 }
00378             }
00379         }
00380         else
00381         {
00382             ASSERT(depth==K-1);
00383             for( sym = 0; sym < NUM_SYMS; ++sym )
00384             {
00385                 const int32_t childIdx = node->children[ sym ];
00386                 if( childIdx != NO_CHILD ) {
00387                     POIMTrie* const child = &TreeMem[ childIdx ];
00388                     const int32_t y = y0 + sym;
00389                     const float64_t w = child->weight;
00390                     POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
00391                 }
00392             }
00393         }
00394     }
00395     else
00396     {
00397         ASSERT(depth==degree-1);
00398         for( sym = 0; sym < NUM_SYMS; ++sym )
00399         {
00400             const float64_t w = node->child_weights[ sym ];
00401             const int32_t y = y0 + sym;
00402             POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, w, w, w, debug );
00403         }
00404     }
00405 }
00406 
00407 
00408 
00409 template <>
00410 void CTrie<POIMTrie>::POIMs_add_SLR(
00411     float64_t* const* const poims, const int32_t K, const int32_t debug)
00412 {
00413   ASSERT(degree>=1);
00414   ASSERT(K>=1);
00415   {
00416     const int32_t m = ( degree > K ) ? degree : K;
00417     nofsKmers = new int32_t[ m+1 ];
00418     int32_t n;
00419     int32_t k;
00420     n = 1;
00421     for( k = 0; k < m+1; ++k ) {
00422       nofsKmers[ k ] = n;
00423       n *= NUM_SYMS;
00424     }
00425   }
00426   const int32_t N = length;
00427   int32_t i;
00428   for( i = 0; i < N; ++i ) {
00429     POIMs_add_SLR_helper1( trees[i], 0, i, 0*NUM_SYMS, poims, K, debug );
00430   }
00431   delete[] nofsKmers;
00432 }

SHOGUN Machine Learning Toolbox - Documentation