00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017 #include "lib/common.h"
00018 #include "lib/Mathematics.h"
00019 #include "lib/lapack.h"
00020 #include "lib/io.h"
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <math.h>
00025
00027
00029
00030 #ifdef USE_LOGCACHE
00031
00032 #ifdef USE_HMMDEBUG
00033 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00034 #define LOG_TABLE_PRECISION 1e-6
00035 #else
00036 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00037 #define LOG_TABLE_PRECISION 1e-15
00038 #endif
00039
00040 int32_t CMath::LOGACCURACY = 0;
00041 #endif
00042
00043 int32_t CMath::LOGRANGE = 0;
00044
00045 const float64_t CMath::INFTY = -log(0.0);
00046 const float64_t CMath::ALMOST_INFTY = +1e+20;
00047 const float64_t CMath::ALMOST_NEG_INFTY = -1000;
00048 #ifdef USE_LOGCACHE
00049 float64_t* CMath::logtable = NULL;
00050 #endif
00051 char* CMath::rand_state = NULL;
00052 uint32_t CMath::seed = 0;
00053
00054 CMath::CMath()
00055 : CSGObject()
00056 {
00057 CMath::rand_state=new char[RNG_SEED_SIZE];
00058 init_random();
00059
00060 #ifdef USE_LOGCACHE
00061 LOGRANGE=CMath::determine_logrange();
00062 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00063 CMath::logtable=new float64_t[LOGRANGE*LOGACCURACY];
00064 init_log_table();
00065 #else
00066 int32_t i=0;
00067 while ((float64_t)log(1+((float64_t)exp(-float64_t(i)))))
00068 i++;
00069
00070 LOGRANGE=i;
00071 #endif
00072 }
00073
00074 CMath::~CMath()
00075 {
00076 delete[] CMath::rand_state;
00077 CMath::rand_state=NULL;
00078 #ifdef USE_LOGCACHE
00079 delete[] CMath::logtable;
00080 CMath::logtable=NULL;
00081 #endif
00082 }
00083
00084 #ifdef USE_LOGCACHE
00085 int32_t CMath::determine_logrange()
00086 {
00087 int32_t i;
00088 float64_t acc=0;
00089 for (i=0; i<50; i++)
00090 {
00091 acc=((float64_t)log(1+((float64_t)exp(-float64_t(i)))));
00092 if (acc<=(float64_t)LOG_TABLE_PRECISION)
00093 break;
00094 }
00095
00096 SG_SINFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00097 return i;
00098 }
00099
00100 int32_t CMath::determine_logaccuracy(int32_t range)
00101 {
00102 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t));
00103 SG_SINFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00104 return range;
00105 }
00106
00107
00108 void CMath::init_log_table()
00109 {
00110 for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++)
00111 logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY)));
00112 }
00113 #endif
00114
00115 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col)
00116 {
00117 int32_t changed=1;
00118 if (a[0]==-1) return ;
00119 while (changed)
00120 {
00121 changed=0; int32_t i=0 ;
00122 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1))
00123 {
00124 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00125 {
00126 for (int32_t j=0; j<cols; j++)
00127 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00128 changed=1 ;
00129 } ;
00130 i++ ;
00131 } ;
00132 } ;
00133 }
00134
00135 void CMath::sort(float64_t *a, int32_t* idx, int32_t N)
00136 {
00137 int32_t changed=1;
00138 while (changed)
00139 {
00140 changed=0;
00141 for (int32_t i=0; i<N-1; i++)
00142 {
00143 if (a[i]>a[i+1])
00144 {
00145 swap(a[i],a[i+1]) ;
00146 swap(idx[i],idx[i+1]) ;
00147 changed=1 ;
00148 } ;
00149 } ;
00150 } ;
00151
00152 }
00153
00154 float64_t CMath::Align(
00155 char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost)
00156 {
00157 float64_t actCost=0 ;
00158 int32_t i1, i2 ;
00159 float64_t* const gapCosts1 = new float64_t[ l1 ];
00160 float64_t* const gapCosts2 = new float64_t[ l2 ];
00161 float64_t* costs2_0 = new float64_t[ l2 + 1 ];
00162 float64_t* costs2_1 = new float64_t[ l2 + 1 ];
00163
00164
00165 for( i1 = 0; i1 < l1; ++i1 ) {
00166 gapCosts1[ i1 ] = gapCost * i1;
00167 }
00168 costs2_1[ 0 ] = 0;
00169 for( i2 = 0; i2 < l2; ++i2 ) {
00170 gapCosts2[ i2 ] = gapCost * i2;
00171 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00172 }
00173
00174 for( i1 = 0; i1 < l1; ++i1 ) {
00175 swap( costs2_0, costs2_1 );
00176 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00177 costs2_1[ 0 ] = actCost;
00178 for( i2 = 0; i2 < l2; ++i2 ) {
00179 const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00180 const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00181 const float64_t actGap2 = actCost + gapCosts2[ i2 ];
00182 const float64_t actGap = min( actGap1, actGap2 );
00183 actCost = min( actMatch, actGap );
00184 costs2_1[ i2+1 ] = actCost;
00185 }
00186 }
00187
00188 delete [] gapCosts1;
00189 delete [] gapCosts2;
00190 delete [] costs2_0;
00191 delete [] costs2_1;
00192
00193
00194 return actCost;
00195 }
00196
00197
00198
00199 int32_t CMath::calcroc(
00200 float64_t* fp, float64_t* tp, float64_t* output, int32_t* label,
00201 int32_t& size, int32_t& possize, int32_t& negsize, float64_t& tresh,
00202 FILE* rocfile)
00203 {
00204 int32_t left=0;
00205 int32_t right=size-1;
00206 int32_t i;
00207
00208 for (i=0; i<size; i++)
00209 {
00210 if (!(label[i]== -1 || label[i]==1))
00211 return -1;
00212 }
00213
00214
00215 while (left<right)
00216 {
00217 while ((label[left] < 0) && (left<right))
00218 left++;
00219 while ((label[right] > 0) && (left<right))
00220 right--;
00221
00222 swap(output[left],output[right]);
00223 swap(label[left], label[right]);
00224 }
00225
00226
00227 negsize=left;
00228 possize=size-left;
00229 float64_t* negout=output;
00230 float64_t* posout=&output[left];
00231
00232
00233 qsort(negout, negsize);
00234 qsort(posout, possize);
00235
00236
00237 float64_t minimum=min(negout[0], posout[0]);
00238 float64_t maximum=minimum;
00239 if (negsize>0)
00240 maximum=max(maximum,negout[negsize-1]);
00241 if (possize>0)
00242 maximum=max(maximum,posout[possize-1]);
00243
00244 float64_t treshhold=minimum;
00245 float64_t old_treshhold=treshhold;
00246
00247
00248 for (i=0; i<size; i++)
00249 {
00250 fp[i]=1.0;
00251 tp[i]=1.0;
00252 }
00253
00254
00255
00256 int32_t posidx=0;
00257 int32_t negidx=0;
00258 int32_t iteration=1;
00259 int32_t returnidx=-1;
00260
00261 float64_t minerr=10;
00262
00263 while (iteration < size && treshhold<=maximum)
00264 {
00265 old_treshhold=treshhold;
00266
00267 while (treshhold==old_treshhold && treshhold<=maximum)
00268 {
00269 if (posidx<possize && negidx<negsize)
00270 {
00271 if (posout[posidx]<negout[negidx])
00272 {
00273 if (posout[posidx]==treshhold)
00274 posidx++;
00275 else
00276 treshhold=posout[posidx];
00277 }
00278 else
00279 {
00280 if (negout[negidx]==treshhold)
00281 negidx++;
00282 else
00283 treshhold=negout[negidx];
00284 }
00285 }
00286 else
00287 {
00288 if (posidx>=possize && negidx<negsize-1)
00289 {
00290 negidx++;
00291 treshhold=negout[negidx];
00292 }
00293 else if (negidx>=negsize && posidx<possize-1)
00294 {
00295 posidx++;
00296 treshhold=posout[posidx];
00297 }
00298 else if (negidx<negsize && treshhold!=negout[negidx])
00299 treshhold=negout[negidx];
00300 else if (posidx<possize && treshhold!=posout[posidx])
00301 treshhold=posout[posidx];
00302 else
00303 {
00304 treshhold=2*(maximum+100);
00305 posidx=possize;
00306 negidx=negsize;
00307 break;
00308 }
00309 }
00310 }
00311
00312
00313 tp[iteration]=(possize-posidx)/(float64_t (possize));
00314 fp[iteration]=(negsize-negidx)/(float64_t (negsize));
00315
00316
00317 if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
00318 {
00319 minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
00320 tresh=(old_treshhold+treshhold)/2;
00321 returnidx=iteration;
00322 }
00323
00324 iteration++;
00325 }
00326
00327
00328 size=iteration;
00329
00330 if (rocfile)
00331 {
00332 const char id[]="ROC";
00333 fwrite(id, sizeof(char), sizeof(id), rocfile);
00334 fwrite(fp, sizeof(float64_t), size, rocfile);
00335 fwrite(tp, sizeof(float64_t), size, rocfile);
00336 }
00337
00338 return returnidx;
00339 }
00340
00341 uint32_t CMath::crc32(uint8_t *data, int32_t len)
00342 {
00343 uint32_t result;
00344 int32_t i,j;
00345 uint8_t octet;
00346
00347 result = 0-1;
00348 for (i=0; i<len; i++)
00349 {
00350 octet = *(data++);
00351 for (j=0; j<8; j++)
00352 {
00353 if ((octet >> 7) ^ (result >> 31))
00354 {
00355 result = (result << 1) ^ 0x04c11db7;
00356 }
00357 else
00358 {
00359 result = (result << 1);
00360 }
00361 octet <<= 1;
00362 }
00363 }
00364
00365 return ~result;
00366 }
00367
00368 float64_t CMath::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
00369 {
00370 double e=0;
00371
00372 for (int32_t i=0; i<len; i++)
00373 for (int32_t j=0; j<len; j++)
00374 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
00375
00376 return (float64_t) e;
00377 }
00378
00379 float64_t CMath::relative_entropy(float64_t* p, float64_t* q, int32_t len)
00380 {
00381 double e=0;
00382
00383 for (int32_t i=0; i<len; i++)
00384 e+=exp(p[i])*(p[i]-q[i]);
00385
00386 return (float64_t) e;
00387 }
00388
00389 float64_t CMath::entropy(float64_t* p, int32_t len)
00390 {
00391 double e=0;
00392
00393 for (int32_t i=0; i<len; i++)
00394 e-=exp(p[i])*p[i];
00395
00396 return (float64_t) e;
00397 }
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409 #ifdef HAVE_LAPACK
00410 float64_t* CMath::pinv(
00411 float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
00412 {
00413 if (!target)
00414 target=new float64_t[rows*cols];
00415
00416 char jobu='A';
00417 char jobvt='A';
00418 int m=rows;
00419 int n=cols;
00420 int lda=m;
00421 int ldu=m;
00422 int ldvt=n;
00423 int info=-1;
00424 int32_t lsize=CMath::min((int32_t) m, (int32_t) n);
00425 double* s=new double[lsize];
00426 double* u=new double[m*m];
00427 double* vt=new double[n*n];
00428
00429 wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
00430 ASSERT(info==0);
00431
00432 for (int32_t i=0; i<n; i++)
00433 {
00434 for (int32_t j=0; j<lsize; j++)
00435 vt[i*n+j]=vt[i*n+j]/s[j];
00436 }
00437
00438 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
00439
00440 delete[] u;
00441 delete[] vt;
00442 delete[] s;
00443
00444 return target;
00445 }
00446 #endif
00447
00448 template <>
00449 void CMath::display_vector(uint8_t* vector, int32_t n, const char* name)
00450 {
00451 ASSERT(n>=0);
00452 SG_SPRINT("%s=[", name);
00453 for (int32_t i=0; i<n; i++)
00454 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00455 SG_SPRINT("]\n");
00456 }
00457
00458 template <>
00459 void CMath::display_vector(int32_t* vector, int32_t n, const char* name)
00460 {
00461 ASSERT(n>=0);
00462 SG_SPRINT("%s=[", name);
00463 for (int32_t i=0; i<n; i++)
00464 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00465 SG_SPRINT("]\n");
00466 }
00467
00468 template <>
00469 void CMath::display_vector(int64_t* vector, int32_t n, const char* name)
00470 {
00471 ASSERT(n>=0);
00472 SG_SPRINT("%s=[", name);
00473 for (int32_t i=0; i<n; i++)
00474 SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
00475 SG_SPRINT("]\n");
00476 }
00477
00478 template <>
00479 void CMath::display_vector(float32_t* vector, int32_t n, const char* name)
00480 {
00481 ASSERT(n>=0);
00482 SG_SPRINT("%s=[", name);
00483 for (int32_t i=0; i<n; i++)
00484 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00485 SG_SPRINT("]\n");
00486 }
00487
00488 template <>
00489 void CMath::display_vector(float64_t* vector, int32_t n, const char* name)
00490 {
00491 ASSERT(n>=0);
00492 SG_SPRINT("%s=[", name);
00493 for (int32_t i=0; i<n; i++)
00494 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00495 SG_SPRINT("]\n");
00496 }
00497
00498 template <>
00499 void CMath::display_matrix(
00500 int32_t* matrix, int32_t rows, int32_t cols, const char* name)
00501 {
00502 ASSERT(rows>=0 && cols>=0);
00503 SG_SPRINT("%s=[\n", name);
00504 for (int32_t i=0; i<rows; i++)
00505 {
00506 SG_SPRINT("[");
00507 for (int32_t j=0; j<cols; j++)
00508 SG_SPRINT("\t%d%s", matrix[j*rows+i],
00509 j==cols-1? "" : ",");
00510 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00511 }
00512 SG_SPRINT("]\n");
00513 }
00514
00515 template <>
00516 void CMath::display_matrix(
00517 float64_t* matrix, int32_t rows, int32_t cols, const char* name)
00518 {
00519 ASSERT(rows>=0 && cols>=0);
00520 SG_SPRINT("%s=[\n", name);
00521 for (int32_t i=0; i<rows; i++)
00522 {
00523 SG_SPRINT("[");
00524 for (int32_t j=0; j<cols; j++)
00525 SG_SPRINT("\t%lf%s", (double) matrix[j*rows+i],
00526 j==cols-1? "" : ",");
00527 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00528 }
00529 SG_SPRINT("]\n");
00530 }
00531