spandsp 0.0.6
|
/*
 * SpanDSP - a series of DSP components for telephony
 *
 * fast_convert.h - Quick ways to convert floating point numbers to integers
 *
 * Written by Steve Underwood <steveu@coppice.org>
 *
 * Copyright (C) 2009 Steve Underwood
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 2.1,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: fast_convert.h,v 1.7 2009/04/18 03:18:41 steveu Exp $
 */

#if !defined(_SPANDSP_FAST_CONVERT_H_)
#define _SPANDSP_FAST_CONVERT_H_

#if defined(__cplusplus)
extern "C"
{
#endif

/* The following code, to handle issues with lrint() and lrintf() on various
 * platforms, is adapted from similar code in libsndfile, which is:
 *
 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */

/*
 *    On Intel Pentium processors (especially PIII and probably P4), converting
 *    from float to int is very slow. To meet the C specs, the code produced by
 *    most C compilers targeting Pentium needs to change the FPU rounding mode
 *    before the float to int conversion is performed.
 *
 *    Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
 *    is this flushing of the pipeline which is so slow.
 *
 *    Fortunately the ISO C99 specification defines the functions lrint, lrintf,
 *    llrint and llrintf which fix this problem as a side effect.
 *
 *    On Unix-like systems, the configure process should have detected the
 *    presence of these functions. If they weren't found we have to replace them
 *    here with a standard C cast.
 */

/*
 *    The C99 prototypes for these functions are as follows:
 *
 *        float rintf(float x);
 *        double rint(double x);
 *        long int lrintf(float x);
 *        long int lrint(double x);
 *        long long int llrintf(float x);
 *        long long int llrint(double x);
 *
 *    The presence of the required functions is detected during the configure
 *    process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
 *    the config file.
 */

/*
 * Every branch below provides lfastrint(double) and lfastrintf(float), each
 * returning a long int. Some branches also supply replacements for lrint()
 * and lrintf(). Note that the branches are not numerically identical: the
 * x87 and PowerPC assembler versions convert using the FPU's current rounding
 * mode, while the plain C cast versions truncate towards zero.
 */

#if defined(__CYGWIN__)
#if !defined(__cplusplus)
/*
 * CYGWIN has lrint and lrintf functions, but they are slow and buggy:
 *        http://sourceware.org/ml/cygwin/2005-06/msg00153.html
 *        http://sourceware.org/ml/cygwin/2005-09/msg00047.html
 * The latest version of cygwin seems to have made no effort to fix this.
 * These replacement functions (pulled from the Public Domain MinGW
 * math.h header) replace the native versions.
 */
static __inline__ long int lrint(double x)
{
    long int retval;

    /* fistpl stores ST(0) as a 32 bit integer and pops the x87 stack */
    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}

static __inline__ long int lrintf(float x)
{
    long int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"
    );
    return retval;
}
#endif

/* The fastest way to convert is the equivalent of lrint() */
static __inline__ long int lfastrint(double x)
{
    long int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}

static __inline__ long int lfastrintf(float x)
{
    long int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"
    );
    return retval;
}
#elif defined(__GNUC__)

#if defined(__i386__)
/* These routines are guaranteed fast on an i386 machine. Using the built in
   lrint() and lrintf() should be similar, but they may not always be enabled.
   Sometimes, especially with "-O0", you might get slow calls to routines. */
static __inline__ long int lfastrint(double x)
{
    long int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}

static __inline__ long int lfastrintf(float x)
{
    long int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"
    );
    return retval;
}
#elif defined(__x86_64__)
/* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
   double or float to an int. It looks like the design on the x86_64 took account
   of the default behaviour specified for C. */
static __inline__ long int lfastrint(double x)
{
    return (long int) (x);
}

static __inline__ long int lfastrintf(float x)
{
    return (long int) (x);
}
#elif defined(__ppc__)  ||   defined(__powerpc__)
/*
 * fctiw converts to a 32 bit integer held in a floating point register; stfd
 * then stores all 8 bytes of that register to res[], and the integer is read
 * back from res[1] (assumes a big-endian layout - TODO confirm for any
 * little-endian PowerPC target).
 *
 * NOTE(review): fctiw writes its result back into the register bound to the
 * input operand %1. GCC's extended asm rules say input-only operands must not
 * be modified - confirm whether this should be an in/out operand.
 */
static __inline__ long int lfastrint(register double x)
{
    int res[2];

    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (res)    /* Output */
        : "f" (x)       /* Input */
        : "memory"
    );

    return res[1];
}

static __inline__ long int lfastrintf(register float x)
{
    int res[2];

    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (res)    /* Output */
        : "f" (x)       /* Input */
        : "memory"
    );

    return res[1];
}
#else
/* Fallback routines, for unrecognised platforms */
static __inline__ long int lfastrint(double x)
{
    return (long int) x;
}

static __inline__ long int lfastrintf(float x)
{
    return (long int) x;
}
#endif

#elif defined(_M_IX86)
/* Visual Studio i386 */
/*
 *    Win32 doesn't seem to have the lrint() and lrintf() functions.
 *    Therefore implement inline versions of these functions here.
 */

__inline long int lrint(double x)
{
    long int i;

    _asm
    {
        fld x
        fistp i
    };
    return i;
}

__inline long int lrintf(float x)
{
    long int i;

    _asm
    {
        fld x
        fistp i
    };
    return i;
}

/* rintf() and rint() have no return statement: the rounded value is left in
   ST(0) by frndint, and the code relies on the compiler treating that as the
   function's floating point return value. */
__inline float rintf(float flt)
{
    _asm
    {   fld flt
        frndint
    }
}

__inline double rint(double dbl)
{
    _asm
    {
        fld dbl
        frndint
    }
}

__inline long int lfastrint(double x)
{
    long int i;

    _asm
    {
        fld x
        fistp i
    };
    return i;
}

__inline long int lfastrintf(float x)
{
    long int i;

    _asm
    {
        fld x
        fistp i
    };
    return i;
}
#elif defined(_M_X64)
/* Visual Studio x86_64 */
/* x86_64 machines will do best with a simple assignment. */
#include <intrin.h>

__inline long int lrint(double x)
{
    /* _mm_load_sd() reads exactly one double. The unaligned packed load used
       previously (_mm_loadu_pd) reads 16 bytes, which runs 8 bytes past the
       end of the single double parameter. */
    return (long int)_mm_cvtsd_si64x( _mm_load_sd(&x) );
}

__inline long int lrintf(float x)
{
    return _mm_cvt_ss2si( _mm_load_ss((const float*)&x) );
}

__inline long int lfastrint(double x)
{
    return (long int) (x);
}

__inline long int lfastrintf(float x)
{
    return (long int) (x);
}
#elif defined(__MWERKS__)  &&  defined(macintosh)
/* This MacOS 9 solution was provided by Stephane Letz */

long int __inline__ lfastrint(register double x)
{
    long int res[2];

    asm
    {
        fctiw x, x
        stfd x, res
    }
    return res[1];
}

long int __inline__ lfastrintf(register float x)
{
    long int res[2];

    asm
    {
        fctiw x, x
        stfd x, res
    }
    return res[1];
}
#elif defined(__MACH__)  &&  defined(__APPLE__)  &&  (defined(__ppc__)  ||  defined(__powerpc__))
/* For Apple Mac OS/X - do recent versions still need this? */

static __inline__ long int lfastrint(register double x)
{
    int res[2];

    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (res)    /* Output */
        : "f" (x)       /* Input */
        : "memory"
    );

    return res[1];
}

static __inline__ long int lfastrintf(register float x)
{
    int res[2];

    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (res)    /* Output */
        : "f" (x)       /* Input */
        : "memory"
    );

    return res[1];
}
#else
/* There is nothing else to do, but use a simple casting operation, instead of a real
   rint() type function. Since we are only trying to use rint() to speed up conversions,
   the accuracy issues related to changing the rounding scheme are of little concern
   to us. */

#if !defined(__sgi)
    #warning "No usable lrint() and lrintf() functions available."
    #warning "Replacing these functions with a simple C cast."
#endif

static __inline__ long int lrint(double x)
{
    return (long int) (x);
}

static __inline__ long int lrintf(float x)
{
    return (long int) (x);
}

static __inline__ long int lfastrint(double x)
{
    return (long int) (x);
}

static __inline__ long int lfastrintf(float x)
{
    return (long int) (x);
}
#endif

#if defined(__cplusplus)
}
#endif

#endif

/*- End of file ------------------------------------------------------------*/