Answers:
If you want a cheap and dirty optimized power-series expansion of sqrt() (the plain Taylor-series coefficients converge slowly), along with a bunch of other transcendental functions, I have some code from long ago. I used to sell this code, but nobody has paid me for it in nearly a decade, so I figure I'll release it for public use. This particular file was for an application where the processor already had floating point (IEEE-754 single precision), and they had a C compiler and dev system, but they did not have (or did not want to link in) a stdlib with the standard math functions. They didn't need perfect precision, but they wanted things fast. You can easily reverse-engineer the code to see the power-series coefficients and then write your own. This code assumes IEEE-754 and masks off the mantissa and exponent bits.
It appears that the "code markup" SE has is not friendly to angle-bracket characters (you know, ">" or "<"), so you may have to hit "edit" to see it all.
//
// FILE: __functions.h
//
// fast and approximate transcendental functions
//
// copyright (c) 2004 Robert Bristow-Johnson
//
// rbj@audioimagination.com
//
#ifndef __FUNCTIONS_H
#define __FUNCTIONS_H
#define TINY 1.0e-8
#define HUGE 1.0e8
#define PI (3.1415926535897932384626433832795028841972) /* pi */
#define ONE_OVER_PI (0.3183098861837906661338147750939)
#define TWOPI (6.2831853071795864769252867665590057683943) /* 2*pi */
#define ONE_OVER_TWOPI (0.15915494309189535682609381638)
#define PI_2 (1.5707963267948966192313216916397514420986) /* pi/2 */
#define TWO_OVER_PI (0.636619772367581332267629550188)
#define LN2 (0.6931471805599453094172321214581765680755) /* ln(2) */
#define ONE_OVER_LN2 (1.44269504088896333066907387547)
#define LN10 (2.3025850929940456840179914546843642076011) /* ln(10) */
#define ONE_OVER_LN10 (0.43429448190325177635683940025)
#define ROOT2 (1.4142135623730950488016887242096980785697) /* sqrt(2) */
#define ONE_OVER_ROOT2 (0.707106781186547438494264988549)
#define DB_LOG2_ENERGY (3.01029995663981154631945610163) /* dB = DB_LOG2_ENERGY*__log2(energy) */
#define DB_LOG2_AMPL (6.02059991327962309263891220326) /* dB = DB_LOG2_AMPL*__log2(amplitude) */
#define ONE_OVER_DB_LOG2_AMPL (0.16609640474436811218256075335) /* amplitude = __exp2(ONE_OVER_DB_LOG2_AMPL*dB) */
#define LONG_OFFSET 4096L
#define FLOAT_OFFSET 4096.0
float __sqrt(float x);
float __log2(float x);
float __exp2(float x);
float __log(float x);
float __exp(float x);
float __pow(float x, float y);
float __sin_pi(float x);
float __cos_pi(float x);
float __sin(float x);
float __cos(float x);
float __tan(float x);
float __atan(float x);
float __asin(float x);
float __acos(float x);
float __arg(float Imag, float Real);
float __poly(float *a, int order, float x);
float __map(float *f, float scaler, float x);
float __discreteMap(float *f, float scaler, float x);
unsigned long __random();
#endif
//
// FILE: __functions.c
//
// fast and approximate transcendental functions
//
// copyright (c) 2004 Robert Bristow-Johnson
//
// rbj@audioimagination.com
//
#define STD_MATH_LIB 0
#include "__functions.h"
#if STD_MATH_LIB
#include "math.h" // angle brackets don't work with SE markup
#endif
float __sqrt(register float x)
{
#if STD_MATH_LIB
return (float) sqrt((double)x);
#else
if (x > 5.877471754e-39)
{
register float accumulator, xPower;
register long intPart;
register union {float f; long i;} xBits;
xBits.f = x;
intPart = ((xBits.i)>>23); /* get biased exponent */
intPart -= 127; /* unbias it */
x = (float)(xBits.i & 0x007FFFFF); /* mask off exponent leaving 0x800000*(mantissa - 1) */
x *= 1.192092895507812e-07; /* divide by 0x800000 */
accumulator = 1.0 + 0.49959804148061*x;
xPower = x*x;
accumulator += -0.12047308243453*xPower;
xPower *= x;
accumulator += 0.04585425015501*xPower;
xPower *= x;
accumulator += -0.01076564682800*xPower;
if (intPart & 0x00000001)
{
accumulator *= ROOT2; /* an odd input exponent means an extra sqrt(2) in the output */
}
xBits.i = intPart >> 1; /* divide exponent by 2, lose LSB */
xBits.i += 127; /* rebias exponent */
xBits.i <<= 23; /* move biased exponent into exponent bits */
return accumulator * xBits.f;
}
else
{
return 0.0;
}
#endif
}
float __log2(register float x)
{
#if STD_MATH_LIB
return (float) (ONE_OVER_LN2*log((double)x));
#else
if (x > 5.877471754e-39)
{
register float accumulator, xPower;
register long intPart;
register union {float f; long i;} xBits;
xBits.f = x;
intPart = ((xBits.i)>>23); /* get biased exponent */
intPart -= 127; /* unbias it */
x = (float)(xBits.i & 0x007FFFFF); /* mask off exponent leaving 0x800000*(mantissa - 1) */
x *= 1.192092895507812e-07; /* divide by 0x800000 */
accumulator = 1.44254494359510*x;
xPower = x*x;
accumulator += -0.71814525675041*xPower;
xPower *= x;
accumulator += 0.45754919692582*xPower;
xPower *= x;
accumulator += -0.27790534462866*xPower;
xPower *= x;
accumulator += 0.12179791068782*xPower;
xPower *= x;
accumulator += -0.02584144982967*xPower;
return accumulator + (float)intPart;
}
else
{
return -HUGE;
}
#endif
}
float __exp2(register float x)
{
#if STD_MATH_LIB
return (float) exp(LN2*(double)x);
#else
if (x >= -127.0)
{
register float accumulator, xPower;
register union {float f; long i;} xBits;
xBits.i = (long)(x + FLOAT_OFFSET) - LONG_OFFSET; /* integer part */
x -= (float)(xBits.i); /* fractional part */
accumulator = 1.0 + 0.69303212081966*x;
xPower = x*x;
accumulator += 0.24137976293709*xPower;
xPower *= x;
accumulator += 0.05203236900844*xPower;
xPower *= x;
accumulator += 0.01355574723481*xPower;
xBits.i += 127; /* bias integer part */
xBits.i <<= 23; /* move biased int part into exponent bits */
return accumulator * xBits.f;
}
else
{
return 0.0;
}
#endif
}
float __log(register float x)
{
#if STD_MATH_LIB
return (float) log((double)x);
#else
return LN2*__log2(x);
#endif
}
float __exp(register float x)
{
#if STD_MATH_LIB
return (float) exp((double)x);
#else
return __exp2(ONE_OVER_LN2*x);
#endif
}
float __pow(float x, float y)
{
#if STD_MATH_LIB
return (float) pow((double)x, (double)y);
#else
return __exp2(y*__log2(x));
#endif
}
float __sin_pi(register float x)
{
#if STD_MATH_LIB
return (float) sin(PI*(double)x);
#else
register float accumulator, xPower, xSquared;
register long evenIntPart = ((long)(0.5*x + 1024.5) - 1024)<<1;
x -= (float)evenIntPart;
xSquared = x*x;
accumulator = 3.14159265358979*x;
xPower = xSquared*x;
accumulator += -5.16731953364340*xPower;
xPower *= xSquared;
accumulator += 2.54620566822659*xPower;
xPower *= xSquared;
accumulator += -0.586027023087261*xPower;
xPower *= xSquared;
accumulator += 0.06554823491427*xPower;
return accumulator;
#endif
}
float __cos_pi(register float x)
{
#if STD_MATH_LIB
return (float) cos(PI*(double)x);
#else
register float accumulator, xPower, xSquared;
register long evenIntPart = ((long)(0.5*x + 1024.5) - 1024)<<1;
x -= (float)evenIntPart;
xSquared = x*x;
accumulator = 1.57079632679490*x; /* series for sin(PI/2*x) */
xPower = xSquared*x;
accumulator += -0.64596406188166*xPower;
xPower *= xSquared;
accumulator += 0.07969158490912*xPower;
xPower *= xSquared;
accumulator += -0.00467687997706*xPower;
xPower *= xSquared;
accumulator += 0.00015303015470*xPower;
return 1.0 - 2.0*accumulator*accumulator; /* cos(w) = 1 - 2*(sin(w/2))^2 */
#endif
}
float __sin(register float x)
{
#if STD_MATH_LIB
return (float) sin((double)x);
#else
x *= ONE_OVER_PI;
return __sin_pi(x);
#endif
}
float __cos(register float x)
{
#if STD_MATH_LIB
return (float) cos((double)x);
#else
x *= ONE_OVER_PI;
return __cos_pi(x);
#endif
}
float __tan(register float x)
{
#if STD_MATH_LIB
return (float) tan((double)x);
#else
x *= ONE_OVER_PI;
return __sin_pi(x)/__cos_pi(x);
#endif
}
float __atan(register float x)
{
#if STD_MATH_LIB
return (float) atan((double)x);
#else
register float accumulator, xPower, xSquared, offset;
offset = 0.0;
if (x < -1.0)
{
offset = -PI_2;
x = -1.0/x;
}
else if (x > 1.0)
{
offset = PI_2;
x = -1.0/x;
}
xSquared = x*x;
accumulator = 1.0;
xPower = xSquared;
accumulator += 0.33288950512027*xPower;
xPower *= xSquared;
accumulator += -0.08467922817644*xPower;
xPower *= xSquared;
accumulator += 0.03252232640125*xPower;
xPower *= xSquared;
accumulator += -0.00749305860992*xPower;
return offset + x/accumulator;
#endif
}
float __asin(register float x)
{
#if STD_MATH_LIB
return (float) asin((double)x);
#else
return __atan(x/__sqrt(1.0 - x*x));
#endif
}
float __acos(register float x)
{
#if STD_MATH_LIB
return (float) acos((double)x);
#else
return __atan(__sqrt(1.0 - x*x)/x);
#endif
}
float __arg(float Imag, float Real)
{
#if STD_MATH_LIB
return (float) atan2((double)Imag, (double)Real);
#else
register float accumulator, xPower, xSquared, offset, x;
if (Imag > 0.0)
{
if (Imag <= -Real)
{
offset = PI;
x = Imag/Real;
}
else if (Imag > Real)
{
offset = PI_2;
x = -Real/Imag;
}
else
{
offset = 0.0;
x = Imag/Real;
}
}
else
{
if (Imag >= Real)
{
offset = -PI;
x = Imag/Real;
}
else if (Imag < -Real)
{
offset = -PI_2;
x = -Real/Imag;
}
else
{
offset = 0.0;
x = Imag/Real;
}
}
xSquared = x*x;
accumulator = 1.0;
xPower = xSquared;
accumulator += 0.33288950512027*xPower;
xPower *= xSquared;
accumulator += -0.08467922817644*xPower;
xPower *= xSquared;
accumulator += 0.03252232640125*xPower;
xPower *= xSquared;
accumulator += -0.00749305860992*xPower;
return offset + x/accumulator;
#endif
}
float __poly(float *a, int order, float x)
{
register float accumulator = 0.0, xPower;
register int n;
accumulator = a[0];
xPower = x;
for (n=1; n<=order; n++)
{
accumulator += a[n]*xPower;
xPower *= x;
}
return accumulator;
}
float __map(float *f, float scaler, float x)
{
register long i;
x *= scaler;
i = (long)(x + FLOAT_OFFSET) - LONG_OFFSET; /* round down without floor() */
return f[i] + (f[i+1] - f[i])*(x - (float)i); /* linear interpolate between points */
}
float __discreteMap(float *f, float scaler, float x)
{
register long i;
x *= scaler;
i = (long)(x + (FLOAT_OFFSET+0.5)) - LONG_OFFSET; /* round to nearest */
return f[i];
}
unsigned long __random()
{
static unsigned long seed0 = 0x5B7A2775, seed1 = 0x80C7169F;
seed0 += seed1;
seed1 += seed0;
return seed1;
}
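A minimal usage sketch, assuming the two files above are compiled into the same project (the test values and expected figures below are mine, just for a sanity check):
#include <stdio.h>
#include "__functions.h"

int main(void)
{
    printf("__sqrt(2.0)  = %f  (expect ~1.4142)\n", __sqrt(2.0f));
    printf("__log2(8.0)  = %f  (expect ~3.0)\n",    __log2(8.0f));
    printf("__exp2(0.5)  = %f  (expect ~1.4142)\n", __exp2(0.5f));
    printf("__sin(PI/6)  = %f  (expect ~0.5)\n",    __sin((float)(PI/6.0)));
    printf("__arg(1, 1)  = %f  (expect ~0.7854)\n", __arg(1.0f, 1.0f));
    return 0;
}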
If you haven't seen it yet, the "Quake square root" is downright magical. It uses some bit-level magic to give you a very good first approximation, then corrects it with one or two rounds of Newton's method. It may help if you are working with limited resources.
https://zh.wikipedia.org/wiki/Fast_inverse_square_root
http://betterexplained.com/articles/understanding-quakes-fast-inverse-square-root/
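For reference, a minimal sketch of that trick in the well-known Quake III form (using memcpy instead of pointer casts to avoid strict-aliasing trouble; it assumes 32-bit IEEE-754 floats, and the function name is mine):
#include <stdint.h>   /* uint32_t */
#include <string.h>   /* memcpy */

float fast_rsqrt(float x)
{
    float half = 0.5f * x;
    float y = x;
    uint32_t i;
    memcpy(&i, &y, sizeof i);        /* reinterpret the float's bits as an integer */
    i = 0x5f3759dfu - (i >> 1);      /* magic constant gives a good first guess of 1/sqrt(x) */
    memcpy(&y, &i, sizeof y);
    y = y * (1.5f - half * y * y);   /* one Newton-Raphson step to refine it */
    return y;                        /* approx 1/sqrt(x); multiply by x to get sqrt(x) */
}
Repeating the last line once more roughly doubles the number of correct digits, at the cost of a few extra multiplies.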
You can also approximate the square root function using Newton's method. Newton's method is a way of approximating where the root of a function lies. It is an iterative method: the result of the previous iteration is used in the next one, until convergence. Given an initial guess $x_0$ of where the root lies, we keep looping the update equation, feeding in the result of the previous iteration, until the answer no longer changes. In general, the guess at the $(n+1)$-th iteration, given the guess $x_n$ at the $n$-th iteration, is defined as:
$$x_{n+1} = x_n - \frac{f(x_n)}{f'(x_n)}$$
To approximate the square root with Newton's method, suppose we are given a number $a$ and want to compute $\sqrt{a}$. That is, we are looking for an answer $x$ such that $x = \sqrt{a}$. Squaring both sides and moving $a$ to the other side of the equation gives $x^2 - a = 0$. The solution of this equation is $x = \sqrt{a}$, so it is a root of the function. Accordingly, let $f(x) = x^2 - a$ be the equation whose root we want to find. Substituting into Newton's method, with $f'(x) = 2x$, we get:
$$x_{n+1} = x_n - \frac{x_n^2 - a}{2x_n} = \frac{1}{2}\left(x_n + \frac{a}{x_n}\right)$$
So to compute the square root of $a$, we simply run Newton's method until it converges. However, as @robertbristow-johnson pointed out, division is a very expensive operation, especially on resource-limited microcontrollers/DSPs. There can also be cases where the guess is 0, which the division would turn into a divide-by-zero error. Instead, we can apply Newton's method to the reciprocal function, i.e. $x = 1/\sqrt{a}$, which also avoids any division, as we will see below. Squaring both sides and rearranging gives $1/x^2 - a = 0$, whose solution is $x = 1/\sqrt{a}$; multiplying by $a$ at the end then gives the $\sqrt{a}$ we want. Using Newton's method again, with $f(x) = 1/x^2 - a$ and $f'(x) = -2/x^3$, we get:
$$x_{n+1} = x_n - \frac{1/x_n^2 - a}{-2/x_n^3} = \frac{x_n}{2}\left(3 - a\,x_n^2\right)$$
However, there is a caveat we should keep in mind when looking at the above equation. For the square root the solution should be positive, so for the iterations (and the result) to stay positive, the following must be satisfied:
$$3x_n > a\,x_n^3 \quad\Longleftrightarrow\quad a\,x_n^2 < 3$$
Therefore:
$$x_n < \sqrt{\frac{3}{a}}$$
So, given the number $a$ whose square root we want to compute, the initial guess must satisfy the above condition. Since this will ultimately run on a microcontroller, we can start from any value (say 1) and keep looping, halving the value of $x_0$, until the condition above is satisfied; note that I avoid doing a division to compute $x_0$ directly, because division is an expensive operation. Once we have an initial guess, we iterate Newton's method. Depending on the initial guess, convergence may take fewer or more iterations; it depends entirely on how far you start from the actual answer. You can either cap the number of iterations, or wait until the relative difference between two successive guesses falls below some threshold (around $10^{-6}$, say).
Since your tags are asking for an algorithm in C, let's quickly write one up:
#include <stdio.h> // For printf
#include <math.h> // For fabs
int main(void)
{
float a = 5.0; // Number we want to take the square root of
float x = 1.0; // Initial guess
float xprev; // Root for previous iteration
int count; // Counter for iterations
// Find a better initial guess
// Half at each step until condition is satisfied
while (x*x*a >= 3.0)
x *= 0.5;
printf("Initial guess: %f\n", x);
count = 1;
do {
xprev = x; // Save for previous iteration
printf("Iteration #%d: %f\n", count++, x);
x = 0.5*(3*xprev - (xprev*xprev*xprev)*a); // Newton step converging to 1/sqrt(a)
} while (fabs(x - xprev) > 1e-6);
x *= a; // Actual answer - Multiply by a
printf("Square root is: %f\n", x);
printf("Done!");
}
This is a very basic implementation of Newton's method. Note that I keep halving the initial guess until the condition we discussed earlier is satisfied. I am also trying to find the square root of 5, which we know is roughly 2.236. Running the above code gives the following output:
Initial guess: 0.500000
Iteration #1: 0.500000
Iteration #2: 0.437500
Iteration #3: 0.446899
Iteration #4: 0.447213
Square root is: 2.236068
Done!
Note that Newton's method here solves for the reciprocal $1/\sqrt{a}$, and we multiply by $a$ at the end to get the final answer. Also note that the initial guess was adjusted to make sure the condition above is satisfied. Just for fun, let's try finding the square root of 9876:
Initial guess: 0.015625
Iteration #1: 0.015625
Iteration #2: 0.004601
Iteration #3: 0.006420
Iteration #4: 0.008323
Iteration #5: 0.009638
Iteration #6: 0.010036
Iteration #7: 0.010062
Square root is: 99.378067
Done!
As you can see, the only thing that changes is how many iterations it takes to compute the square root: the larger the number, the more iterations you need.
I know this method was already suggested in an earlier post, but I figured I would derive it and provide some code!
Yes, a power series can approximate the square root function quickly and efficiently, but only over a limited domain. The wider the domain, the more terms the power series needs to keep the error small enough.
If the number is a floating-point value, you need to separate the exponent and the mantissa, as the C code in the other answer does.
In practice, this is done by solving the quadratic equation with Newton's method:
http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
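Putting those last two remarks together, here is a rough sketch: split off the exponent and mantissa (with frexpf/ldexpf here rather than explicit bit masking), take a crude linear first guess at the mantissa's square root, and refine it with one Newton step on the quadratic $x^2 - m = 0$ that the link above describes. The function name and coefficients are mine, for illustration only:
#include <math.h>   /* frexpf, ldexpf */

float sqrt_range_reduced(float x)
{
    if (x <= 0.0f) return 0.0f;                /* ignore zero/negatives for this sketch */
    int e;
    float m = frexpf(x, &e);                   /* x = m * 2^e with 0.5 <= m < 1 */
    float s = 0.41731f + 0.59016f * m;         /* linear seed for sqrt(m), roughly 1% error */
    s = 0.5f * (s + m / s);                    /* one Newton step on x^2 - m = 0 */
    if (e & 1) { s *= 1.41421356f; e -= 1; }   /* an odd exponent contributes a factor sqrt(2) */
    return ldexpf(s, e / 2);                   /* scale back by 2^(e/2) */
}
One Newton step takes the roughly 1% seed error down to around 10^-4; a second step would get close to full single precision.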
For numbers greater than 1, you can use the following Taylor expansion:
If I remember correctly, it is accurate to within 4%. It was used by engineers in place of slide rules and calculators. I learned it from the 1923 edition of De Laharpe's Notes et formules de l'ingénieur.
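Assuming what is meant is the standard first-order binomial expansion about a nearby perfect square (the exact formula is not spelled out above, so treat this as a sketch):
$$\sqrt{a^2 + b} \;\approx\; a + \frac{b}{2a}, \qquad |b| \ll a^2$$
For example, $\sqrt{5} = \sqrt{4 + 1} \approx 2 + \tfrac{1}{4} = 2.25$, versus the true value $2.236$, an error of about 0.6%.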