Math package for float32 / complex64 types https://godoc.org/pkg/math/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

exp1m.go 7.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package math32
  5. // The original C code, the long comment, and the constants
  6. // below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c
  7. // and came with this notice. The go code is a simplified
  8. // version of the original C.
  9. //
  10. // ====================================================
  11. // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  12. //
  13. // Developed at SunPro, a Sun Microsystems, Inc. business.
  14. // Permission to use, copy, modify, and distribute this
  15. // software is freely granted, provided that this notice
  16. // is preserved.
  17. // ====================================================
  18. //
  19. // expm1(x)
  20. // Returns exp(x)-1, the exponential of x minus 1.
  21. //
  22. // Method
  23. // 1. Argument reduction:
  24. // Given x, find r and integer k such that
  25. //
  26. // x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
  27. //
  28. // Here a correction term c will be computed to compensate
  29. // the error in r when rounded to a floating-point number.
  30. //
  31. // 2. Approximating expm1(r) by a special rational function on
  32. // the interval [0,0.34658]:
  33. // Since
  34. // r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ...
  35. // we define R1(r*r) by
  36. // r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r)
  37. // That is,
  38. // R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
  39. // = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
  40. // = 1 - r**2/60 + r**4/2520 - r**6/100800 + ...
  41. // We use a special Remez algorithm on [0,0.347] to generate
  42. // a polynomial of degree 5 in r*r to approximate R1. The
  43. // maximum error of this polynomial approximation is bounded
  44. // by 2**-61. In other words,
  45. // R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
  46. // where Q1 = -1.6666666666666567384E-2,
  47. // Q2 = 3.9682539681370365873E-4,
  48. // Q3 = -9.9206344733435987357E-6,
  49. // Q4 = 2.5051361420808517002E-7,
  50. // Q5 = -6.2843505682382617102E-9;
  51. // (where z=r*r, and the values of Q1 to Q5 are listed below)
  52. // with error bounded by
  53. //
  54. // | 1.0 + Q1*z + ... + Q5*z**5 - R1(z) | <= 2**-61
  55. //
  56. //
  57. // expm1(r) = exp(r)-1 is then computed by the following
  58. // specific way which minimize the accumulation rounding error:
  59. //
  60. // expm1(r) = r + r**2/2
  61. // + (r**3/2) * (3 - (R1 + R1*r/2)) / (6 - r*(3 - R1*r/2))
  62. //
  63. //
  64. // To compensate the error in the argument reduction, we use
  65. // expm1(r+c) = expm1(r) + c + expm1(r)*c
  66. // ~ expm1(r) + c + r*c
  67. // Thus c+r*c will be added in as the correction terms for
  68. // expm1(r+c). Now rearrange the term to avoid optimization
  69. // screw up:
  70. // With
  71. // A = (r**2/2) * (R1 - (3 - R1*r/2)) / (6 - r*(3 - R1*r/2))
  72. // this gives
  73. // expm1(r+c) ~ r - ({r*(A - c) - c} - r**2/2)
  74. //
  75. //
  76. // = r - E
  77. // 3. Scale back to obtain expm1(x):
  78. // From step 1, we have
  79. // expm1(x) = either 2**k*[expm1(r)+1] - 1
  80. // = or 2**k*[expm1(r) + (1-2**-k)]
  81. // 4. Implementation notes:
  82. // (A). To save one multiplication, we scale the coefficient Qi
  83. // to Qi*2**i, and replace z by (x**2)/2.
  84. // (B). To achieve maximum accuracy, we compute expm1(x) by
  85. // (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
  86. // (ii) if k=0, return r-E
  87. // (iii) if k=-1, return 0.5*(r-E)-0.5
  88. // (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
  89. // else return 1.0+2.0*(r-E);
  90. // (v) if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1)
  91. // (vi) if k <= 20, return 2**k((1-2**-k)-(E-r)), else
  92. // (vii) return 2**k(1-((E+2**-k)-r))
  93. //
  94. // Special cases:
  95. // expm1(INF) is INF, expm1(NaN) is NaN;
  96. // expm1(-INF) is -1, and
  97. // for finite argument, only expm1(0)=0 is exact.
  98. //
  99. // Accuracy:
  100. // according to an error analysis, the error is always less than
  101. // 1 ulp (unit in the last place).
  102. //
  103. // Misc. info.
  104. // For IEEE double,
  105. // if x > 7.09782712893383973096e+02 then expm1(x) overflows; for IEEE single (float32) the bound is ln(MaxFloat32) ~ 88.7228.
  106. //
  107. // Constants:
  108. // The hexadecimal values are the intended ones for the following
  109. // constants. The decimal values may be used, provided that the
  110. // compiler will convert from decimal to binary accurately enough
  111. // to produce the hexadecimal values shown.
  112. //
  113. // Expm1 returns e**x - 1, the base-e exponential of x minus 1.
  114. // It is more accurate than Exp(x) - 1 when x is near zero.
  115. //
  116. // Special cases are:
  117. // Expm1(+Inf) = +Inf
  118. // Expm1(-Inf) = -1
  119. // Expm1(NaN) = NaN
  120. // Very large values overflow to -1 or +Inf.
  121. func Expm1(x float32) float32 {
  122. return expm1(x)
  123. }
  124. func expm1(x float32) float32 {
  125. const (
  126. Othreshold = 89.415985 // 0x42b2d4fc
  127. Ln2X27 = 1.871497344970703125e+01 // 0x4195b844
  128. Ln2HalfX3 = 1.0397207736968994140625 // 0x3F851592
  129. Ln2Half = 3.465735912322998046875e-01 // 0x3eb17218
  130. Ln2Hi = 6.9313812256e-01 // 0x3f317180
  131. Ln2Lo = 9.0580006145e-06 // 0x3717f7d1
  132. InvLn2 = 1.4426950216e+00 // 0x3fb8aa3b
  133. Tiny = 1.0 / (1 << 54) // 2**-54 = 0x3c90000000000000
  134. /* scaled coefficients related to expm1 */
  135. Q1 = -3.3333335072e-02 /* 0xbd088889 */
  136. Q2 = 1.5873016091e-03 /* 0x3ad00d01 */
  137. Q3 = -7.9365076090e-05 /* 0xb8a670cd */
  138. Q4 = 4.0082177293e-06 /* 0x36867e54 */
  139. Q5 = -2.0109921195e-07 /* 0xb457edbb */
  140. )
  141. // special cases
  142. switch {
  143. case IsInf(x, 1) || IsNaN(x):
  144. return x
  145. case IsInf(x, -1):
  146. return -1
  147. }
  148. absx := x
  149. sign := false
  150. if x < 0 {
  151. absx = -absx
  152. sign = true
  153. }
  154. // filter out huge argument
  155. if absx >= Ln2X27 { // if |x| >= 27 * ln2
  156. if sign {
  157. return -1 // x < -56*ln2, return -1
  158. }
  159. if absx >= Othreshold { // if |x| >= 89.415985...
  160. return Inf(1)
  161. }
  162. }
  163. // argument reduction
  164. var c float32
  165. var k int
  166. if absx > Ln2Half { // if |x| > 0.5 * ln2
  167. var hi, lo float32
  168. if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2
  169. if !sign {
  170. hi = x - Ln2Hi
  171. lo = Ln2Lo
  172. k = 1
  173. } else {
  174. hi = x + Ln2Hi
  175. lo = -Ln2Lo
  176. k = -1
  177. }
  178. } else {
  179. if !sign {
  180. k = int(InvLn2*x + 0.5)
  181. } else {
  182. k = int(InvLn2*x - 0.5)
  183. }
  184. t := float32(k)
  185. hi = x - t*Ln2Hi // t * Ln2Hi is exact here
  186. lo = t * Ln2Lo
  187. }
  188. x = hi - lo
  189. c = (hi - x) - lo
  190. } else if absx < Tiny { // when |x| < 2**-54, return x
  191. return x
  192. } else {
  193. k = 0
  194. }
  195. // x is now in primary range
  196. hfx := 0.5 * x
  197. hxs := x * hfx
  198. r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))))
  199. t := 3 - r1*hfx
  200. e := hxs * ((r1 - t) / (6.0 - x*t))
  201. if k != 0 {
  202. e = (x*(e-c) - c)
  203. e -= hxs
  204. switch {
  205. case k == -1:
  206. return 0.5*(x-e) - 0.5
  207. case k == 1:
  208. if x < -0.25 {
  209. return -2 * (e - (x + 0.5))
  210. }
  211. return 1 + 2*(x-e)
  212. case k <= -2 || k > 56: // suffice to return exp(x)-1
  213. y := 1 - (e - x)
  214. y = Float32frombits(Float32bits(y) + uint32(k)<<23) // add k to y's exponent
  215. return y - 1
  216. }
  217. if k < 20 {
  218. t := Float32frombits(0x3f800000 - (0x1000000 >> uint(k))) // t=1-2**-k
  219. y := t - (e - x)
  220. y = Float32frombits(Float32bits(y) + uint32(k)<<23) // add k to y's exponent
  221. return y
  222. }
  223. t := Float32frombits(uint32(0x7f-k) << 23) // 2**-k
  224. y := x - (e + t)
  225. y += 1
  226. y = Float32frombits(Float32bits(y) + uint32(k)<<23) // add k to y's exponent
  227. return y
  228. }
  229. return x - (x*e - hxs) // c is 0
  230. }