summaryrefslogtreecommitdiff
path: root/src/math/sqrtl.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/sqrtl.c')
-rw-r--r--src/math/sqrtl.c254
1 files changed, 253 insertions, 1 deletions
diff --git a/src/math/sqrtl.c b/src/math/sqrtl.c
index 83a8f80c..1b9f19c7 100644
--- a/src/math/sqrtl.c
+++ b/src/math/sqrtl.c
@@ -1,7 +1,259 @@
+#include <stdint.h>
#include <math.h>
+#include <float.h>
+#include "libm.h"
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double sqrtl(long double x)
{
- /* FIXME: implement in C, this is for LDBL_MANT_DIG == 64 only */
return sqrt(x);
}
+#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
+#include "sqrt_data.h"
+
+#define FENV_SUPPORT 1
+
+typedef struct {
+ uint64_t hi;
+ uint64_t lo;
+} u128;
+
+/* top: 16 bit sign+exponent, x: significand. */
+static inline long double mkldbl(uint64_t top, u128 x)
+{
+ union ldshape u;
+#if LDBL_MANT_DIG == 113
+ u.i2.hi = x.hi;
+ u.i2.lo = x.lo;
+ u.i2.hi &= 0x0000ffffffffffff;
+ u.i2.hi |= top << 48;
+#elif LDBL_MANT_DIG == 64
+ u.i.se = top;
+ u.i.m = x.lo;
+ /* force the top bit on non-zero (and non-subnormal) results. */
+ if (top & 0x7fff)
+ u.i.m |= 0x8000000000000000;
+#endif
+ return u.f;
+}
+
+/* return: top 16 bit is sign+exp and following bits are the significand. */
+static inline u128 asu128(long double x)
+{
+ union ldshape u = {.f=x};
+ u128 r;
+#if LDBL_MANT_DIG == 113
+ r.hi = u.i2.hi;
+ r.lo = u.i2.lo;
+#elif LDBL_MANT_DIG == 64
+ r.lo = u.i.m<<49;
+ /* ignore the top bit: pseudo numbers are not handled. */
+ r.hi = u.i.m>>15;
+ r.hi &= 0x0000ffffffffffff;
+ r.hi |= (uint64_t)u.i.se << 48;
+#endif
+ return r;
+}
+
+/* returns a*b*2^-32 - e, with error 0 <= e < 1. */
+static inline uint32_t mul32(uint32_t a, uint32_t b)
+{
+ return (uint64_t)a*b >> 32;
+}
+
+/* returns a*b*2^-64 - e, with error 0 <= e < 3. */
+static inline uint64_t mul64(uint64_t a, uint64_t b)
+{
+ uint64_t ahi = a>>32;
+ uint64_t alo = a&0xffffffff;
+ uint64_t bhi = b>>32;
+ uint64_t blo = b&0xffffffff;
+ return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
+}
+
+static inline u128 add64(u128 a, uint64_t b)
+{
+ u128 r;
+ r.lo = a.lo + b;
+ r.hi = a.hi;
+ if (r.lo < a.lo)
+ r.hi++;
+ return r;
+}
+
+static inline u128 add128(u128 a, u128 b)
+{
+ u128 r;
+ r.lo = a.lo + b.lo;
+ r.hi = a.hi + b.hi;
+ if (r.lo < a.lo)
+ r.hi++;
+ return r;
+}
+
+static inline u128 sub64(u128 a, uint64_t b)
+{
+ u128 r;
+ r.lo = a.lo - b;
+ r.hi = a.hi;
+ if (a.lo < b)
+ r.hi--;
+ return r;
+}
+
+static inline u128 sub128(u128 a, u128 b)
+{
+ u128 r;
+ r.lo = a.lo - b.lo;
+ r.hi = a.hi - b.hi;
+ if (a.lo < b.lo)
+ r.hi--;
+ return r;
+}
+
+/* a<<n, 0 <= n <= 127 */
+static inline u128 lsh(u128 a, int n)
+{
+ if (n == 0)
+ return a;
+ if (n >= 64) {
+ a.hi = a.lo<<(n-64);
+ a.lo = 0;
+ } else {
+ a.hi = (a.hi<<n) | (a.lo>>(64-n));
+ a.lo = a.lo<<n;
+ }
+ return a;
+}
+
+/* a>>n, 0 <= n <= 127 */
+static inline u128 rsh(u128 a, int n)
+{
+ if (n == 0)
+ return a;
+ if (n >= 64) {
+ a.lo = a.hi>>(n-64);
+ a.hi = 0;
+ } else {
+ a.lo = (a.lo>>n) | (a.hi<<(64-n));
+ a.hi = a.hi>>n;
+ }
+ return a;
+}
+
+/* returns a*b exactly. */
+static inline u128 mul64_128(uint64_t a, uint64_t b)
+{
+ u128 r;
+ uint64_t ahi = a>>32;
+ uint64_t alo = a&0xffffffff;
+ uint64_t bhi = b>>32;
+ uint64_t blo = b&0xffffffff;
+ uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
+ uint64_t lo2 = (alo*blo)&0xffffffff;
+ r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
+ r.lo = (lo1<<32) + lo2;
+ return r;
+}
+
+/* returns a*b*2^-128 - e, with error 0 <= e < 7. */
+static inline u128 mul128(u128 a, u128 b)
+{
+ u128 hi = mul64_128(a.hi, b.hi);
+ uint64_t m1 = mul64(a.hi, b.lo);
+ uint64_t m2 = mul64(a.lo, b.hi);
+ return add64(add64(hi, m1), m2);
+}
+
+/* returns a*b % 2^128. */
+static inline u128 mul128_tail(u128 a, u128 b)
+{
+ u128 lo = mul64_128(a.lo, b.lo);
+ lo.hi += a.hi*b.lo + a.lo*b.hi;
+ return lo;
+}
+
+
+/* see sqrt.c for detailed comments. */
+
+long double sqrtl(long double x)
+{
+ u128 ix, ml;
+ uint64_t top;
+
+ ix = asu128(x);
+ top = ix.hi >> 48;
+ if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
+ /* x < 0x1p-16382 or inf or nan. */
+ if (2*ix.hi == 0 && ix.lo == 0)
+ return x;
+ if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
+ return x;
+ if (top >= 0x7fff)
+ return __math_invalidl(x);
+ /* x is subnormal, normalize it. */
+ ix = asu128(x * 0x1p112);
+ top = ix.hi >> 48;
+ top -= 112;
+ }
+
+ /* x = 4^e m; with int e and m in [1, 4) */
+ int even = top & 1;
+ ml = lsh(ix, 15);
+ ml.hi |= 0x8000000000000000;
+ if (even) ml = rsh(ml, 1);
+ top = (top + 0x3fff) >> 1;
+
+ /* r ~ 1/sqrt(m) */
+ static const uint64_t three = 0xc0000000;
+ uint64_t r, s, d, u, i;
+ i = (ix.hi >> 42) % 128;
+ r = (uint32_t)__rsqrt_tab[i] << 16;
+ /* |r sqrt(m) - 1| < 0x1p-8 */
+ s = mul32(ml.hi>>32, r);
+ d = mul32(s, r);
+ u = three - d;
+ r = mul32(u, r) << 1;
+ /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
+ r = r<<32;
+ s = mul64(ml.hi, r);
+ d = mul64(s, r);
+ u = (three<<32) - d;
+ r = mul64(u, r) << 1;
+ /* |r sqrt(m) - 1| < 0x1.a5p-31 */
+ s = mul64(u, s) << 1;
+ d = mul64(s, r);
+ u = (three<<32) - d;
+ r = mul64(u, r) << 1;
+ /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
+
+ static const u128 threel = {.hi=three<<32, .lo=0};
+ u128 rl, sl, dl, ul;
+ rl.hi = r;
+ rl.lo = 0;
+ sl = mul128(ml, rl);
+ dl = mul128(sl, rl);
+ ul = sub128(threel, dl);
+ sl = mul128(ul, sl); /* repr: 3.125 */
+ /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
+ sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
+ /* s < sqrt(m) < s + 1 ULP + tiny */
+
+ long double y;
+ u128 d2, d1, d0;
+ d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
+ d1 = sub128(sl, d0);
+ d2 = add128(add64(sl, 1), d1);
+ sl = add64(sl, d1.hi >> 63);
+ y = mkldbl(top, sl);
+ if (FENV_SUPPORT) {
+ /* handle rounding modes and inexact exception. */
+ top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
+ top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
+ y += mkldbl(top, (u128){0});
+ }
+ return y;
+}
+#else
+#error unsupported long double format
+#endif