math: rewrite hypot

method: if there is a large difference between the scale of x and y then the larger magnitude dominates, otherwise reduce x,y so the argument of sqrt (x*x+y*y) does not overflow or underflow and calculate the argument precisely using exact multiplication. If the argument has less error than 1/sqrt(2) ~ 0.7 ulp, then the result has less error than 1 ulp in nearest rounding mode. the original fdlibm method was the same, except it used bit hacks instead of dekker-veltkamp algorithm, which is problematic for long double where different representations are supported. (the new hypot and hypotl code should be smaller and faster on 32bit cpu archs with fast fpu), the new code behaves differently in non-nearest rounding, but the error should be still less than 2ulps. ld80 and ld128 are supported
author: Szabolcs Nagy <nsz@port70.net> 2013-09-03 14:37:48 +0000
committer: Szabolcs Nagy <nsz@port70.net> 2013-09-05 11:30:07 +0000
commit: c2a0dfea629617a39af2f59bd400e1a3595d0783 (patch)
tree: d0401b88980dba72d3c2dc21cf6a11642828a51b /src/math/hypotl.c
parent: ee2ee92d62c43f6658d37ddea4c316d2089d0fe9 (diff)
download: musl-c2a0dfea629617a39af2f59bd400e1a3595d0783.tar.gz
1 files changed, 48 insertions, 130 deletions
diff --git a/src/math/hypotl.c b/src/math/hypotl.c
index f4a64f74..479aa92c 100644
--- a/src/math/hypotl.c
+++ b/src/math/hypotl.c
@@ -1,16 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_hypotl.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* long double version of hypot().  See comments in hypot.c. */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -19,130 +6,61 @@ long double hypotl(long double x, long double y)
 	return hypot(x, y);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define GET_LDBL_EXPSIGN(i, v) do {     \
-	union IEEEl2bits uv;            \
-					\
-	uv.e = v;                       \
-	i = uv.xbits.expsign;           \
-} while (0)
-
-#define GET_LDBL_MAN(h, l, v) do {      \
-	union IEEEl2bits uv;            \
-					\
-	uv.e = v;                       \
-	h = uv.bits.manh;               \
-	l = uv.bits.manl;               \
-} while (0)
-
-#define SET_LDBL_EXPSIGN(v, i) do {     \
-	union IEEEl2bits uv;            \
-					\
-	uv.e = v;                       \
-	uv.xbits.expsign = i;           \
-	v = uv.e;                       \
-} while (0)
-
-#undef GET_HIGH_WORD
-#define GET_HIGH_WORD(i, v)     GET_LDBL_EXPSIGN(i, v)
-#undef SET_HIGH_WORD
-#define SET_HIGH_WORD(v, i)     SET_LDBL_EXPSIGN(v, i)
-
-#define DESW(exp)       (exp)           /* delta expsign word */
-#define ESW(exp)        (MAX_EXP - 1 + (exp))   /* expsign word */
-#define MANT_DIG        LDBL_MANT_DIG
-#define MAX_EXP         LDBL_MAX_EXP
-
-#if LDBL_MANL_SIZE > 32
-typedef uint64_t man_t;
-#else
-typedef uint32_t man_t;
+#if LDBL_MANT_DIG == 64
+#define SPLIT (0x1p32L+1)
+#elif LDBL_MANT_DIG == 113
+#define SPLIT (0x1p57L+1)
 #endif
 
+static void sq(long double *hi, long double *lo, long double x)
+{
+	long double xh, xl, xc;
+	xc = x*SPLIT;
+	xh = x - xc + xc;
+	xl = x - xh;
+	*hi = x*x;
+	*lo = xh*xh - *hi + 2*xh*xl + xl*xl;
+}
+
 long double hypotl(long double x, long double y)
 {
-	long double a=x,b=y,t1,t2,y1,y2,w;
-	int32_t j,k,ha,hb;
+	union ldshape ux = {x}, uy = {y};
+	int ex, ey;
+	long double hx, lx, hy, ly, z;
 
-	GET_HIGH_WORD(ha, x);
-	ha &= 0x7fff;
-	GET_HIGH_WORD(hb, y);
-	hb &= 0x7fff;
-	if (hb > ha) {
-		a = y;
-		b = x;
-		j=ha; ha=hb; hb=j;
-	} else {
-		a = x;
-		b = y;
-	}
-	a = fabsl(a);
-	b = fabsl(b);
-	if (ha - hb > DESW(MANT_DIG+7))  /* x/y > 2**(MANT_DIG+7) */
-		return a+b;
-	k = 0;
-	if (ha > ESW(MAX_EXP/2-12)) {    /* a>2**(MAX_EXP/2-12) */
-		if (ha >= ESW(MAX_EXP)) {  /* Inf or NaN */
-			man_t manh, manl;
-			/* Use original arg order iff result is NaN; quieten sNaNs. */
-			w = fabsl(x+0.0)-fabsl(y+0.0);
-			GET_LDBL_MAN(manh,manl,a);
-			if (manh == LDBL_NBIT && manl == 0) w = a;
-			GET_LDBL_MAN(manh,manl,b);
-			if (hb >= ESW(MAX_EXP) && manh == LDBL_NBIT && manl == 0) w = b;
-			return w;
-		}
-		/* scale a and b by 2**-(MAX_EXP/2+88) */
-		ha -= DESW(MAX_EXP/2+88); hb -= DESW(MAX_EXP/2+88);
-		k += MAX_EXP/2+88;
-		SET_HIGH_WORD(a, ha);
-		SET_HIGH_WORD(b, hb);
-	}
-	if (hb < ESW(-(MAX_EXP/2-12))) {  /* b < 2**-(MAX_EXP/2-12) */
-		if (hb <= 0) {  /* subnormal b or 0 */
-			man_t manh, manl;
-			GET_LDBL_MAN(manh,manl,b);
-			if ((manh|manl) == 0)
-				return a;
-			t1 = 0;
-			SET_HIGH_WORD(t1, ESW(MAX_EXP-2));  /* t1 = 2^(MAX_EXP-2) */
-			b *= t1;
-			a *= t1;
-			k -= MAX_EXP-2;
-		} else {            /* scale a and b by 2^(MAX_EXP/2+88) */
-			ha += DESW(MAX_EXP/2+88);
-			hb += DESW(MAX_EXP/2+88);
-			k -= MAX_EXP/2+88;
-			SET_HIGH_WORD(a, ha);
-			SET_HIGH_WORD(b, hb);
-		}
-	}
-	/* medium size a and b */
-	w = a - b;
-	if (w > b) {
-		t1 = a;
-		union IEEEl2bits uv;
-		uv.e = t1; uv.bits.manl = 0; t1 = uv.e;
-		t2 = a-t1;
-		w  = sqrtl(t1*t1-(b*(-b)-t2*(a+t1)));
+	ux.i.se &= 0x7fff;
+	uy.i.se &= 0x7fff;
+	if (ux.i.se < uy.i.se) {
+		ex = uy.i.se;
+		ey = ux.i.se;
+		x = uy.f;
+		y = ux.f;
 	} else {
-		a  = a+a;
-		y1 = b;
-		union IEEEl2bits uv;
-		uv.e = y1; uv.bits.manl = 0; y1 = uv.e;
-		y2 = b - y1;
-		t1 = a;
-		uv.e = t1; uv.bits.manl = 0; t1 = uv.e;
-		t2 = a - t1;
-		w  = sqrtl(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+		ex = ux.i.se;
+		ey = uy.i.se;
+		x = ux.f;
+		y = uy.f;
 	}
-	if(k!=0) {
-		uint32_t high;
-		t1 = 1.0;
-		GET_HIGH_WORD(high, t1);
-		SET_HIGH_WORD(t1, high+DESW(k));
-		return t1*w;
+
+	if (ex == 0x7fff && isinf(y))
+		return y;
+	if (ex == 0x7fff || y == 0)
+		return x;
+	if (ex - ey > LDBL_MANT_DIG)
+		return x + y;
+
+	z = 1;
+	if (ex > 0x3fff+8000) {
+		z = 0x1p10000L;
+		x *= 0x1p-10000L;
+		y *= 0x1p-10000L;
+	} else if (ey < 0x3fff-8000) {
+		z = 0x1p-10000L;
+		x *= 0x1p10000L;
+		y *= 0x1p10000L;
 	}
-	return w;
+	sq(&hx, &lx, x);
+	sq(&hy, &ly, y);
+	return z*sqrtl(ly+lx+hy+hx);
 }
 #endif
author	Szabolcs Nagy <nsz@port70.net>	2013-09-03 14:37:48 +0000
committer	Szabolcs Nagy <nsz@port70.net>	2013-09-05 11:30:07 +0000
commit	c2a0dfea629617a39af2f59bd400e1a3595d0783 (patch)
tree	d0401b88980dba72d3c2dc21cf6a11642828a51b /src/math/hypotl.c
parent	ee2ee92d62c43f6658d37ddea4c316d2089d0fe9 (diff)
download	musl-c2a0dfea629617a39af2f59bd400e1a3595d0783.tar.gz