new floating point parser/converter

this version is intended to be fully conformant to the ISO C, POSIX, and IEEE standards for conversion of decimal/hex floating point strings to float, double, and long double (ld64 or ld80 only at present) values. in particular, all results are intended to be rounded correctly according to the current rounding mode. further, this implementation aims to set the floating point underflow, overflow, and inexact flags to reflect the conversion performed. a moderate amount of testing has been performed (by nsz and myself) prior to integration of the code in musl, but it still may have bugs. so far, only strto(d|ld|f) use the new code. scanf integration will be done as a separate commit, and i will add implementations of the wide character functions later.
author: Rich Felker <dalias@aerifal.cx> 2012-04-10 11:52:55 -0400
committer: Rich Felker <dalias@aerifal.cx> 2012-04-10 11:52:55 -0400
commit: 415c4cd7fdb3e8b7476fbb2be2390f4592cf5165 (patch)
tree: 8dd9bb181dd259227dd6101633f81880cd0157f3 /src/internal
parent: 3be616c1df7ee176b5e00b9f493136ca7385ec46 (diff)
download: musl-415c4cd7fdb3e8b7476fbb2be2390f4592cf5165.tar.gz
2 files changed, 446 insertions, 0 deletions
diff --git a/src/internal/floatscan.c b/src/internal/floatscan.c
new file mode 100644
index 00000000..15ad5e12
--- /dev/null
+++ b/src/internal/floatscan.c
@@ -0,0 +1,438 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+#include "floatscan.h"
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+
+#define LD_B1B_DIG 2
+#define LD_B1B_MAX 9007199, 254740991
+#define KMAX 128
+
+#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */
+
+#define LD_B1B_DIG 3
+#define LD_B1B_MAX 18, 446744073, 709551615
+#define KMAX 2048
+
+#endif
+
+#define MASK (KMAX-1)
+
+
+#if 1
+#include "stdio_impl.h"
+#undef ungetc
+#define ungetc(c,f) ((f)->rpos--,(c))
+#undef getc
+#define getc getc_unlocked
+#endif
+
+
+static long long scanexp(FILE *f, off_t *pcnt)
+{
+	int c;
+	int x;
+	long long y;
+	int neg = 0;
+	
+	*pcnt += (c=getc(f))>=0;
+	if (c=='+' || c=='-') {
+		neg = (c=='-');
+		*pcnt += (c=getc(f))>=0;
+		if (c-'0'>=10U) {
+			if (c>=0) {
+				ungetc(c, f);
+				--*pcnt;
+			}
+			return LLONG_MIN;
+		}
+	}
+	for (x=0; c-'0'<10U && x<INT_MAX/10; *pcnt += (c=getc(f))>=0)
+		x = 10*x + c-'0';
+	for (y=x; c-'0'<10U && x<LLONG_MAX/10; *pcnt += (c=getc(f))>=0)
+		y = 10*y + c-'0';
+	for (; c-'0'<10U; *pcnt += (c=getc(f))>=0);
+	if (c>=0) {
+		ungetc(c, f);
+		--*pcnt;
+	}
+	return neg ? -y : y;
+}
+
+
+static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
+{
+	uint32_t x[KMAX];
+	static const uint32_t th[] = { LD_B1B_MAX };
+	int i, j, k, a, z;
+	long long lrp=-1, dc=0;
+	int gotdig = 0;
+	int rp;
+	int e10=0;
+	int e2;
+	long double y;
+	long double frac=0;
+	long double bias=0;
+
+	j=0;
+	k=0;
+
+	if (c<0) *pcnt += (c=getc(f))>=0;
+
+	/* Don't let leading zeros consume buffer space */
+	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig=1;
+
+	x[0] = 0;
+	for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) {
+		if (c == '.') {
+			if (lrp!=-1) break;
+			lrp = dc;
+		} else if (k < KMAX) {
+			dc++;
+			if (j) x[k] = x[k]*10 + c-'0';
+			else x[k] = c-'0';
+			if (++j==9) {
+				k++;
+				j=0;
+			}
+			gotdig=1;
+		} else {
+			dc++;
+			x[KMAX-1] |= c-'0';
+		}
+	}
+	if (lrp==-1) lrp=dc;
+
+	if (gotdig && (c|32)=='e') {
+		e10 = scanexp(f, pcnt);
+		if (e10 == LLONG_MIN) {
+			if (!pok) {
+				*pcnt = 0;
+				return 0;
+			}
+			e10 = 0;
+		}
+		lrp += e10;
+	} else if (c>=0) {
+		ungetc(c, f);
+		--*pcnt;
+	}
+	if (!gotdig) {
+		*pcnt = 0;
+		return 0;
+	}
+
+	if (!x[0])
+		return sign * 0.0;
+	if (lrp==dc && (!k || (k==1 && !j)) && (bits>30 || x[0]>>bits==0))
+		return sign * (long double)x[0];
+	if (lrp > -emin/2)
+		return sign * LDBL_MAX * LDBL_MAX;
+	if (lrp < emin-2*LDBL_MANT_DIG)
+		return sign * LDBL_MIN * LDBL_MIN;
+
+	if (k<KMAX && j) {
+		for (; j<9; j++) x[k]*=10;
+		k++;
+		j=0;
+	}
+
+	a = 0;
+	z = k;
+	e2 = 0;
+	rp = lrp;
+
+	while (rp < 18+9*LD_B1B_DIG) {
+		uint32_t carry = 0;
+		e2 -= 29;
+		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
+			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
+			if (tmp > 1000000000) {
+				carry = tmp / 1000000000;
+				x[k] = tmp % 1000000000;
+			} else {
+				carry = 0;
+				x[k] = tmp;
+			}
+			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
+			if (k==a) break;
+		}
+		if (carry) {
+			rp += 9;
+			if (a == z) {
+				z = (z-1 & MASK);
+				x[z-1 & MASK] |= x[z];
+			}
+			a = (a-1 & MASK);
+			x[a] = carry;
+		}
+	}
+
+	if (rp % 9) {
+		static const int p10s[] = {
+			100000000, 10000000, 1000000, 100000,
+			10000, 1000, 100, 10
+		};
+		int rpm9 = rp % 9;
+		int p10 = p10s[rpm9-1];
+		uint32_t carry = 0;
+		for (k=a; k!=z; k=(k+1 & MASK)) {
+			uint32_t tmp = x[k] % p10;
+			x[k] = x[k]/p10 + carry;
+			carry = 1000000000/p10 * tmp;
+			if (k==a && !x[k]) {
+				a = (a+1 & MASK);
+				rp -= 9;
+			}
+		}
+		if (carry) {
+			if ((z+1 & MASK) != a) {
+				x[z] = carry;
+				z = (z+1 & MASK);
+			} else x[z-1 & MASK] |= 1;
+		}
+		rp += 9-rpm9;
+	}
+
+	for (;;) {
+		uint32_t carry = 0;
+		int sh = 1;
+		for (i=0; i<LD_B1B_DIG; i++) {
+			k = (a+i & MASK);
+			if (k == z || x[k] < th[i]) {
+				i=LD_B1B_DIG;
+				break;
+			}
+			if (x[a+i & MASK] > th[i]) break;
+		}
+		if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
+		/* FIXME: find a way to compute optimal sh */
+		if (rp > 9+9*LD_B1B_DIG) sh = 9;
+		e2 += sh;
+		for (k=a; k!=z; k=(k+1 & MASK)) {
+			uint32_t tmp = x[k] & (1<<sh)-1;
+			x[k] = (x[k]>>sh) + carry;
+			carry = (1000000000>>sh) * tmp;
+			if (k==a && !x[k]) {
+				a = (a+1 & MASK);
+				rp -= 9;
+			}
+		}
+		if (carry) {
+			if ((z+1 & MASK) != a) {
+				x[z] = carry;
+				z = (z+1 & MASK);
+			} else x[z-1 & MASK] |= 1;
+		}
+	}
+
+	for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
+		y = 1000000000.0L * y + x[a+i & MASK];
+
+	y *= sign;
+
+	if (bits > LDBL_MANT_DIG+e2-emin) {
+		bits = LDBL_MANT_DIG+e2-emin;
+		if (bits<0) bits=0;
+	}
+
+	if (bits < LDBL_MANT_DIG) {
+		bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
+		frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
+		y -= frac;
+		y += bias;
+	}
+
+	if ((a+i & MASK) != z) {
+		uint32_t t = x[a+i & MASK];
+		if (t < 500000000 && (t || (a+i+1 & MASK) != z))
+			frac += 0.25*sign;
+		else if (t > 500000000)
+			frac += 0.75*sign;
+		else if (t == 500000000) {
+			if ((a+i+1 & MASK) == z)
+				frac += 0.5*sign;
+			else
+				frac += 0.75*sign;
+		}
+		if (LDBL_MANT_DIG-bits >= 2 && !fmodl(frac, 1))
+			frac++;
+	}
+
+	y += frac;
+	y -= bias;
+
+	y = scalbnl(y, e2);
+
+	return y;
+}
+
+static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
+{
+	uint32_t x = 0;
+	long double y = 0;
+	long double scale = 1;
+	long double bias = 0;
+	int gottail = 0, gotrad = 0, gotdig = 0;
+	long long rp = 0;
+	long long dc = 0;
+	long long e2 = 0;
+	int d;
+
+	if (c<0) *pcnt += (c=getc(f))>=0;
+
+	/* Skip leading zeros */
+	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig = 1;
+
+	if (c=='.') {
+		gotrad = 1;
+		*pcnt += (c=getc(f))>=0;
+		/* Count zeros after the radix point before significand */
+		for (rp=0; c=='0'; *pcnt += (c=getc(f))>=0, rp--) gotdig = 1;
+	}
+
+	for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; *pcnt += (c=getc(f))>=0) {
+		if (c=='.') {
+			if (gotrad) break;
+			rp = dc;
+			gotrad = 1;
+		} else {
+			gotdig = 1;
+			if (c > '9') d = (c|32)+10-'a';
+			else d = c-'0';
+			if (dc<8) {
+				x = x*16 + d;
+			} else if (dc < LDBL_MANT_DIG/4+1) {
+				y += d*(scale/=16);
+			} else if (d && !gottail) {
+				y += 0.5*scale;
+				gottail = 1;
+			}
+			dc++;
+		}
+	}
+	if (!gotdig) {
+		if (c>=0) {
+			ungetc(c, f);
+			--*pcnt;
+		}
+		if (pok) *pcnt -= 1+gotrad; /* uncount the rp, x of 0x */
+		else *pcnt = 0;
+		return 0;
+	}
+	if (!gotrad) rp = dc;
+	while (dc<8) x *= 16, dc++;
+	if ((c|32)=='p') {
+		e2 = scanexp(f, pcnt);
+		if (e2 == LLONG_MIN) {
+			if (!pok) {
+				*pcnt = 0;
+				return 0;
+			}
+			e2 = 0;
+		}
+	}
+	e2 += 4*rp - 32;
+
+	if (!x) return sign * 0.0;
+	if (e2 > -emin) return sign * LDBL_MAX * LDBL_MAX;
+	if (e2 < emin-2*LDBL_MANT_DIG) return sign * LDBL_MIN * LDBL_MIN;
+
+	while (x < 0x80000000) {
+		if (y>=0.5) {
+			x += x + 1;
+			y += y - 1;
+		} else {
+			x += x;
+			y += y;
+		}
+		e2--;
+	}
+
+	if (bits > 32+e2-emin) {
+		bits = 32+e2-emin;
+		if (bits<0) bits=0;
+	}
+
+	if (bits < LDBL_MANT_DIG)
+		bias = copysignl(scalbn(1, 32+LDBL_MANT_DIG-bits-1), sign);
+
+	if (bits<32 && y && !(x&1)) x++, y=0;
+
+	y = bias + sign*(long double)x + sign*y;
+	y -= bias;
+
+	return scalbnl(y, e2);
+}
+
+long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt)
+{
+	int sign = 1;
+	int i;
+	int bits;
+	int emin;
+
+	*pcnt = 0;
+
+	switch (prec) {
+	case 0:
+		bits = 24;
+		emin = -149;
+		break;
+	case 1:
+		bits = 53;
+		emin = -1074;
+		break;
+	case 2:
+		bits = LDBL_MANT_DIG;
+		emin = -16445;
+		break;
+	default:
+		return 0;
+	}
+
+	if (c<0) *pcnt += (c=getc(f))>=0;
+
+	if (c=='+' || c=='-') {
+		sign -= 2*(c=='-');
+		*pcnt += (c=getc(f))>=0;
+	}
+
+	for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
+		if (i<7) c = getc(f);
+	if (i==3 || i==8 || (i>3 && pok)) {
+		if (i==3 && c>=0) ungetc(c, f);
+		if (i==8) *pcnt += 7;
+		else *pcnt += 2;
+		return sign * INFINITY;
+	}
+	if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
+		if (i<3) c = getc(f);
+	if (i==3) {
+		*pcnt += 2;
+		return sign>0 ? NAN : -NAN;
+	}
+
+	if (i) {
+		if (c>=0) ungetc(c, f);
+		*pcnt = 0;
+		return 0;
+	}
+
+	if (c=='0') {
+		*pcnt += (c=getc(f))>=0;
+		if ((c|32) == 'x')
+			return hexfloat(f, -1, bits, emin, sign, pok, pcnt);
+		if (c>=0) {
+			ungetc(c, f);
+			--*pcnt;
+		}
+		c = '0';
+	}
+
+	return decfloat(f, c, bits, emin, sign, pok, pcnt);
+}
diff --git a/src/internal/floatscan.h b/src/internal/floatscan.h
new file mode 100644
index 00000000..5ea74cc9
--- /dev/null
+++ b/src/internal/floatscan.h
@@ -0,0 +1,8 @@
+#ifndef FLOATSCAN_H
+#define FLOATSCAN_H
+
+#include <stdio.h>
+
+long double __floatscan(FILE *, int, int, int, off_t *);
+
+#endif
author	Rich Felker <dalias@aerifal.cx>	2012-04-10 11:52:55 -0400
committer	Rich Felker <dalias@aerifal.cx>	2012-04-10 11:52:55 -0400
commit	415c4cd7fdb3e8b7476fbb2be2390f4592cf5165 (patch)
tree	8dd9bb181dd259227dd6101633f81880cd0157f3 /src/internal
parent	3be616c1df7ee176b5e00b9f493136ca7385ec46 (diff)
download	musl-415c4cd7fdb3e8b7476fbb2be2390f4592cf5165.tar.gz