summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSzabolcs Nagy <nsz@port70.net>2013-09-05 10:58:48 +0000
committerSzabolcs Nagy <nsz@port70.net>2013-09-05 11:30:09 +0000
commit07039ed8563b850624146c938ae201a1099d2f75 (patch)
tree2a751e8684596943be0bca4391289759b718f2b4
parent8dba5486288e719ed290cccefcd932ed32756d7c (diff)
downloadmusl-07039ed8563b850624146c938ae201a1099d2f75.tar.gz
math: fix exp2l asm on x86 (raise underflow correctly)
there were two problems:
* omitted underflow on subnormal results: exp2l(-16383.5) was calculated as sqrt(2)*2^-16384; the last bits of sqrt(2) are zero, so the down scaling does not underflow even though the result is in the subnormal range
* spurious underflow for subnormal inputs: exp2l(0x1p-16400) was evaluated as f2xm1(x)+1, and f2xm1 raised underflow (because of the inexact subnormal result)
the first issue is fixed by raising underflow manually if x is in (-32768,-16382] and is not an integer (x-0x1p63+0x1p63 != x).
the second issue is fixed by treating x in (-0x1p-64,0x1p-64) specially.
for these fixes the special case handling was completely rewritten.
-rw-r--r--src/math/i386/exp.s70
-rw-r--r--src/math/x86_64/exp2l.s75
2 files changed, 78 insertions, 67 deletions
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index e5f54588..abb90369 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -95,42 +95,32 @@ exp:
.type exp2,@function
exp2:
fldl 4(%esp)
-1: pushl $0x467ff000
- flds (%esp) # 16380
- xorl %eax,%eax
- pushl $0x80000000
- push %eax
- fld %st(1)
- fabs
- fucomp %st(1)
- fnstsw
- fstp %st(0)
- sahf
- ja 3f # |x| > 16380
- jp 2f # x is nan (avoid invalid except in fistp)
+1: sub $12,%esp
fld %st(0)
- fistpl 8(%esp)
- fildl 8(%esp)
- fxch %st(1)
- fsub %st(1)
- mov $0x3fff,%eax
- add %eax,8(%esp)
- f2xm1
- fld1
- faddp # 2^(x-rint(x))
- fldt (%esp) # 2^rint(x)
- fmulp
- fstp %st(1)
-2: add $12,%esp
- ret
-
-3: fld %st(0)
fstpt (%esp)
- fld1
mov 8(%esp),%ax
and $0x7fff,%ax
- cmp $0x7fff,%ax
- je 1f # x = +-inf
+ cmp $0x3fff+13,%ax
+ jb 4f # |x| < 8192
+ cmp $0x3fff+15,%ax
+ jae 3f # |x| >= 32768
+ fsts (%esp)
+ cmpl $0xc67ff800,(%esp)
+ jb 2f # x > -16382
+ movl $0x5f000000,(%esp)
+ flds (%esp) # 0x1p63
+ fld %st(1)
+ fsub %st(1)
+ faddp
+ fucomp %st(1)
+ fnstsw
+ sahf
+ je 2f # x - 0x1p63 + 0x1p63 == x
+ movl $1,(%esp)
+ flds (%esp) # 0x1p-149
+ fdiv %st(1)
+ fstps (%esp) # raise underflow
+2: fld1
fld %st(1)
frndint
fxch %st(2)
@@ -141,3 +131,19 @@ exp2:
fstp %st(1)
add $12,%esp
ret
+3: xor %eax,%eax
+4: cmp $0x3fff-64,%ax
+ fld1
+ jb 1b # |x| < 0x1p-64
+ fstpt (%esp)
+ fistl 8(%esp)
+ fildl 8(%esp)
+ fsubrp %st(1)
+ addl $0x3fff,8(%esp)
+ f2xm1
+ fld1
+ faddp # 2^(x-rint(x))
+ fldt (%esp) # 2^rint(x)
+ fmulp
+ add $12,%esp
+ ret
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
index 1f8ed7bb..e7145881 100644
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@@ -26,44 +26,32 @@ expm1l:
.type exp2l,@function
exp2l:
fldt 8(%rsp)
-1: mov $0x467ff000,%eax
- mov %eax,-16(%rsp)
- mov $0x80000000,%eax
- mov %eax,-20(%rsp)
- xor %eax,%eax
- mov %eax,-24(%rsp)
- flds -16(%rsp) # 16380
+1: fld %st(0)
+ sub $16,%rsp
+ fstpt (%rsp)
+ mov 8(%rsp),%ax
+ and $0x7fff,%ax
+ cmp $0x3fff+13,%ax
+ jb 4f # |x| < 8192
+ cmp $0x3fff+15,%ax
+ jae 3f # |x| >= 32768
+ fsts (%rsp)
+ cmpl $0xc67ff800,(%rsp)
+ jb 2f # x > -16382
+ movl $0x5f000000,(%rsp)
+ flds (%rsp) # 0x1p63
fld %st(1)
- fabs
- fucom %st(1)
+ fsub %st(1)
+ faddp
+ fucomp %st(1)
fnstsw
- fstp %st(0)
- fstp %st(0)
sahf
- ja 3f # |x| > 16380
- jp 2f # x is nan (avoid invalid except in fistp)
- fld %st(0)
- fistpl -16(%rsp)
- fildl -16(%rsp)
- fxch %st(1)
- fsub %st(1)
- mov $0x3fff,%eax
- add %eax,-16(%rsp)
- f2xm1
- fld1
- faddp # 2^(x-rint(x))
- fldt -24(%rsp) # 2^rint(x)
- fmulp
-2: fstp %st(1)
- ret
-
-3: fld %st(0)
- fstpt -24(%rsp)
- fld1
- mov -15(%rsp),%ax
- and $0x7fff,%ax
- cmp $0x7fff,%ax
- je 1f # x = +-inf
+ je 2f # x - 0x1p63 + 0x1p63 == x
+ movl $1,(%rsp)
+ flds (%rsp) # 0x1p-149
+ fdiv %st(1)
+ fstps (%rsp) # raise underflow
+2: fld1
fld %st(1)
frndint
fxch %st(2)
@@ -72,4 +60,21 @@ exp2l:
faddp # 2^(x-rint(x))
1: fscale
fstp %st(1)
+ add $16,%rsp
+ ret
+3: xor %eax,%eax
+4: cmp $0x3fff-64,%ax
+ fld1
+ jb 1b # |x| < 0x1p-64
+ fstpt (%rsp)
+ fistl 8(%rsp)
+ fildl 8(%rsp)
+ fsubrp %st(1)
+ addl $0x3fff,8(%rsp)
+ f2xm1
+ fld1
+ faddp # 2^(x-rint(x))
+ fldt (%rsp) # 2^rint(x)
+ fmulp
+ add $16,%rsp
ret