summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSzabolcs Nagy <nsz@port70.net>2013-08-17 02:40:44 +0000
committerSzabolcs Nagy <nsz@port70.net>2013-08-18 16:27:20 +0000
commitebc10fa17634a6ddb87a3aedd71b7d9617d12c19 (patch)
tree8e3f1ffffe8c0ed8d768ac7bbdb156038d12556e
parentd6841499109fc397cd3a57a726304fec9b08f510 (diff)
downloadmusl-ebc10fa17634a6ddb87a3aedd71b7d9617d12c19.tar.gz
add sse fenv support on i386 through hwcap
The SSE and x87 rounding modes should always be the same; the visible exception flags are the bitwise OR of the two fenv states (so it is enough to query the rounding mode, or to raise exceptions, on one fenv).
-rw-r--r--src/fenv/i386/fenv.s67
-rw-r--r--src/fenv/x86_64/fenv.s3
2 files changed, 61 insertions, 9 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
mov 4(%esp),%ecx               # ecx = first stack argument
not %ecx                       # ecx = ~argument, used below as an AND mask
- test $0x3f,%ecx
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f                      # pushes the address of the next label 1
+1: addl $__hwcap-1b,(%esp)    # pushed address + (__hwcap - 1b) = &__hwcap, no absolute relocation
+ pop %edx                     # edx = &__hwcap; esp is back to its entry value
+ testl $0x02000000,(%edx)     # is bit 25 of __hwcap set?
+ jz 1f                        # bit clear: skip the mxcsr update
+ stmxcsr 4(%esp)              # store mxcsr over the argument slot (argument is already in ecx)
+ and %ecx,4(%esp)             # clear the bits that were set in the argument
+ ldmxcsr 4(%esp)              # load the masked value back into mxcsr
+1: test $0x3f,%ecx            # any of the low six bits still set in the inverted mask?
jnz 2f                         # yes: not all six low bits were requested, go to 2
1: fnclex                      # clear the x87 exception flags
xor %eax,%eax                  # return value 0
ret
2: fnstsw %ax                  # ax = x87 status word
+ # TODO: only load/store fenv if exceptions aren't clear yet
and %ecx,%eax                  # status bits that are not being cleared
jz 1b                          # none of them set: fnclex at 1 is sufficient
sub $32,%esp                   # reserve 32 bytes of stack (used by code past this hunk)
@@ -41,7 +53,18 @@ fesetround:
andb $0xf3,1(%esp)             # clear bits 2-3 of byte 1 of the stack slot (bits 10-11 of the word)
or %ch,1(%esp)                 # insert the new rounding bits from ch
fldcw (%esp)                   # load the updated x87 control word
- pop %ecx
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f                      # pushes the address of the next label 1
+1: addl $__hwcap-1b,(%esp)    # pushed address + (__hwcap - 1b) = &__hwcap, no absolute relocation
+ pop %edx                     # edx = &__hwcap; (%esp) is the scratch slot again
+ testl $0x02000000,(%edx)     # is bit 25 of __hwcap set?
+ jz 1f                        # bit clear: skip the mxcsr update (was an unconditional jmp, which left the block below dead)
+ stmxcsr (%esp)               # store mxcsr into the scratch slot
+ shl $3,%ch                   # move rounding bits from word bits 10-11 to bits 13-14
+ andb $0x9f,1(%esp)           # clear bits 5-6 of byte 1 (bits 13-14 of mxcsr)
+ or %ch,1(%esp)               # insert the same rounding mode as set for x87 above
+ ldmxcsr (%esp)               # load the updated mxcsr
+1: pop %ecx                   # release the scratch slot
ret
.global fegetround
@@ -59,7 +82,18 @@ fegetenv:
mov 4(%esp),%ecx               # ecx = first stack argument (destination pointer)
xor %eax,%eax                  # return value 0
fnstenv (%ecx)                 # store the x87 environment at (%ecx)
- ret
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f                      # pushes the address of the next label 1
+1: addl $__hwcap-1b,(%esp)    # pushed address + (__hwcap - 1b) = &__hwcap, no absolute relocation
+ pop %edx                     # edx = &__hwcap
+ testl $0x02000000,(%edx)     # is bit 25 of __hwcap set?
+ jz 1f                        # bit clear: x87 environment only
+ push %eax                    # reserve one scratch dword on the stack
+ stmxcsr (%esp)               # store mxcsr into it
+ pop %edx                     # edx = mxcsr, scratch slot released
+ and $0x3f,%edx               # keep only the low six bits
+ or %edx,4(%ecx)              # OR them into the dword at offset 4 of the stored environment
+1: ret
.global fesetenv
.type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
inc %ecx                       # result is zero exactly when ecx was -1
jz 1f                          # ecx was -1: take the path at 1 that builds an environment on the stack
fldenv -1(%ecx)                # -1 undoes the inc: load the x87 environment from the original pointer
- ret
+ movl -1(%ecx),%ecx           # ecx = first dword of that environment, consumed at label 2
+ jmp 2f                       # continue with the mxcsr update
1: push %eax
push %eax
push %eax
@@ -79,12 +114,32 @@ fesetenv:
pushl $0x37f                   # pushed last, so it is the first dword read by fldenv below
fldenv (%esp)                  # load the x87 environment built on the stack
add $28,%esp                   # drop the 28 bytes that held it
- ret
+ # consider sse fenv as well if the cpu has XMM capability
+2: call 1f                    # pushes the address of the next label 1
+1: addl $__hwcap-1b,(%esp)    # pushed address + (__hwcap - 1b) = &__hwcap, no absolute relocation
+ pop %edx                     # edx = &__hwcap
+ testl $0x02000000,(%edx)     # is bit 25 of __hwcap set?
+ jz 1f                        # bit clear: leave mxcsr alone
+ # mxcsr := same rounding mode, cleared exceptions, default mask
+ and $0xc00,%ecx              # keep only bits 10-11
+ shl $3,%ecx                  # move them to bits 13-14
+ or $0x1f80,%ecx              # set bits 7-12; bits 0-5 stay zero
+ mov %ecx,4(%esp)             # use the argument slot as scratch memory for ldmxcsr
+ ldmxcsr 4(%esp)              # load the composed value into mxcsr
+1: ret
.global fetestexcept
.type fetestexcept,@function
fetestexcept:
mov 4(%esp),%ecx               # ecx = first stack argument (bits to test)
fnstsw %ax                     # ax = x87 status word
- and %ecx,%eax
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f                      # pushes the address of the next label 1
+1: addl $__hwcap-1b,(%esp)    # pushed address + (__hwcap - 1b) = &__hwcap, no absolute relocation
+ pop %edx                     # edx = &__hwcap; esp is back to its entry value
+ testl $0x02000000,(%edx)     # is bit 25 of __hwcap set?
+ jz 1f                        # bit clear: use the x87 status only
+ stmxcsr 4(%esp)              # store mxcsr over the argument slot (argument is already in ecx)
+ or 4(%esp),%eax              # eax = x87 status OR mxcsr
+1: and %ecx,%eax              # return only the requested bits; NOTE(review): argument bits outside 0x3f are not masked off here
ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
stmxcsr -8(%rsp)               # store mxcsr into a scratch dword below rsp
or %edi,-8(%rsp)               # OR in the bits held in edi
ldmxcsr -8(%rsp)               # load the result back into mxcsr
- fnstenv -32(%rsp)
- or %edi,-28(%rsp)
- fldenv -32(%rsp)
xor %eax,%eax                  # return value 0
ret