diff options
author | Szabolcs Nagy <nsz@port70.net> | 2013-08-17 02:40:44 +0000 |
---|---|---|
committer | Szabolcs Nagy <nsz@port70.net> | 2013-08-18 16:27:20 +0000 |
commit | ebc10fa17634a6ddb87a3aedd71b7d9617d12c19 (patch) | |
tree | 8e3f1ffffe8c0ed8d768ac7bbdb156038d12556e | |
parent | d6841499109fc397cd3a57a726304fec9b08f510 (diff) | |
download | musl-ebc10fa17634a6ddb87a3aedd71b7d9617d12c19.tar.gz |
add sse fenv support on i386 through hwcap
the sse and x87 rounding modes should always be the same, and
the visible exception flags are the bitwise OR of the two
fenv states (so it is enough to query the rounding mode from,
or raise exceptions on, only one of the two fenv states)
-rw-r--r-- | src/fenv/i386/fenv.s | 67 | ||||
-rw-r--r-- | src/fenv/x86_64/fenv.s | 3 |
2 files changed, 61 insertions, 9 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+	# TODO: only load/store fenv if exceptions are not clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f			# no XMM capability: skip the mxcsr update
+	stmxcsr (%esp)
+	shl $3,%ch		# x87 rounding bits 10-11 -> mxcsr bits 13-14
+	andb $0x9f,1(%esp)
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+	# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	or 4(%esp),%eax
+1:	and %ecx,%eax
 	ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	ret
 