diff options
Diffstat (limited to 'src/math/x32')
-rw-r--r-- | src/math/x32/__invtrigl.s | 0 | ||||
-rw-r--r-- | src/math/x32/acosl.s | 16 | ||||
-rw-r--r-- | src/math/x32/asinl.s | 12 | ||||
-rw-r--r-- | src/math/x32/atan2l.s | 7 | ||||
-rw-r--r-- | src/math/x32/atanl.s | 7 | ||||
-rw-r--r-- | src/math/x32/ceill.s | 1 | ||||
-rw-r--r-- | src/math/x32/exp2l.s | 90 | ||||
-rw-r--r-- | src/math/x32/expl.s | 101 | ||||
-rw-r--r-- | src/math/x32/expm1l.s | 1 | ||||
-rw-r--r-- | src/math/x32/fabs.s | 9 | ||||
-rw-r--r-- | src/math/x32/fabsf.s | 7 | ||||
-rw-r--r-- | src/math/x32/fabsl.s | 6 | ||||
-rw-r--r-- | src/math/x32/floorl.s | 27 | ||||
-rw-r--r-- | src/math/x32/fmodl.s | 11 | ||||
-rw-r--r-- | src/math/x32/llrint.s | 5 | ||||
-rw-r--r-- | src/math/x32/llrintf.s | 5 | ||||
-rw-r--r-- | src/math/x32/llrintl.s | 7 | ||||
-rw-r--r-- | src/math/x32/log10l.s | 7 | ||||
-rw-r--r-- | src/math/x32/log1pl.s | 15 | ||||
-rw-r--r-- | src/math/x32/log2l.s | 7 | ||||
-rw-r--r-- | src/math/x32/logl.s | 7 | ||||
-rw-r--r-- | src/math/x32/lrint.s | 5 | ||||
-rw-r--r-- | src/math/x32/lrintf.s | 5 | ||||
-rw-r--r-- | src/math/x32/lrintl.s | 7 | ||||
-rw-r--r-- | src/math/x32/remainderl.s | 11 | ||||
-rw-r--r-- | src/math/x32/rintl.s | 6 | ||||
-rw-r--r-- | src/math/x32/sqrt.s | 4 | ||||
-rw-r--r-- | src/math/x32/sqrtf.s | 4 | ||||
-rw-r--r-- | src/math/x32/sqrtl.s | 5 | ||||
-rw-r--r-- | src/math/x32/truncl.s | 1 |
30 files changed, 396 insertions, 0 deletions
diff --git a/src/math/x32/__invtrigl.s b/src/math/x32/__invtrigl.s
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/math/x32/__invtrigl.s
diff --git a/src/math/x32/acosl.s b/src/math/x32/acosl.s
new file mode 100644
index 00000000..88e01b49
--- /dev/null
+++ b/src/math/x32/acosl.s
@@ -0,0 +1,16 @@
+# see ../i386/acos.s
+
+.global acosl                    # acosl(x): x is read from 8(%rsp), result is left in st(0)
+.type acosl,@function
+acosl:
+	fldt 8(%rsp)             # st(0) = x
+1:	fld %st(0)               # duplicate x
+	fld1                     # x87 stack (top first): 1, x, x
+	fsub %st(0),%st(1)       # combine 1 and x in st(1); presumably 1-x (gas fsub/fsubr operand quirk) - confirm
+	fadd %st(2)              # st(0) = 1 + x
+	fmulp                    # multiply the two terms, popping one operand
+	fsqrt                    # root of the product
+	fabs                     # force a non-negative sign on the root
+	fxch %st(1)              # st(0) = x, st(1) = root
+	fpatan                   # st(0) = atan2(root, x)
+	ret
diff --git a/src/math/x32/asinl.s b/src/math/x32/asinl.s
new file mode 100644
index 00000000..ed212d9a
--- /dev/null
+++ b/src/math/x32/asinl.s
@@ -0,0 +1,12 @@
+.global asinl                    # asinl(x): x is read from 8(%rsp), result is left in st(0)
+.type asinl,@function
+asinl:
+	fldt 8(%rsp)             # st(0) = x
+1:	fld %st(0)               # duplicate x
+	fld1                     # x87 stack (top first): 1, x, x
+	fsub %st(0),%st(1)       # combine 1 and x in st(1); presumably 1-x (gas fsub/fsubr operand quirk) - confirm
+	fadd %st(2)              # st(0) = 1 + x
+	fmulp                    # multiply the two terms, popping one operand
+	fsqrt                    # st(0) = root, st(1) = x
+	fpatan                   # st(0) = atan2(x, root)
+	ret
diff --git a/src/math/x32/atan2l.s b/src/math/x32/atan2l.s
new file mode 100644
index 00000000..e5f0a3de
--- /dev/null
+++ b/src/math/x32/atan2l.s
@@ -0,0 +1,7 @@
+.global atan2l                   # atan2l(first, second): arguments at 8(%rsp) and 24(%rsp), result in st(0)
+.type atan2l,@function
+atan2l:
+	fldt 8(%rsp)             # first argument
+	fldt 24(%rsp)            # second argument, 16 bytes further up the stack
+	fpatan                   # st(0) = atan2(first, second)
+	ret
diff --git a/src/math/x32/atanl.s b/src/math/x32/atanl.s
new file mode 100644
index 00000000..df76de5d
--- /dev/null
+++ b/src/math/x32/atanl.s
@@ -0,0 +1,7 @@
+.global atanl                    # atanl(x): x is read from 8(%rsp), result is left in st(0)
+.type atanl,@function
+atanl:
+	fldt 8(%rsp)             # st(0) = x
+	fld1                     # st(0) = 1, st(1) = x
+	fpatan                   # st(0) = atan2(x, 1)
+	ret
diff --git a/src/math/x32/ceill.s b/src/math/x32/ceill.s
new file mode 100644
index 00000000..f5cfa3b3
--- /dev/null
+++ b/src/math/x32/ceill.s
@@ -0,0 +1 @@
+# see floorl.s
diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s
new file mode 100644
index 00000000..0d6cd563
--- /dev/null
+++ b/src/math/x32/exp2l.s
@@ -0,0 +1,90 @@
+.global expm1l                   # expm1l(x): defined in this file because it calls into exp2l's body below
+.type expm1l,@function
+expm1l:
+	fldt 8(%rsp)             # st(0) = x
+	fldl2e
+	fmulp                    # st(0) = y = x*log2e
+	movl $0xc2820000,-4(%rsp)        # bit pattern of -65.0f, staged just below the stack pointer
+	flds -4(%rsp)
+	fucomp %st(1)            # compare -65 with y, then pop -65
+	fnstsw %ax
+	sahf                     # x87 condition codes -> CPU flags
+	fld1
+	jb 1f                    # -65 < y (or unordered): continue below
+	# x*log2e <= -65, return -1 without underflow
+	fstp %st(1)              # discard y, keep the 1
+	fchs                     # st(0) = -1
+	ret
+1:	fld %st(1)               # copy y above the 1
+	fabs
+	fucom %st(1)             # compare |y| with 1
+	fnstsw %ax
+	fstp %st(0)              # pop |y|
+	fstp %st(0)              # pop 1, leaving y in st(0)
+	sahf
+	ja 1f                    # |y| > 1: go through exp2l's code instead
+	f2xm1                    # st(0) = 2^y - 1
+	ret
+1:	push %rax                # 8 bytes of padding around the call - presumably for stack alignment; confirm
+	call 1f                  # next local label 1: is inside exp2l, just past its argument load
+	pop %rax
+	fld1
+	fsubrp                   # subtract the 1 from the value the call left in st(0)
+	ret
+
+.global exp2l                    # exp2l(x): x is read from 8(%rsp), result is left in st(0)
+.type exp2l,@function
+exp2l:
+	fldt 8(%rsp)             # st(0) = x
+1:	fld %st(0)               # also entered from expm1l's call, with the argument already in st(0)
+	sub $16,%rsp             # 16 bytes of scratch space
+	fstpt (%rsp)             # scratch = copy of x (pops the duplicate)
+	mov 8(%rsp),%ax          # sign+exponent word of the stored copy
+	and $0x7fff,%ax          # drop the sign bit
+	cmp $0x3fff+13,%ax
+	jb 4f # |x| < 8192
+	cmp $0x3fff+15,%ax
+	jae 3f # |x| >= 32768
+	fsts (%rsp)              # x rounded to float, for the integer compare below
+	cmpl $0xc67ff800,(%rsp)
+	jb 2f # x > -16382
+	movl $0x5f000000,(%rsp)
+	flds (%rsp) # 0x1p63
+	fld %st(1)
+	fsub %st(1)
+	faddp
+	fucomp %st(1)            # compare the round-tripped value with x, then pop it
+	fnstsw
+	sahf
+	je 2f # x - 0x1p63 + 0x1p63 == x
+	movl $1,(%rsp)
+	flds (%rsp) # 0x1p-149
+	fdiv %st(1)
+	fstps (%rsp) # raise underflow
+2:	fld1
+	fld %st(1)
+	frndint
+	fxch %st(2)
+	fsub %st(2) # st(0)=x-rint(x), st(1)=1, st(2)=rint(x)
+	f2xm1
+	faddp # 2^(x-rint(x))
+1:	fscale                   # st(0) *= 2^trunc(st(1))
+	fstp %st(1)              # drop the scale operand, keep the result
+	add $16,%rsp             # release the scratch space
+	ret
+3:	xor %eax,%eax            # ax = 0 so the compare below always takes the jb 1b path
+4:	cmp $0x3fff-64,%ax
+	fld1
+	jb 1b # |x| < 0x1p-64
+	fstpt (%rsp)             # scratch = 1.0 (pops it); its exponent word is patched below
+	fistl 8(%rsp)            # rint(x) as a 32-bit integer, written over that exponent word
+	fildl 8(%rsp)            # reload rint(x) onto the x87 stack
+	fsubrp %st(1)            # leaves the fractional part x-rint(x), per the comment two lines below
+	addl $0x3fff,8(%rsp)     # add the exponent bias: scratch now encodes 2^rint(x)
+	f2xm1
+	fld1
+	faddp # 2^(x-rint(x))
+	fldt (%rsp) # 2^rint(x)
+	fmulp
+	add $16,%rsp             # release the scratch space
+	ret
diff --git a/src/math/x32/expl.s b/src/math/x32/expl.s
new file mode 100644
index 00000000..3add810d
--- /dev/null
+++ b/src/math/x32/expl.s
@@ -0,0 +1,101 @@
+# exp(x) = 2^hi + 2^hi (2^lo - 1)
+# where hi+lo = log2e*x with 128bit precision
+# exact log2e*x calculation depends on nearest rounding mode
+# using the exact multiplication method of Dekker and Veltkamp
+
+.global expl                     # expl(x): x is read from 8(%rsp), result is left in st(0)
+.type expl,@function
+expl:
+	fldt 8(%rsp)             # st(0) = x
+
+	# interesting case: 0x1p-32 <= |x| < 16384
+	# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
+	mov 16(%rsp), %ax        # sign+exponent word of x
+	or $0x8000, %ax          # force the sign bit so both signs compare alike
+	sub $0xbfdf, %ax         # 0xbfdf = 0xbfff-32: rebases the range to [0, 45]
+	cmp $45, %ax
+	jbe 2f                   # in range: take the precise path
+	test %ax, %ax
+	fld1
+	js 1f                    # negative after rebasing: exponent is below the range
+	# if |x|>=0x1p14 or nan return 2^trunc(x)
+	fscale
+	fstp %st(1)              # drop x, keep the scaled result
+	ret
+	# if |x|<0x1p-32 return 1+x
+1:	faddp
+	ret
+
+	# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
+	# it will be wrong on non-nearest rounding mode
+2:	fldl2e
+	subq $48, %rsp           # 48 bytes of scratch: three 16-byte long double slots
+	# hi = log2e_hi*x
+	# 2^hi = exp2l(hi)
+	fmul %st(1),%st
+	fld %st(0)
+	fstpt (%rsp)             # hi -> slot that exp2l reads as its argument after the call
+	fstpt 16(%rsp)           # hi saved for later
+	fstpt 32(%rsp)           # x saved for later; the x87 stack is now empty
+	call exp2l
+	# if 2^hi == inf return 2^hi
+	fld %st(0)
+	fstpt (%rsp)             # store a copy of 2^hi to inspect its exponent
+	cmpw $0x7fff, 8(%rsp)
+	je 1f                    # forward to the final 1: (scratch release and ret)
+	fldt 32(%rsp)            # reload x
+	fldt 16(%rsp)            # reload hi
+	# fpu stack: 2^hi x hi
+	# exact mult: x*log2e
+	fld %st(1)
+	# c = 0x1p32+1
+	movq $0x41f0000000100000,%rax
+	pushq %rax               # constants are passed to the x87 unit through the stack
+	fldl (%rsp)
+	# xh = x - c*x + c*x
+	# xl = x - xh
+	fmulp
+	fld %st(2)
+	fsub %st(1), %st
+	faddp
+	fld %st(2)
+	fsub %st(1), %st
+	# yh = log2e_hi - c*log2e_hi + c*log2e_hi
+	movq $0x3ff7154765200000,%rax
+	pushq %rax
+	fldl (%rsp)
+	# fpu stack: 2^hi x hi xh xl yh
+	# lo = hi - xh*yh + xl*yh
+	fld %st(2)
+	fmul %st(1), %st
+	fsubp %st, %st(4)
+	fmul %st(1), %st
+	faddp %st, %st(3)
+	# yl = log2e_hi - yh
+	movq $0x3de705fc2f000000,%rax
+	pushq %rax
+	fldl (%rsp)
+	# fpu stack: 2^hi x lo xh xl yl
+	# lo += xh*yl + xl*yl
+	fmul %st, %st(2)
+	fmulp %st, %st(1)
+	fxch %st(2)
+	faddp
+	faddp
+	# log2e_lo
+	movq $0xbfbe,%rax
+	pushq %rax               # sign+exponent word of the 80-bit constant
+	movq $0x82f0025f2dc582ee,%rax
+	pushq %rax               # its 64-bit mantissa
+	fldt (%rsp)
+	addq $40,%rsp            # drop the five quadwords pushed above
+	# fpu stack: 2^hi x lo log2e_lo
+	# lo += log2e_lo*x
+	# return 2^hi + 2^hi (2^lo - 1)
+	fmulp %st, %st(2)
+	faddp
+	f2xm1
+	fmul %st(1), %st
+	faddp
+1:	addq $48, %rsp           # release the 48-byte scratch area
+	ret
diff --git a/src/math/x32/expm1l.s b/src/math/x32/expm1l.s
new file mode 100644
index 00000000..e773f080
--- /dev/null
+++ b/src/math/x32/expm1l.s
@@ -0,0 +1 @@
+# see exp2l.s
diff --git a/src/math/x32/fabs.s b/src/math/x32/fabs.s
new file mode 100644
index 00000000..5715005e
--- /dev/null
+++ b/src/math/x32/fabs.s
@@ -0,0 +1,9 @@
+.global fabs                     # fabs: clears the sign bit of the double in xmm0, in place
+.type fabs,@function
+fabs:
+	xor %eax,%eax            # rax = 0
+	dec %rax                 # rax = all ones
+	shr %rax                 # rax = 0x7fffffffffffffff (everything except the sign bit)
+	movq %rax,%xmm1          # mask into xmm1 (clobbered)
+	andpd %xmm1,%xmm0        # clear the sign bit of the low double
+	ret
diff --git a/src/math/x32/fabsf.s b/src/math/x32/fabsf.s
new file mode 100644
index 00000000..501a1f17
--- /dev/null
+++ b/src/math/x32/fabsf.s
@@ -0,0 +1,7 @@
+.global fabsf                    # fabsf: clears the sign bit of the float in xmm0, in place
+.type fabsf,@function
+fabsf:
+	mov $0x7fffffff,%eax     # mask with everything except the float sign bit
+	movq %rax,%xmm1          # mask into xmm1 (clobbered)
+	andps %xmm1,%xmm0        # clear the sign bit of the low float
+	ret
diff --git a/src/math/x32/fabsl.s
b/src/math/x32/fabsl.s
new file mode 100644
index 00000000..4e7ab525
--- /dev/null
+++ b/src/math/x32/fabsl.s
@@ -0,0 +1,6 @@
+.global fabsl                    # fabsl(x): x is read from 8(%rsp), |x| is left in st(0)
+.type fabsl,@function
+fabsl:
+	fldt 8(%rsp)             # st(0) = x
+	fabs                     # clear the sign
+	ret
diff --git a/src/math/x32/floorl.s b/src/math/x32/floorl.s
new file mode 100644
index 00000000..80da4660
--- /dev/null
+++ b/src/math/x32/floorl.s
@@ -0,0 +1,27 @@
+.global floorl                   # floorl/ceill/truncl share one body: frndint under a temporary control word
+.type floorl,@function
+floorl:
+	fldt 8(%rsp)             # st(0) = x
+1:	mov $0x7,%al             # al = replacement high byte of the x87 control word for floorl
+1:	fstcw 8(%rsp)            # shared entry (target of jmp 1b): save the control word over the argument slot
+	mov 9(%rsp),%ah          # ah = original high byte of the control word
+	mov %al,9(%rsp)          # patch in the requested high byte
+	fldcw 8(%rsp)            # activate the patched control word
+	frndint                  # round st(0) to an integer under that control word
+	mov %ah,9(%rsp)          # put the original high byte back
+	fldcw 8(%rsp)            # restore the caller's control word
+	ret
+
+.global ceill                    # ceill(x): same sequence as floorl with high byte 0xb
+.type ceill,@function
+ceill:
+	fldt 8(%rsp)             # st(0) = x
+	mov $0xb,%al
+	jmp 1b                   # nearest preceding 1: is the fstcw line in floorl
+
+.global truncl                   # truncl(x): same sequence as floorl with high byte 0xf
+.type truncl,@function
+truncl:
+	fldt 8(%rsp)             # st(0) = x
+	mov $0xf,%al
+	jmp 1b                   # nearest preceding 1: is the fstcw line in floorl
diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s
new file mode 100644
index 00000000..ca81e60c
--- /dev/null
+++ b/src/math/x32/fmodl.s
@@ -0,0 +1,11 @@
+.global fmodl                    # fmodl(x, y): x at 8(%rsp), y at 24(%rsp), result in st(0)
+.type fmodl,@function
+fmodl:
+	fldt 24(%rsp)            # y
+	fldt 8(%rsp)             # st(0) = x, st(1) = y
+1:	fprem                    # partial remainder of st(0) by st(1)
+	fstsw %ax
+	sahf                     # x87 condition codes -> CPU flags
+	jp 1b                    # parity flag set: reduction not finished, repeat
+	fstp %st(1)              # drop y, keep the remainder
+	ret
diff --git a/src/math/x32/llrint.s b/src/math/x32/llrint.s
new file mode 100644
index 00000000..bf476498
--- /dev/null
+++ b/src/math/x32/llrint.s
@@ -0,0 +1,5 @@
+.global llrint                   # llrint: double in xmm0 -> 64-bit integer in rax
+.type llrint,@function
+llrint:
+	cvtsd2si %xmm0,%rax      # convert with the current SSE rounding mode
+	ret
diff --git a/src/math/x32/llrintf.s b/src/math/x32/llrintf.s
new file mode 100644
index 00000000..d7204ac0
--- /dev/null
+++ b/src/math/x32/llrintf.s
@@ -0,0 +1,5 @@
+.global llrintf                  # llrintf: float in xmm0 -> 64-bit integer in rax
+.type llrintf,@function
+llrintf:
+	cvtss2si %xmm0,%rax      # convert with the current SSE rounding mode
+	ret
diff --git a/src/math/x32/llrintl.s b/src/math/x32/llrintl.s
new file mode 100644
index 00000000..1ec0817d
--- /dev/null
+++ b/src/math/x32/llrintl.s
@@ -0,0 +1,7 @@
+.global llrintl                  # llrintl(x): x is read from 8(%rsp), 64-bit integer result in rax
+.type llrintl,@function
+llrintl:
+	fldt 8(%rsp)             # st(0) = x
+	fistpll 8(%rsp)          # store the rounded 64-bit integer over the argument slot, pop
+	mov 8(%rsp),%rax         # fetch it as the return value
+	ret
diff --git a/src/math/x32/log10l.s b/src/math/x32/log10l.s
new file mode 100644
index 00000000..48ea4af7
--- /dev/null
+++ b/src/math/x32/log10l.s
@@ -0,0 +1,7 @@
+.global log10l                   # log10l(x): x is read from 8(%rsp), result is left in st(0)
+.type log10l,@function
+log10l:
+	fldlg2                   # log10(2)
+	fldt 8(%rsp)             # st(0) = x, st(1) = log10(2)
+	fyl2x                    # st(0) = log10(2) * log2(x)
+	ret
diff --git a/src/math/x32/log1pl.s b/src/math/x32/log1pl.s
new file mode 100644
index 00000000..955c9dbf
--- /dev/null
+++ b/src/math/x32/log1pl.s
@@ -0,0 +1,15 @@
+.global log1pl                   # log1pl(x): x is read from 8(%rsp), result is left in st(0)
+.type log1pl,@function
+log1pl:
+	mov 14(%rsp),%eax        # top 16 mantissa bits (low half) and sign+exponent (high half) of x
+	fldln2                   # ln(2)
+	and $0x7fffffff,%eax     # drop the sign bit
+	fldt 8(%rsp)             # st(0) = x, st(1) = ln(2)
+	cmp $0x3ffd9400,%eax     # threshold: exponent 0x3ffd with leading mantissa bits 0x9400
+	ja 1f                    # magnitude above the threshold: use the plain log path
+	fyl2xp1                  # st(0) = ln(2) * log2(1 + x)
+	ret
+1:	fld1
+	faddp                    # st(0) = 1 + x
+	fyl2x                    # st(0) = ln(2) * log2(1 + x)
+	ret
diff --git a/src/math/x32/log2l.s b/src/math/x32/log2l.s
new file mode 100644
index 00000000..ba08b9fb
--- /dev/null
+++ b/src/math/x32/log2l.s
@@ -0,0 +1,7 @@
+.global log2l                    # log2l(x): x is read from 8(%rsp), result is left in st(0)
+.type log2l,@function
+log2l:
+	fld1                     # multiplier 1
+	fldt 8(%rsp)             # st(0) = x, st(1) = 1
+	fyl2x                    # st(0) = 1 * log2(x)
+	ret
diff --git a/src/math/x32/logl.s b/src/math/x32/logl.s
new file mode 100644
index 00000000..20dd1f81
--- /dev/null
+++ b/src/math/x32/logl.s
@@ -0,0 +1,7 @@
+.global logl                     # logl(x): x is read from 8(%rsp), result is left in st(0)
+.type logl,@function
+logl:
+	fldln2                   # ln(2)
+	fldt 8(%rsp)             # st(0) = x, st(1) = ln(2)
+	fyl2x                    # st(0) = ln(2) * log2(x)
+	ret
diff --git a/src/math/x32/lrint.s b/src/math/x32/lrint.s
new file mode 100644
index 00000000..15fc2454
--- /dev/null
+++ b/src/math/x32/lrint.s
@@ -0,0 +1,5 @@
+.global lrint                    # lrint: double in xmm0 -> integer in rax
+.type lrint,@function
+lrint:
+	cvtsd2si %xmm0,%rax      # NOTE(review): 64-bit conversion; if long is 32 bits on x32, confirm out-of-range handling
+	ret
diff --git a/src/math/x32/lrintf.s b/src/math/x32/lrintf.s
new file mode 100644
index 00000000..488423d2
--- /dev/null
+++ b/src/math/x32/lrintf.s
@@ -0,0 +1,5 @@
+.global lrintf                   # lrintf: float in xmm0 -> integer in rax
+.type lrintf,@function
+lrintf:
+	cvtss2si %xmm0,%rax      # NOTE(review): 64-bit conversion; if long is 32 bits on x32, confirm out-of-range handling
+	ret
diff --git a/src/math/x32/lrintl.s b/src/math/x32/lrintl.s
new file mode 100644
index 00000000..d587b12b
--- /dev/null
+++ b/src/math/x32/lrintl.s
@@ -0,0 +1,7 @@
+.global lrintl                   # lrintl(x): x is read from 8(%rsp), integer result in rax
+.type lrintl,@function
+lrintl:
+	fldt 8(%rsp)             # st(0) = x
+	fistpll 8(%rsp)          # store the rounded 64-bit integer over the argument slot, pop
+	mov 8(%rsp),%rax         # NOTE(review): full 64-bit result returned; confirm against x32's width for long
+	ret
diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s
new file mode 100644
index 00000000..75c12374
--- /dev/null
+++ b/src/math/x32/remainderl.s
@@ -0,0 +1,11 @@
+.global remainderl               # remainderl(x, y): x at 8(%rsp), y at 24(%rsp), result in st(0)
+.type remainderl,@function
+remainderl:
+	fldt 24(%rsp)            # y
+	fldt 8(%rsp)             # st(0) = x, st(1) = y
+1:	fprem1                   # partial remainder of st(0) by st(1) (fprem1 variant)
+	fstsw %ax
+	sahf                     # x87 condition codes -> CPU flags
+	jp 1b                    # parity flag set: reduction not finished, repeat
+	fstp %st(1)              # drop y, keep the remainder
+	ret
diff --git
a/src/math/x32/rintl.s b/src/math/x32/rintl.s
new file mode 100644
index 00000000..64e663cd
--- /dev/null
+++ b/src/math/x32/rintl.s
@@ -0,0 +1,6 @@
+.global rintl                    # rintl(x): x is read from 8(%rsp), result is left in st(0)
+.type rintl,@function
+rintl:
+	fldt 8(%rsp)             # st(0) = x
+	frndint                  # round to an integer using the current x87 rounding mode
+	ret
diff --git a/src/math/x32/sqrt.s b/src/math/x32/sqrt.s
new file mode 100644
index 00000000..d3c609f9
--- /dev/null
+++ b/src/math/x32/sqrt.s
@@ -0,0 +1,4 @@
+.global sqrt                     # sqrt: square root of the double in xmm0, in place
+.type sqrt,@function
+sqrt:	sqrtsd %xmm0, %xmm0
+	ret
diff --git a/src/math/x32/sqrtf.s b/src/math/x32/sqrtf.s
new file mode 100644
index 00000000..eec48c60
--- /dev/null
+++ b/src/math/x32/sqrtf.s
@@ -0,0 +1,4 @@
+.global sqrtf                    # sqrtf: square root of the float in xmm0, in place
+.type sqrtf,@function
+sqrtf:	sqrtss %xmm0, %xmm0
+	ret
diff --git a/src/math/x32/sqrtl.s b/src/math/x32/sqrtl.s
new file mode 100644
index 00000000..23cd687d
--- /dev/null
+++ b/src/math/x32/sqrtl.s
@@ -0,0 +1,5 @@
+.global sqrtl                    # sqrtl(x): x is read from 8(%rsp), result is left in st(0)
+.type sqrtl,@function
+sqrtl:	fldt 8(%rsp)
+	fsqrt                    # st(0) = sqrt(x)
+	ret
diff --git a/src/math/x32/truncl.s b/src/math/x32/truncl.s
new file mode 100644
index 00000000..f5cfa3b3
--- /dev/null
+++ b/src/math/x32/truncl.s
@@ -0,0 +1 @@
+# see floorl.s |