Make the floating-point state switch in jump_fcontext conditional.

Every hunk tests the fourth integer argument register of its ABI
(ARM a4, MIPS $a3, PowerPC %r6, Win64 r9, SysV %rcx).  When that register
is zero the code branches forward over BOTH the save of the outgoing
floating-point state and the restore of the incoming one.  To let a single
forward branch cover both, the restore sequence is moved up so that it sits
directly after the save sequence.

Corrections applied to the added lines of this patch:
  * ARM:     "be 1f" is not an instruction; branch-if-equal is "beq".
  * PowerPC: "r6" lacked the '%' prefix used by every other register
             operand in these files.
  * MASM:    GAS numeric local labels ("1:" / "1f") replaced by the named
             label "skip_fpu"; MASM does not support numeric labels.

NOTE(review): intra-line whitespace and blank-line placement were
reconstructed; the hunk line counts are consistent, but apply with
"git apply --ignore-whitespace" (or "patch -l") and confirm against the tree.
NOTE(review): the "index" blob ids describe the pre-correction post-image.
NOTE(review): the MIPS hunk places s.d directly after beqz; this presumes the
assembler is in its default ".set reorder" mode at that point -- confirm.

diff --git a/src/asm/fcontext_arm_aapcs_elf_gas.S b/src/asm/fcontext_arm_aapcs_elf_gas.S
index 9fadc00..0f1f8fb 100644
--- a/src/asm/fcontext_arm_aapcs_elf_gas.S
+++ b/src/asm/fcontext_arm_aapcs_elf_gas.S
@@ -53,17 +53,19 @@ jump_fcontext:
     stmia   a1, {v1-v8,sp-lr}       @ save V1-V8,SP-LR
     str     lr, [a1,#40]            @ save LR as PC
 #if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+    cmp     a4, #0                  @ test if fpu env should be preserved
+    beq     1f                      @ a4 == 0: skip VFP save and restore
+
     ldr     a4, [a1,#52]
     stmia   a4, {s16-s31}           @ save S16-S31
+
+    ldr     a4, [a2,#52]
+    ldmia   a4, {s16-s31}           @ restore S16-S31
+1:
 #endif
 
     mov     a1, a3                  @ use third arg as return value after jump
                                     @ and as first arg in context function
-#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
-    ldr     a4, [a2,#52]
-    ldmia   a4, {s16-s31}           @ restore S16-S31
-#endif
-
     ldmia   a2, {v1-v8,sp-pc}       @ restore v1-V8,SP-PC
 .size jump_fcontext,.-jump_fcontext
 
diff --git a/src/asm/fcontext_mips32_o32_elf_gas.S b/src/asm/fcontext_mips32_o32_elf_gas.S
index e200ce7..be86f18 100644
--- a/src/asm/fcontext_mips32_o32_elf_gas.S
+++ b/src/asm/fcontext_mips32_o32_elf_gas.S
@@ -59,14 +59,22 @@ jump_fcontext:
     sw      $ra, 96($a0)            # save RA as PC
 
 #if defined(__mips_hard_float)
+    beqz    $a3, 1f                 # test if fpu env should be preserved
     s.d     $f20, 120($a0)          # save F20
     s.d     $f22, 128($a0)          # save F22
     s.d     $f24, 136($a0)          # save F24
     s.d     $f26, 144($a0)          # save F26
     s.d     $f28, 152($a0)          # save F28
     s.d     $f30, 160($a0)          # save F30
-#endif
 
+    l.d     $f20, 120($a1)          # restore F20
+    l.d     $f22, 128($a1)          # restore F22
+    l.d     $f24, 136($a1)          # restore F24
+    l.d     $f26, 144($a1)          # restore F26
+    l.d     $f28, 152($a1)          # restore F28
+    l.d     $f30, 160($a1)          # restore F30
+1:
+#endif
 
     lw      $s0, ($a1)              # restore S0
     lw      $s1, 8($a1)             # restore S1
@@ -82,15 +90,6 @@ jump_fcontext:
     lw      $ra, 88($a1)            # restore RA
     move    $a0, $s2                # restore void pointer as argument
 
-#if defined(__mips_hard_float)
-    l.d     $f20, 120($a1)          # restore F20
-    l.d     $f22, 128($a1)          # restore F22
-    l.d     $f24, 136($a1)          # restore F24
-    l.d     $f26, 144($a1)          # restore F26
-    l.d     $f28, 152($a1)          # restore F28
-    l.d     $f30, 160($a1)          # restore F30
-#endif
-
     move    $v0, $a2                # use third arg as return value after jump
     move    $a0, $a2                # use third arg as first arg in context function
 
diff --git a/src/asm/fcontext_ppc32_sysv_elf_gas.S b/src/asm/fcontext_ppc32_sysv_elf_gas.S
index c844a4a..0087176 100644
--- a/src/asm/fcontext_ppc32_sysv_elf_gas.S
+++ b/src/asm/fcontext_ppc32_sysv_elf_gas.S
@@ -98,6 +98,9 @@ jump_fcontext:
     stw     %r0, 84(%r3)            # save LR
     stw     %r0, 88(%r3)            # save LR as PC
 
+    cmpwi   cr7, %r6, 0             # test if fpu env should be preserved
+    beq     cr7, 1f                 # r6 == 0: skip FPR/FPSCR save and restore
+
     stfd    %f14, 100(%r3)          # save F14
     stfd    %f15, 108(%r3)          # save F15
     stfd    %f16, 116(%r3)          # save F16
@@ -119,6 +122,27 @@ jump_fcontext:
     mffs    %f0                     # load FPSCR
     stfd    %f0, 244(%r3)           # save FPSCR
 
+    lfd     %f14, 100(%r4)          # restore F14
+    lfd     %f15, 108(%r4)          # restore F15
+    lfd     %f16, 116(%r4)          # restore F16
+    lfd     %f17, 124(%r4)          # restore F17
+    lfd     %f18, 132(%r4)          # restore F18
+    lfd     %f19, 140(%r4)          # restore F19
+    lfd     %f20, 148(%r4)          # restore F20
+    lfd     %f21, 156(%r4)          # restore F21
+    lfd     %f22, 164(%r4)          # restore F22
+    lfd     %f23, 172(%r4)          # restore F23
+    lfd     %f24, 180(%r4)          # restore F24
+    lfd     %f25, 188(%r4)          # restore F25
+    lfd     %f26, 196(%r4)          # restore F26
+    lfd     %f27, 204(%r4)          # restore F27
+    lfd     %f28, 212(%r4)          # restore F28
+    lfd     %f29, 220(%r4)          # restore F29
+    lfd     %f30, 228(%r4)          # restore F30
+    lfd     %f31, 236(%r4)          # restore F31
+    lfd     %f0, 244(%r4)           # load FPSCR
+    mtfsf   0xff, %f0               # restore FPSCR
+1:
 
     lwz     %r13, 0(%r4)            # restore R13
     lwz     %r14, 4(%r4)            # restore R14
@@ -146,27 +170,6 @@ jump_fcontext:
     lwz     %r0, 84(%r4)            # load LR
     mtlr    %r0                     # restore LR
 
-    lfd     %f14, 100(%r4)          # restore F14
-    lfd     %f15, 108(%r4)          # restore F15
-    lfd     %f16, 116(%r4)          # restore F16
-    lfd     %f17, 124(%r4)          # restore F17
-    lfd     %f18, 132(%r4)          # restore F18
-    lfd     %f19, 140(%r4)          # restore F19
-    lfd     %f20, 148(%r4)          # restore F20
-    lfd     %f21, 156(%r4)          # restore F21
-    lfd     %f22, 164(%r4)          # restore F22
-    lfd     %f23, 172(%r4)          # restore F23
-    lfd     %f24, 180(%r4)          # restore F24
-    lfd     %f25, 188(%r4)          # restore F25
-    lfd     %f26, 196(%r4)          # restore F26
-    lfd     %f27, 204(%r4)          # restore F27
-    lfd     %f28, 212(%r4)          # restore F28
-    lfd     %f29, 220(%r4)          # restore F29
-    lfd     %f30, 228(%r4)          # restore F30
-    lfd     %f31, 236(%r4)          # restore F31
-    lfd     %f0, 244(%r4)           # load FPSCR
-    mtfsf   0xff, %f0               # restore FPSCR
-
     mr.     %r3, %r5                # use third arg as return value after jump
                                     # and as first arg in context function
 
diff --git a/src/asm/fcontext_ppc64_sysv_elf_gas.S b/src/asm/fcontext_ppc64_sysv_elf_gas.S
index 512c044..c09447a 100644
--- a/src/asm/fcontext_ppc64_sysv_elf_gas.S
+++ b/src/asm/fcontext_ppc64_sysv_elf_gas.S
@@ -119,6 +119,9 @@ jump_fcontext:
     std     %r0, 168(%r3)           # save LR
     std     %r0, 176(%r3)           # save LR as PC
 
+    cmpwi   cr7, %r6, 0             # test if fpu env should be preserved
+    beq     cr7, 1f                 # r6 == 0: skip FPR/FPSCR save and restore
+
     stfd    %f14, 200(%r3)          # save F14
     stfd    %f15, 208(%r3)          # save F15
     stfd    %f16, 216(%r3)          # save F16
@@ -140,6 +143,27 @@ jump_fcontext:
     mffs    %f0                     # load FPSCR
     stfd    %f0, 344(%r3)           # save FPSCR
 
+    lfd     %f14, 200(%r4)          # restore F14
+    lfd     %f15, 208(%r4)          # restore F15
+    lfd     %f16, 216(%r4)          # restore F16
+    lfd     %f17, 224(%r4)          # restore F17
+    lfd     %f18, 232(%r4)          # restore F18
+    lfd     %f19, 240(%r4)          # restore F19
+    lfd     %f20, 248(%r4)          # restore F20
+    lfd     %f21, 256(%r4)          # restore F21
+    lfd     %f22, 264(%r4)          # restore F22
+    lfd     %f23, 272(%r4)          # restore F23
+    lfd     %f24, 280(%r4)          # restore F24
+    lfd     %f25, 288(%r4)          # restore F25
+    lfd     %f26, 296(%r4)          # restore F26
+    lfd     %f27, 304(%r4)          # restore F27
+    lfd     %f28, 312(%r4)          # restore F28
+    lfd     %f29, 320(%r4)          # restore F29
+    lfd     %f30, 328(%r4)          # restore F30
+    lfd     %f31, 336(%r4)          # restore F31
+    lfd     %f0, 344(%r4)           # load FPSCR
+    mtfsf   0xff, %f0               # restore FPSCR
+1:
 
     ld      %r13, 0(%r4)            # restore R13
     ld      %r14, 8(%r4)            # restore R14
@@ -167,27 +191,6 @@ jump_fcontext:
     ld      %r0, 168(%r4)           # load LR
     mtlr    %r0                     # restore LR
 
-    lfd     %f14, 200(%r4)          # restore F14
-    lfd     %f15, 208(%r4)          # restore F15
-    lfd     %f16, 216(%r4)          # restore F16
-    lfd     %f17, 224(%r4)          # restore F17
-    lfd     %f18, 232(%r4)          # restore F18
-    lfd     %f19, 240(%r4)          # restore F19
-    lfd     %f20, 248(%r4)          # restore F20
-    lfd     %f21, 256(%r4)          # restore F21
-    lfd     %f22, 264(%r4)          # restore F22
-    lfd     %f23, 272(%r4)          # restore F23
-    lfd     %f24, 280(%r4)          # restore F24
-    lfd     %f25, 288(%r4)          # restore F25
-    lfd     %f26, 296(%r4)          # restore F26
-    lfd     %f27, 304(%r4)          # restore F27
-    lfd     %f28, 312(%r4)          # restore F28
-    lfd     %f29, 320(%r4)          # restore F29
-    lfd     %f30, 328(%r4)          # restore F30
-    lfd     %f31, 336(%r4)          # restore F31
-    lfd     %f0, 344(%r4)           # load FPSCR
-    mtfsf   0xff, %f0               # restore FPSCR
-
     mr.     %r3, %r5                # use third arg as return value after jump
                                     # and as first arg in context function
 
diff --git a/src/asm/fcontext_x86_64_ms_pe_masm.asm b/src/asm/fcontext_x86_64_ms_pe_masm.asm
index 37fe51a..1dd067c 100644
--- a/src/asm/fcontext_x86_64_ms_pe_masm.asm
+++ b/src/asm/fcontext_x86_64_ms_pe_masm.asm
@@ -107,6 +107,9 @@ jump_fcontext PROC EXPORT FRAME:seh_fcontext
     mov     rax, [r10+018h]         ; load fiber local storage
     mov     [rcx+060h], rax         ; save fiber local storage
 
+    cmp     r9, 0                   ; test if fpu env should be preserved
+    je      skip_fpu                ; r9 == 0: skip MXCSR/x87 CW/XMM6-15 save and restore
+
     stmxcsr [rcx+068h]              ; save MMX control and status word
     fnstcw  [rcx+06ch]              ; save x87 control word
     mov     r10, [rcx+070h]         ; address of aligned XMM storage
@@ -121,6 +124,21 @@ jump_fcontext PROC EXPORT FRAME:seh_fcontext
     movaps  [r10+080h], xmm14
     movaps  [r10+090h], xmm15
 
+    ldmxcsr [rdx+068h]              ; restore MMX control and status word
+    fldcw   [rdx+06ch]              ; restore x87 control word
+    mov     r10, [rdx+070h]         ; address of aligned XMM storage
+    movaps  xmm6, [r10]
+    movaps  xmm7, [r10+010h]
+    movaps  xmm8, [r10+020h]
+    movaps  xmm9, [r10+030h]
+    movaps  xmm10, [r10+040h]
+    movaps  xmm11, [r10+050h]
+    movaps  xmm12, [r10+060h]
+    movaps  xmm13, [r10+070h]
+    movaps  xmm14, [r10+080h]
+    movaps  xmm15, [r10+090h]
+skip_fpu:
+
     lea     rax, [rsp+08h]          ; exclude the return address
     mov     [rcx+040h], rax         ; save as stack pointer
     mov     rax, [rsp]              ; load return address
@@ -143,20 +161,6 @@ jump_fcontext PROC EXPORT FRAME:seh_fcontext
     mov     rax, [rdx+060h]         ; load fiber local storage
     mov     [r10+018h], rax         ; restore fiber local storage
 
-    ldmxcsr [rdx+068h]              ; restore MMX control and status word
-    fldcw   [rdx+06ch]              ; restore x87 control word
-    mov     r10, [rdx+070h]         ; address of aligned XMM storage
-    movaps  xmm6, [r10]
-    movaps  xmm7, [r10+010h]
-    movaps  xmm8, [r10+020h]
-    movaps  xmm9, [r10+030h]
-    movaps  xmm10, [r10+040h]
-    movaps  xmm11, [r10+050h]
-    movaps  xmm12, [r10+060h]
-    movaps  xmm13, [r10+070h]
-    movaps  xmm14, [r10+080h]
-    movaps  xmm15, [r10+090h]
-
     mov     rsp, [rdx+040h]         ; restore RSP
     mov     r10, [rdx+048h]         ; fetch the address to returned to
 
diff --git a/src/asm/fcontext_x86_64_sysv_elf_gas.S b/src/asm/fcontext_x86_64_sysv_elf_gas.S
index ccd06bd..ad2d42b 100644
--- a/src/asm/fcontext_x86_64_sysv_elf_gas.S
+++ b/src/asm/fcontext_x86_64_sysv_elf_gas.S
@@ -50,13 +50,20 @@ jump_fcontext:
     movq    %r15, 0x20(%rdi)        /* save R15 */
     movq    %rbp, 0x28(%rdi)        /* save RBP */
 
-    stmxcsr 0x50(%rdi)              /* save MMX control and status word */
-    fnstcw  0x54(%rdi)              /* save x87 control word */
+    cmp     $0, %rcx                /* test if fpu env should be preserved */
+    je      1f                      /* rcx == 0: skip MXCSR/x87 CW save and restore */
 
-    leaq    0x8(%rsp), %rax         /* exclude the return address and save as stack pointer */
-    movq    %rax, 0x30(%rdi)        /* save as stack pointer */
-    movq    (%rsp), %rax            /* save return address */
-    movq    %rax, 0x38(%rdi)        /* save return address as RIP */
+    stmxcsr 0x50(%rdi)              /* save MMX control and status word */
+    fnstcw  0x54(%rdi)              /* save x87 control word */
+
+    ldmxcsr 0x50(%rsi)              /* restore MMX control and status word */
+    fldcw   0x54(%rsi)              /* restore x87 control word */
+1:
+
+    leaq    0x8(%rsp), %rax         /* exclude the return address and save as stack pointer */
+    movq    %rax, 0x30(%rdi)        /* save as stack pointer */
+    movq    (%rsp), %rax            /* save return address */
+    movq    %rax, 0x38(%rdi)        /* save return address as RIP */
 
     movq    (%rsi), %rbx            /* restore RBX */
     movq    0x8(%rsi), %r12         /* restore R12 */
@@ -65,9 +72,6 @@ jump_fcontext:
     movq    0x20(%rsi), %r15        /* restore R15 */
     movq    0x28(%rsi), %rbp        /* restore RBP */
 
-    ldmxcsr 0x50(%rsi)              /* restore MMX control and status word */
-    fldcw   0x54(%rsi)              /* restore x87 control word */
-
     movq    0x30(%rsi), %rsp        /* restore RSP */
     movq    0x38(%rsi), %rcx        /* fetch the address to return to */
 
diff --git a/src/asm/fcontext_x86_64_sysv_macho_gas.S b/src/asm/fcontext_x86_64_sysv_macho_gas.S
index 5e07e63..f31a251 100644
--- a/src/asm/fcontext_x86_64_sysv_macho_gas.S
+++ b/src/asm/fcontext_x86_64_sysv_macho_gas.S
@@ -49,13 +49,20 @@ _jump_fcontext:
     movq    %r15, 0x20(%rdi)        /* save R15 */
     movq    %rbp, 0x28(%rdi)        /* save RBP */
 
+    cmp     $0, %rcx                /* test if fpu env should be preserved */
+    je      1f                      /* rcx == 0: skip MXCSR/x87 CW save and restore */
+
     stmxcsr 0x50(%rdi)              /* save MMX control and status word */
     fnstcw  0x54(%rdi)              /* save x87 control word */
 
-    leaq    0x8(%rsp), %rax         /* exclude the return address and save as stack pointer */
-    movq    %rax, 0x30(%rdi)        /* save as stack pointer */
-    movq    (%rsp), %rax            /* save return address */
-    movq    %rax, 0x38(%rdi)        /* save return address as RIP */
+    ldmxcsr 0x50(%rsi)              /* restore MMX control and status word */
+    fldcw   0x54(%rsi)              /* restore x87 control word */
+1:
+
+    leaq    0x8(%rsp), %rax         /* exclude the return address and save as stack pointer */
+    movq    %rax, 0x30(%rdi)        /* save as stack pointer */
+    movq    (%rsp), %rax            /* save return address */
+    movq    %rax, 0x38(%rdi)        /* save return address as RIP */
 
     movq    (%rsi), %rbx            /* restore RBX */
     movq    0x8(%rsi), %r12         /* restore R12 */
@@ -64,9 +71,6 @@ _jump_fcontext:
     movq    0x20(%rsi), %r15        /* restore R15 */
     movq    0x28(%rsi), %rbp        /* restore RBP */
 
-    ldmxcsr 0x50(%rsi)              /* restore MMX control and status word */
-    fldcw   0x54(%rsi)              /* restore x87 control word */
-
     movq    0x30(%rsi), %rsp        /* restore RSP */
     movq    0x38(%rsi), %rcx        /* fetch the address to return to */
 