mirror of
https://github.com/xmrig/xmrig.git
synced 2026-04-25 15:42:39 -04:00
Merge pull request #3805 from SChernykh/dev
ARM64 JIT: Optimize Group E register conversion
This commit is contained in:
@@ -1059,11 +1059,8 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos)
|
|||||||
constexpr uint32_t tmp_reg_fp = 28;
|
constexpr uint32_t tmp_reg_fp = 28;
|
||||||
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
|
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
|
||||||
|
|
||||||
// and tmp_reg_fp, tmp_reg_fp, and_mask_reg
|
// bif tmp_reg_fp, or_mask_reg, and_mask_reg
|
||||||
emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k);
|
emit32(0x6EE01C00 | tmp_reg_fp | (30 << 5) | (29 << 16), code, k);
|
||||||
|
|
||||||
// orr tmp_reg_fp, tmp_reg_fp, or_mask_reg
|
|
||||||
emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k);
|
|
||||||
|
|
||||||
emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
|
emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
|
||||||
|
|
||||||
|
|||||||
@@ -109,7 +109,7 @@
|
|||||||
# v26 -> "a2"
|
# v26 -> "a2"
|
||||||
# v27 -> "a3"
|
# v27 -> "a3"
|
||||||
# v28 -> temporary
|
# v28 -> temporary
|
||||||
# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
|
# v29 -> E 'and' mask = 0x00ffffffffc00000'00ffffffffc00000
|
||||||
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
|
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
|
||||||
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000
|
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000
|
||||||
|
|
||||||
@@ -151,7 +151,9 @@ DECL(randomx_program_aarch64):
|
|||||||
ldp q26, q27, [x0, 224]
|
ldp q26, q27, [x0, 224]
|
||||||
|
|
||||||
# Load E 'and' mask
|
# Load E 'and' mask
|
||||||
movi v29.2d, #0x00FFFFFFFFFFFFFF
|
mov x16, 0x00FFFFFFFFC00000
|
||||||
|
ins v29.d[0], x16
|
||||||
|
ins v29.d[1], x16
|
||||||
|
|
||||||
# Load E 'or' mask (stored in reg.f[0])
|
# Load E 'or' mask (stored in reg.f[0])
|
||||||
ldr q30, [x0, 64]
|
ldr q30, [x0, 64]
|
||||||
@@ -239,14 +241,10 @@ DECL(randomx_program_aarch64_main_loop):
|
|||||||
sxtl2 v23.2d, v23.4s
|
sxtl2 v23.2d, v23.4s
|
||||||
scvtf v23.2d, v23.2d
|
scvtf v23.2d, v23.2d
|
||||||
|
|
||||||
and v20.16b, v20.16b, v29.16b
|
bif v20.16b, v30.16b, v29.16b
|
||||||
and v21.16b, v21.16b, v29.16b
|
bif v21.16b, v30.16b, v29.16b
|
||||||
and v22.16b, v22.16b, v29.16b
|
bif v22.16b, v30.16b, v29.16b
|
||||||
and v23.16b, v23.16b, v29.16b
|
bif v23.16b, v30.16b, v29.16b
|
||||||
orr v20.16b, v20.16b, v30.16b
|
|
||||||
orr v21.16b, v21.16b, v30.16b
|
|
||||||
orr v22.16b, v22.16b, v30.16b
|
|
||||||
orr v23.16b, v23.16b, v30.16b
|
|
||||||
|
|
||||||
# Execute VM instructions
|
# Execute VM instructions
|
||||||
DECL(randomx_program_aarch64_vm_instructions):
|
DECL(randomx_program_aarch64_vm_instructions):
|
||||||
|
|||||||
Reference in New Issue
Block a user