1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-04-26 07:52:47 -04:00

ARM64 JIT: Optimize Group E register conversion

Based on https://github.com/tevador/RandomX/pull/324
This commit is contained in:
SChernykh
2026-04-25 10:53:11 +02:00
parent 6bf43053f7
commit 5347458fc7
2 changed files with 10 additions and 15 deletions

View File

@@ -1059,11 +1059,8 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
// and tmp_reg_fp, tmp_reg_fp, and_mask_reg
emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k);
// orr tmp_reg_fp, tmp_reg_fp, or_mask_reg
emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k);
// bif tmp_reg_fp, or_mask_reg, and_mask_reg
emit32(0x6EE01C00 | tmp_reg_fp | (30 << 5) | (29 << 16), code, k);
emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);

View File

@@ -109,7 +109,7 @@
# v26 -> "a2"
# v27 -> "a3"
# v28 -> temporary
# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
# v29 -> E 'and' mask = 0x00ffffffffc00000'00ffffffffc00000
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000
@@ -151,7 +151,9 @@ DECL(randomx_program_aarch64):
ldp q26, q27, [x0, 224]
# Load E 'and' mask
movi v29.2d, #0x00FFFFFFFFFFFFFF
mov x16, 0x00FFFFFFFFC00000
ins v29.d[0], x16
ins v29.d[1], x16
# Load E 'or' mask (stored in reg.f[0])
ldr q30, [x0, 64]
@@ -239,14 +241,10 @@ DECL(randomx_program_aarch64_main_loop):
sxtl2 v23.2d, v23.4s
scvtf v23.2d, v23.2d
and v20.16b, v20.16b, v29.16b
and v21.16b, v21.16b, v29.16b
and v22.16b, v22.16b, v29.16b
and v23.16b, v23.16b, v29.16b
orr v20.16b, v20.16b, v30.16b
orr v21.16b, v21.16b, v30.16b
orr v22.16b, v22.16b, v30.16b
orr v23.16b, v23.16b, v30.16b
bif v20.16b, v30.16b, v29.16b
bif v21.16b, v30.16b, v29.16b
bif v22.16b, v30.16b, v29.16b
bif v23.16b, v30.16b, v29.16b
# Execute VM instructions
DECL(randomx_program_aarch64_vm_instructions):