1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-06-20 03:24:37 -04:00

Compare commits

...

2 Commits

Author SHA1 Message Date
xmrig
80eff55ed6 Merge pull request #3805 from SChernykh/dev
ARM64 JIT: Optimize Group E register conversion
2026-04-25 17:38:04 +07:00
SChernykh
5347458fc7 ARM64 JIT: Optimize Group E register conversion
Based on https://github.com/tevador/RandomX/pull/324
2026-04-25 11:37:47 +02:00
2 changed files with 10 additions and 15 deletions

View File

@@ -1059,11 +1059,8 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
// and tmp_reg_fp, tmp_reg_fp, and_mask_reg
emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k);
// orr tmp_reg_fp, tmp_reg_fp, or_mask_reg
emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k);
// bif tmp_reg_fp, or_mask_reg, and_mask_reg
emit32(0x6EE01C00 | tmp_reg_fp | (30 << 5) | (29 << 16), code, k);
emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);

View File

@@ -109,7 +109,7 @@
# v26 -> "a2"
# v27 -> "a3"
# v28 -> temporary
# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
# v29 -> E 'and' mask = 0x00ffffffffc00000'00ffffffffc00000
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000
@@ -151,7 +151,9 @@ DECL(randomx_program_aarch64):
ldp q26, q27, [x0, 224]
# Load E 'and' mask
movi v29.2d, #0x00FFFFFFFFFFFFFF
mov x16, 0x00FFFFFFFFC00000
ins v29.d[0], x16
ins v29.d[1], x16
# Load E 'or' mask (stored in reg.f[0])
ldr q30, [x0, 64]
@@ -239,14 +241,10 @@ DECL(randomx_program_aarch64_main_loop):
sxtl2 v23.2d, v23.4s
scvtf v23.2d, v23.2d
and v20.16b, v20.16b, v29.16b
and v21.16b, v21.16b, v29.16b
and v22.16b, v22.16b, v29.16b
and v23.16b, v23.16b, v29.16b
orr v20.16b, v20.16b, v30.16b
orr v21.16b, v21.16b, v30.16b
orr v22.16b, v22.16b, v30.16b
orr v23.16b, v23.16b, v30.16b
bif v20.16b, v30.16b, v29.16b
bif v21.16b, v30.16b, v29.16b
bif v22.16b, v30.16b, v29.16b
bif v23.16b, v30.16b, v29.16b
# Execute VM instructions
DECL(randomx_program_aarch64_vm_instructions):