1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-04-25 15:42:39 -04:00

Merge pull request #3805 from SChernykh/dev

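ARM64 JIT: Optimize Group E register conversion

Each AND + ORR instruction pair in the Group E conversion is replaced by a single BIF (bitwise insert if false) instruction, and the E 'and' mask changes from 0x00ffffffffffffff to 0x00ffffffffc00000.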
This commit is contained in:
xmrig
2026-04-25 17:38:04 +07:00
committed by GitHub
2 changed files with 10 additions and 15 deletions

View File

@@ -1059,11 +1059,8 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg_fp = 28; constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
// and tmp_reg_fp, tmp_reg_fp, and_mask_reg // bif tmp_reg_fp, or_mask_reg, and_mask_reg
emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k); emit32(0x6EE01C00 | tmp_reg_fp | (30 << 5) | (29 << 16), code, k);
// orr tmp_reg_fp, tmp_reg_fp, or_mask_reg
emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k);
emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);

View File

@@ -109,7 +109,7 @@
# v26 -> "a2" # v26 -> "a2"
# v27 -> "a3" # v27 -> "a3"
# v28 -> temporary # v28 -> temporary
# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff # v29 -> E 'and' mask = 0x00ffffffffc00000'00ffffffffc00000
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000****** # v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000 # v31 -> scale mask = 0x80f0000000000000'80f0000000000000
@@ -151,7 +151,9 @@ DECL(randomx_program_aarch64):
ldp q26, q27, [x0, 224] ldp q26, q27, [x0, 224]
# Load E 'and' mask # Load E 'and' mask
movi v29.2d, #0x00FFFFFFFFFFFFFF mov x16, 0x00FFFFFFFFC00000
ins v29.d[0], x16
ins v29.d[1], x16
# Load E 'or' mask (stored in reg.f[0]) # Load E 'or' mask (stored in reg.f[0])
ldr q30, [x0, 64] ldr q30, [x0, 64]
@@ -239,14 +241,10 @@ DECL(randomx_program_aarch64_main_loop):
sxtl2 v23.2d, v23.4s sxtl2 v23.2d, v23.4s
scvtf v23.2d, v23.2d scvtf v23.2d, v23.2d
and v20.16b, v20.16b, v29.16b bif v20.16b, v30.16b, v29.16b
and v21.16b, v21.16b, v29.16b bif v21.16b, v30.16b, v29.16b
and v22.16b, v22.16b, v29.16b bif v22.16b, v30.16b, v29.16b
and v23.16b, v23.16b, v29.16b bif v23.16b, v30.16b, v29.16b
orr v20.16b, v20.16b, v30.16b
orr v21.16b, v21.16b, v30.16b
orr v22.16b, v22.16b, v30.16b
orr v23.16b, v23.16b, v30.16b
# Execute VM instructions # Execute VM instructions
DECL(randomx_program_aarch64_vm_instructions): DECL(randomx_program_aarch64_vm_instructions):