diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index 5ced86387..fa24380ce 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -1059,11 +1059,8 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos) constexpr uint32_t tmp_reg_fp = 28; emitMemLoadFP(src, instr, code, k); - // and tmp_reg_fp, tmp_reg_fp, and_mask_reg - emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k); - - // orr tmp_reg_fp, tmp_reg_fp, or_mask_reg - emit32(0x4EA01C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (30 << 16), code, k); + // bif tmp_reg_fp, or_mask_reg, and_mask_reg + emit32(0x6EE01C00 | tmp_reg_fp | (30 << 5) | (29 << 16), code, k); emit32(ARMV8A::FDIV | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); diff --git a/src/crypto/randomx/jit_compiler_a64_static.S b/src/crypto/randomx/jit_compiler_a64_static.S index 791a19e75..1bc55ae38 100644 --- a/src/crypto/randomx/jit_compiler_a64_static.S +++ b/src/crypto/randomx/jit_compiler_a64_static.S @@ -109,7 +109,7 @@ # v26 -> "a2" # v27 -> "a3" # v28 -> temporary -# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff +# v29 -> E 'and' mask = 0x00ffffffffc00000'00ffffffffc00000 # v30 -> E 'or' mask = 0x3*00000000******'3*00000000****** # v31 -> scale mask = 0x80f0000000000000'80f0000000000000 @@ -151,7 +151,9 @@ DECL(randomx_program_aarch64): ldp q26, q27, [x0, 224] # Load E 'and' mask - movi v29.2d, #0x00FFFFFFFFFFFFFF + mov x16, 0x00FFFFFFFFC00000 + ins v29.d[0], x16 + ins v29.d[1], x16 # Load E 'or' mask (stored in reg.f[0]) ldr q30, [x0, 64] @@ -239,14 +241,10 @@ DECL(randomx_program_aarch64_main_loop): sxtl2 v23.2d, v23.4s scvtf v23.2d, v23.2d - and v20.16b, v20.16b, v29.16b - and v21.16b, v21.16b, v29.16b - and v22.16b, v22.16b, v29.16b - and v23.16b, v23.16b, v29.16b - orr v20.16b, v20.16b, v30.16b - orr v21.16b, v21.16b, v30.16b - orr v22.16b, v22.16b, v30.16b - orr v23.16b, v23.16b, v30.16b + bif v20.16b, v30.16b, v29.16b + bif v21.16b, v30.16b, v29.16b + bif v22.16b, v30.16b, v29.16b + bif v23.16b, v30.16b, v29.16b # Execute VM instructions DECL(randomx_program_aarch64_vm_instructions):