From 720325c40f812739e964a127a640039b15ee0db8 Mon Sep 17 00:00:00 2001 From: SChernykh <15806605+SChernykh@users.noreply.github.com> Date: Sun, 17 May 2026 17:35:40 +0200 Subject: [PATCH] RandomX optimizations: - ARM64: optimized emitMovImmediate/emitMemLoad - ARM64: disabled 32-bit literal preloading (it was slower) - Android and Linux: added MADV_COLLAPSE support to memory allocation --- src/crypto/common/VirtualMemory_unix.cpp | 10 +++++-- src/crypto/randomx/jit_compiler_a64.cpp | 33 ++++++++++++++++++------ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 471c9cf07..dedf4603a 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -74,6 +74,11 @@ #endif +#ifndef MADV_COLLAPSE +# define MADV_COLLAPSE 25 +#endif + + #if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(XMRIG_OS_FREEBSD)) static inline int hugePagesFlag(size_t size) { @@ -278,8 +283,9 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory() bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size) { -# ifdef XMRIG_OS_LINUX - return (madvise(p, size, MADV_HUGEPAGE) == 0); +# if defined(XMRIG_OS_ANDROID) || defined(XMRIG_OS_LINUX) + // MADV_COLLAPSE works even if /sys/kernel/mm/transparent_hugepage/enabled is set to "never", but only on Linux 6.1+ + return (madvise(p, size, MADV_COLLAPSE) == 0) || (madvise(p, size, MADV_HUGEPAGE) == 0); # else return false; # endif diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index 0d624dfa9..bae695cf1 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -141,7 +141,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con codePos = PrologueSize; literalPos = ImulRcpLiteralsEnd; - num32bitLiterals = 0; + num32bitLiterals = 64; // effectively disabled because it's 
slower than plain movn/movz+movk for (uint32_t i = 0; i < RegistersCount; ++i) reg_changed_offset[i] = codePos; @@ -237,7 +237,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration codePos = PrologueSize; literalPos = ImulRcpLiteralsEnd; - num32bitLiterals = 0; + num32bitLiterals = 64; // effectively disabled because it's slower than plain movn/movz+movk for (uint32_t i = 0; i < RegistersCount; ++i) reg_changed_offset[i] = codePos; @@ -488,13 +488,31 @@ void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, { uint32_t k = codePos; + // 196606 different values can be encoded with a single instruction, the rest requires smov/umov load, or movn/movz+movk pair if (imm < (1 << 16)) { + // Sign-extended 64-bit value: 0x000000000000xxxx // movz tmp_reg, imm32 (16 low bits) emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k); } + else if ((imm >> 16) == 0xFFFF) { + // Sign-extended 64-bit value: 0xFFFFFFFFFFFFxxxx + // movn tmp_reg, ~imm32 (16 low bits) + emit32(ARMV8A::MOVN | dst | ((~imm & 0xFFFF) << 5), code, k); + } + else if (((imm & 0xFFFF) == 0xFFFF) && (static_cast<int32_t>(imm) < 0)) { + // Sign-extended 64-bit value: 0xFFFFFFFFxxxxFFFF + // movn tmp_reg, ~imm32 (16 high bits) + emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k); + } + else if (((imm & 0xFFFF) == 0) && (static_cast<int32_t>(imm) >= 0)) { + // Sign-extended 64-bit value: 0x00000000xxxx0000 + // movz tmp_reg, imm32 (16 high bits) + emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k); + } else { + // Full sign-extended 64-bit value: 0x00000000xxxxxxxx or 0xFFFFFFFFxxxxxxxx if (num32bitLiterals < 64) { if (static_cast<int32_t>(imm) < 0) @@ -611,18 +629,17 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, else { imm = (imm & ScratchpadL3Mask) >> 3; - if (imm) + if (imm < 4096) { + // ldr tmp_reg, [x2, #imm*8] + emit32(0xf9400040 | tmp_reg | (imm << 10), code, k); + } + else { emitMovImmediate(tmp_reg, imm, 
code, k); // ldr tmp_reg, [x2, tmp_reg, lsl 3] emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k); } - else - { - // ldr tmp_reg, [x2] - emit32(0xf9400040 | tmp_reg, code, k); - } } codePos = k;