mirror of
https://github.com/xmrig/xmrig.git
synced 2026-06-18 10:22:39 -04:00
RandomX optimizations:
- ARM64: optimized emitMovImmediate/emitMemLoad
- ARM64: disabled 32-bit literal preloading (it was slower)
- Android and Linux: added MADV_COLLAPSE support to memory allocation
This commit is contained in:
@@ -74,6 +74,11 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef MADV_COLLAPSE
|
||||||
|
# define MADV_COLLAPSE 25
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(XMRIG_OS_FREEBSD))
|
#if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(XMRIG_OS_FREEBSD))
|
||||||
static inline int hugePagesFlag(size_t size)
|
static inline int hugePagesFlag(size_t size)
|
||||||
{
|
{
|
||||||
@@ -278,8 +283,9 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
|
|||||||
|
|
||||||
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
|
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
|
||||||
{
|
{
|
||||||
# ifdef XMRIG_OS_LINUX
|
# if defined(XMRIG_OS_ANDROID) || defined(XMRIG_OS_LINUX)
|
||||||
return (madvise(p, size, MADV_HUGEPAGE) == 0);
|
// MADV_COLLAPSE works even if /sys/kernel/mm/transparent_hugepage/enabled is set to "never", but only on Linux 6.1+
|
||||||
|
return (madvise(p, size, MADV_COLLAPSE) == 0) || (madvise(p, size, MADV_HUGEPAGE) == 0);
|
||||||
# else
|
# else
|
||||||
return false;
|
return false;
|
||||||
# endif
|
# endif
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
|||||||
|
|
||||||
codePos = PrologueSize;
|
codePos = PrologueSize;
|
||||||
literalPos = ImulRcpLiteralsEnd;
|
literalPos = ImulRcpLiteralsEnd;
|
||||||
num32bitLiterals = 0;
|
num32bitLiterals = 64; // effectively disabled because it's slower than plain movn/movz+movk
|
||||||
|
|
||||||
for (uint32_t i = 0; i < RegistersCount; ++i)
|
for (uint32_t i = 0; i < RegistersCount; ++i)
|
||||||
reg_changed_offset[i] = codePos;
|
reg_changed_offset[i] = codePos;
|
||||||
@@ -237,7 +237,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
|||||||
|
|
||||||
codePos = PrologueSize;
|
codePos = PrologueSize;
|
||||||
literalPos = ImulRcpLiteralsEnd;
|
literalPos = ImulRcpLiteralsEnd;
|
||||||
num32bitLiterals = 0;
|
num32bitLiterals = 64; // effectively disabled because it's slower than plain movn/movz+movk
|
||||||
|
|
||||||
for (uint32_t i = 0; i < RegistersCount; ++i)
|
for (uint32_t i = 0; i < RegistersCount; ++i)
|
||||||
reg_changed_offset[i] = codePos;
|
reg_changed_offset[i] = codePos;
|
||||||
@@ -488,13 +488,31 @@ void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code,
|
|||||||
{
|
{
|
||||||
uint32_t k = codePos;
|
uint32_t k = codePos;
|
||||||
|
|
||||||
|
// 196606 different values can be encoded with a single instruction, the rest requires smov/umov load, or movn/movz+movk pair
|
||||||
if (imm < (1 << 16))
|
if (imm < (1 << 16))
|
||||||
{
|
{
|
||||||
|
// Sign-extended 64-bit value: 0x000000000000xxxx
|
||||||
// movz tmp_reg, imm32 (16 low bits)
|
// movz tmp_reg, imm32 (16 low bits)
|
||||||
emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k);
|
emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k);
|
||||||
}
|
}
|
||||||
|
else if ((imm >> 16) == 0xFFFF) {
|
||||||
|
// Sign-extended 64-bit value: 0xFFFFFFFFFFFFxxxx
|
||||||
|
// movn tmp_reg, ~imm32 (16 low bits)
|
||||||
|
emit32(ARMV8A::MOVN | dst | ((~imm & 0xFFFF) << 5), code, k);
|
||||||
|
}
|
||||||
|
else if (((imm & 0xFFFF) == 0xFFFF) && (static_cast<int32_t>(imm) < 0)) {
|
||||||
|
// Sign-extended 64-bit value: 0xFFFFFFFFxxxxFFFF
|
||||||
|
// movn tmp_reg, ~imm32 (16 high bits)
|
||||||
|
emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k);
|
||||||
|
}
|
||||||
|
else if (((imm & 0xFFFF) == 0) && (static_cast<int32_t>(imm) >= 0)) {
|
||||||
|
// Sign-extended 64-bit value: 0x00000000xxxx0000
|
||||||
|
// movz tmp_reg, imm32 (16 high bits)
|
||||||
|
emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// Full sign-extended 64-bit value: 0x00000000xxxxxxxx or 0xFFFFFFFFxxxxxxxx
|
||||||
if (num32bitLiterals < 64)
|
if (num32bitLiterals < 64)
|
||||||
{
|
{
|
||||||
if (static_cast<int32_t>(imm) < 0)
|
if (static_cast<int32_t>(imm) < 0)
|
||||||
@@ -611,18 +629,17 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
imm = (imm & ScratchpadL3Mask) >> 3;
|
imm = (imm & ScratchpadL3Mask) >> 3;
|
||||||
if (imm)
|
if (imm < 4096) {
|
||||||
|
// ldr tmp_reg, [x2, #imm*8]
|
||||||
|
emit32(0xf9400040 | tmp_reg | (imm << 10), code, k);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
emitMovImmediate(tmp_reg, imm, code, k);
|
emitMovImmediate(tmp_reg, imm, code, k);
|
||||||
|
|
||||||
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
||||||
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
// ldr tmp_reg, [x2]
|
|
||||||
emit32(0xf9400040 | tmp_reg, code, k);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
codePos = k;
|
codePos = k;
|
||||||
|
|||||||
Reference in New Issue
Block a user