mirror of
https://github.com/xmrig/xmrig.git
synced 2026-06-28 05:37:39 -04:00
Compare commits
7 Commits
27f116e2da
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
| 6dc014f71f | |||
| 906fd4693b | |||
| 3fb851d91d | |||
| 9ac373fea5 | |||
| 978720462d | |||
| 7eadfdc9c6 | |||
| 720325c40f |
@@ -74,6 +74,11 @@
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef MADV_COLLAPSE
|
||||
# define MADV_COLLAPSE 25
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(XMRIG_OS_FREEBSD))
|
||||
static inline int hugePagesFlag(size_t size)
|
||||
{
|
||||
@@ -278,8 +283,9 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
|
||||
|
||||
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
|
||||
{
|
||||
# ifdef XMRIG_OS_LINUX
|
||||
return (madvise(p, size, MADV_HUGEPAGE) == 0);
|
||||
# if defined(XMRIG_OS_ANDROID) || defined(XMRIG_OS_LINUX)
|
||||
// MADV_COLLAPSE works even if /sys/kernel/mm/transparent_hugepage/enabled is set to "never", but only on Linux 6.1+
|
||||
return (madvise(p, size, MADV_COLLAPSE) == 0) || (madvise(p, size, MADV_HUGEPAGE) == 0);
|
||||
# else
|
||||
return false;
|
||||
# endif
|
||||
|
||||
@@ -141,7 +141,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
num32bitLiterals = 0;
|
||||
num32bitLiterals = 64; // effectively disabled because it's slower than plain movn/movz+movk
|
||||
|
||||
for (uint32_t i = 0; i < RegistersCount; ++i)
|
||||
reg_changed_offset[i] = codePos;
|
||||
@@ -237,7 +237,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
num32bitLiterals = 0;
|
||||
num32bitLiterals = 64; // effectively disabled because it's slower than plain movn/movz+movk
|
||||
|
||||
for (uint32_t i = 0; i < RegistersCount; ++i)
|
||||
reg_changed_offset[i] = codePos;
|
||||
@@ -488,13 +488,31 @@ void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code,
|
||||
{
|
||||
uint32_t k = codePos;
|
||||
|
||||
// 196606 different values can be encoded with a single instruction, the rest requires smov/umov load, or movn/movz+movk pair
|
||||
if (imm < (1 << 16))
|
||||
{
|
||||
// Sign-extended 64-bit value: 0x000000000000xxxx
|
||||
// movz tmp_reg, imm32 (16 low bits)
|
||||
emit32(ARMV8A::MOVZ | dst | (imm << 5), code, k);
|
||||
}
|
||||
else if ((imm >> 16) == 0xFFFF) {
|
||||
// Sign-extended 64-bit value: 0xFFFFFFFFFFFFxxxx
|
||||
// movn tmp_reg, ~imm32 (16 low bits)
|
||||
emit32(ARMV8A::MOVN | dst | ((~imm & 0xFFFF) << 5), code, k);
|
||||
}
|
||||
else if (((imm & 0xFFFF) == 0xFFFF) && (static_cast<int32_t>(imm) < 0)) {
|
||||
// Sign-extended 64-bit value: 0xFFFFFFFFxxxxFFFF
|
||||
// movn tmp_reg, ~imm32 (16 high bits)
|
||||
emit32(ARMV8A::MOVN | dst | (1 << 21) | ((~imm >> 16) << 5), code, k);
|
||||
}
|
||||
else if (((imm & 0xFFFF) == 0) && (static_cast<int32_t>(imm) >= 0)) {
|
||||
// Sign-extended 64-bit value: 0x00000000xxxx0000
|
||||
// movz tmp_reg, imm32 (16 high bits)
|
||||
emit32(ARMV8A::MOVZ | dst | (1 << 21) | ((imm >> 16) << 5), code, k);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Full sign-extended 64-bit value: 0x00000000xxxxxxxx or 0xFFFFFFFFxxxxxxxx
|
||||
if (num32bitLiterals < 64)
|
||||
{
|
||||
if (static_cast<int32_t>(imm) < 0)
|
||||
@@ -611,18 +629,17 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
|
||||
else
|
||||
{
|
||||
imm = (imm & ScratchpadL3Mask) >> 3;
|
||||
if (imm)
|
||||
if (imm < 4096) {
|
||||
// ldr tmp_reg, [x2, #imm*8]
|
||||
emit32(0xf9400040 | tmp_reg | (imm << 10), code, k);
|
||||
}
|
||||
else
|
||||
{
|
||||
emitMovImmediate(tmp_reg, imm, code, k);
|
||||
|
||||
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
||||
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
||||
}
|
||||
else
|
||||
{
|
||||
// ldr tmp_reg, [x2]
|
||||
emit32(0xf9400040 | tmp_reg, code, k);
|
||||
}
|
||||
}
|
||||
|
||||
codePos = k;
|
||||
@@ -709,8 +726,17 @@ void JitCompilerA64::h_ISUB_R(Instruction& instr, uint32_t& codePos)
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t imm = instr.getImm32();
|
||||
|
||||
if (imm == 0x80000000ul) {
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emit32(ARMV8A::MOVZ | tmp_reg | (1u << 21) | (0x8000u << 5), code, k);
|
||||
emit32(ARMV8A::ADD | dst | (dst << 5) | (tmp_reg << 16), code, k);
|
||||
}
|
||||
else {
|
||||
emitAddImmediate(dst, dst, -instr.getImm32(), code, k);
|
||||
}
|
||||
}
|
||||
|
||||
reg_changed_offset[instr.dst] = k;
|
||||
codePos = k;
|
||||
|
||||
@@ -303,7 +303,7 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||
add x20, x20, x1
|
||||
|
||||
# Prefetch dataset data
|
||||
prfm pldl2strm, [x20]
|
||||
prfm pldl1strm, [x20]
|
||||
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
@@ -312,16 +312,16 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
|
||||
DECL(randomx_program_aarch64_xor_with_dataset_line):
|
||||
# xor integer registers with dataset data
|
||||
ldp x20, x19, [x10]
|
||||
ldnp x20, x19, [x10]
|
||||
eor x4, x4, x20
|
||||
eor x5, x5, x19
|
||||
ldp x20, x19, [x10, 16]
|
||||
ldnp x20, x19, [x10, 16]
|
||||
eor x6, x6, x20
|
||||
eor x7, x7, x19
|
||||
ldp x20, x19, [x10, 32]
|
||||
ldnp x20, x19, [x10, 32]
|
||||
eor x12, x12, x20
|
||||
eor x13, x13, x19
|
||||
ldp x20, x19, [x10, 48]
|
||||
ldnp x20, x19, [x10, 48]
|
||||
eor x14, x14, x20
|
||||
eor x15, x15, x19
|
||||
|
||||
|
||||
@@ -814,11 +814,18 @@ namespace randomx {
|
||||
state.emit(rvi(rv64::SUB, regR(isn.dst), regR(isn.dst), regR(isn.src)));
|
||||
}
|
||||
else {
|
||||
int32_t imm = unsigned32ToSigned2sCompl(-isn.getImm32()); //convert to add
|
||||
const uint32_t uimm = isn.getImm32();
|
||||
if (uimm == 0x80000000ul) {
|
||||
state.emit(rv64::LUI | (0x80000 << 12) | rvrd(Tmp1Reg));
|
||||
state.emit(rvi(rv64::SUB, regR(isn.dst), regR(isn.dst), Tmp1Reg));
|
||||
}
|
||||
else {
|
||||
int32_t imm = unsigned32ToSigned2sCompl(-uimm); //convert to add
|
||||
//x{dst} = x{dst} + {-imm}
|
||||
emitImm32(state, imm, regR(isn.dst), regR(isn.dst), Tmp1Reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerRV64::v1_ISUB_M(HANDLER_ARGS) {
|
||||
state.registerUsage[isn.dst] = i;
|
||||
|
||||
@@ -444,6 +444,12 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
|
||||
// sub x20 + dst, x20 + dst, x20 + src
|
||||
emit32(0x414A0A33 + (dst << 7) + (dst << 15) + (src << 20));
|
||||
}
|
||||
else if (imm == 0x80000000U) {
|
||||
// lui x5, 0x80000 (20-bit immediate; on RV64 x5 = 0xFFFFFFFF80000000, i.e. sign-extended 0x80000000)
|
||||
emit32(0x800002B7);
|
||||
// sub x20 + dst, x20 + dst, x5
|
||||
emit32(0x405A0A33 + (dst << 7) + (dst << 15));
|
||||
}
|
||||
else {
|
||||
imm_to_x5(-imm, p);
|
||||
// c.add x20 + dst, x5
|
||||
|
||||
@@ -163,7 +163,7 @@ extern RandomX_ConfigurationGraft RandomX_GraftConfig;
|
||||
extern RandomX_ConfigurationSafex RandomX_SafexConfig;
|
||||
extern RandomX_ConfigurationYada RandomX_YadaConfig;
|
||||
|
||||
extern RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||
alignas(64) extern RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||
|
||||
template<typename T>
|
||||
void randomx_apply_config(const T& config)
|
||||
|
||||
Reference in New Issue
Block a user