diff --git a/src/crypto/randomx/asm/program_prologue_linux.inc b/src/crypto/randomx/asm/program_prologue_linux.inc index fcd09fd36..26a305c12 100644 --- a/src/crypto/randomx/asm/program_prologue_linux.inc +++ b/src/crypto/randomx/asm/program_prologue_linux.inc @@ -13,12 +13,6 @@ mov rbp, qword ptr [rsi] ;# "mx", "ma" mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset - ;# dataset prefetch for the first iteration of the main loop - mov rax, rbp - shr rax, 32 - and eax, RANDOMX_DATASET_BASE_MASK - prefetchnta byte ptr [rdi+rax] - mov rsi, rdx ;# uint8_t* scratchpad mov rax, rbp diff --git a/src/crypto/randomx/asm/program_prologue_win64.inc b/src/crypto/randomx/asm/program_prologue_win64.inc index d70e04911..46a4e9c34 100644 --- a/src/crypto/randomx/asm/program_prologue_win64.inc +++ b/src/crypto/randomx/asm/program_prologue_win64.inc @@ -25,12 +25,6 @@ mov rbp, qword ptr [rdx] ;# "mx", "ma" mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset - ;# dataset prefetch for the first iteration of the main loop - mov rax, rbp - shr rax, 32 - and eax, RANDOMX_DATASET_BASE_MASK - prefetchnta byte ptr [rdi+rax] - mov rsi, r8 ;# uint8_t* scratchpad mov rbx, r9 ;# loop counter diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h index a24c21ac8..857ea7744 100644 --- a/src/crypto/randomx/intrin_portable.h +++ b/src/crypto/randomx/intrin_portable.h @@ -577,8 +577,13 @@ inline void* rx_aligned_alloc(size_t size, size_t align) { # define rx_aligned_free(a) free(a) #endif +#ifdef __GNUC__ +#define rx_prefetch_nta(x) __builtin_prefetch((x), 0, 0) +#define rx_prefetch_t0(x) __builtin_prefetch((x), 0, 3) +#else #define rx_prefetch_nta(x) #define rx_prefetch_t0(x) +#endif FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { rx_vec_f128 x; diff --git a/src/crypto/randomx/vm_compiled.cpp b/src/crypto/randomx/vm_compiled.cpp index 1985e7cae..e3f276bc7 100644 --- a/src/crypto/randomx/vm_compiled.cpp +++ b/src/crypto/randomx/vm_compiled.cpp @@ -61,6 +61,17 @@ namespace randomx { # if defined(XMRIG_ARM) || defined(XMRIG_RISCV) memcpy(reg.f, config.eMask, sizeof(config.eMask)); # endif + + const uint8_t* p = mem.memory; + + // dataset prefetch for the first iteration of the main loop + rx_prefetch_nta(p + (mem.ma & (RandomX_ConfigurationBase::DatasetBaseSize - 64))); + + // dataset prefetch for the second iteration of the main loop (RandomX v2) + if (RandomX_CurrentConfig.Tweak_V2_PREFETCH) { + rx_prefetch_nta(p + (mem.mx & (RandomX_ConfigurationBase::DatasetBaseSize - 64))); + } + compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations); }