diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp
index bae695cf1..dc693d71f 100644
--- a/src/crypto/randomx/jit_compiler_a64.cpp
+++ b/src/crypto/randomx/jit_compiler_a64.cpp
@@ -152,6 +152,20 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 		(this->*engine[instr.opcode])(instr, codePos);
 	}
 
+	{ // Optional early dataset prefetch, emitted ahead of the "Update spMix2" step that follows this block.
+		const uint32_t rr2Off = reg_changed_offset[config.readReg2]; // presumably the code offset at which readReg2 was last modified -- TODO confirm
+		const uint32_t rr3Off = reg_changed_offset[config.readReg3]; // same lookup for readReg3
+		const uint32_t maxOff = (rr2Off > rr3Off) ? rr2Off : rr3Off; // the larger of the two offsets
+		if (codePos - maxOff > 40 * 4) { // more than 160 bytes (40 four-byte A64 instructions) between maxOff and codePos; unsigned math, so this assumes maxOff <= codePos
+			const uint32_t datasetMask = ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10); // value for the imms field (bits 15:10) of the AND-immediate emitted below
+			emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); // eor w20, config.readReg2, config.readReg3 (same word as the spMix2 update below)
+			emit32(ARMV8A::EOR32 | 19 | (9 << 5) | (20 << 16), code, codePos); // eor w19, w9, w20 -- w9 presumably holds mx; verify against the static assembly
+			emit32(0x121A0000 | 19 | (19 << 5) | datasetMask, code, codePos); // and w19, w19, #imm (32-bit AND-immediate, immr = 26): keeps bits [6, Log2_DatasetBaseSize), i.e. a 64-byte-aligned offset
+			emit32(ARMV8A::ADD | 19 | (19 << 5) | (1 << 16), code, codePos); // add x19, x19, x1 (assuming ARMV8A::ADD is the 64-bit register form); the static code likewise adds x1 before its prefetch
+			emit32(0xF9800000 | 3 | (19 << 5), code, codePos); // prfm pldl2strm, [x19] (prfop 3). NOTE(review): the static path now uses pldl1strm -- confirm the L2 target here is intentional
+		}
+	}
+
 	// Update spMix2
 	// eor w20, config.readReg2, config.readReg3
 	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
diff --git a/src/crypto/randomx/jit_compiler_a64_static.S b/src/crypto/randomx/jit_compiler_a64_static.S
index 2a7859cb4..3a2435211 100644
--- a/src/crypto/randomx/jit_compiler_a64_static.S
+++ b/src/crypto/randomx/jit_compiler_a64_static.S
@@ -303,7 +303,7 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
 	add	x20, x20, x1
 
 	# Prefetch dataset data
-	prfm	pldl2strm, [x20]
+	prfm	pldl1strm, [x20]	// prefetch for load into L1 with the streaming (use-once) policy; previously targeted L2
 
 DECL(randomx_program_aarch64_cacheline_align_mask2):
 	# Actual mask will be inserted by JIT compiler
@@ -312,16 +312,16 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
 
 DECL(randomx_program_aarch64_xor_with_dataset_line):
 	# xor integer registers with dataset data
-	ldp	x20, x19, [x10]
+	ldnp	x20, x19, [x10]	// load pair with non-temporal hint: bytes 0-15 at [x10]; presumably the line is not reused -- confirm
 	eor	x4, x4, x20
 	eor	x5, x5, x19
-	ldp	x20, x19, [x10, 16]
+	ldnp	x20, x19, [x10, 16]	// bytes 16-31, same non-temporal hint
 	eor	x6, x6, x20
 	eor	x7, x7, x19
-	ldp	x20, x19, [x10, 32]
+	ldnp	x20, x19, [x10, 32]	// bytes 32-47, same non-temporal hint
 	eor	x12, x12, x20
 	eor	x13, x13, x19
-	ldp	x20, x19, [x10, 48]
+	ldnp	x20, x19, [x10, 48]	// bytes 48-63, same non-temporal hint
 	eor	x14, x14, x20
 	eor	x15, x15, x19