mirror of https://github.com/xmrig/xmrig.git synced 2025-12-24 05:23:16 -05:00

Compare commits


20 Commits

Author SHA1 Message Date
Tony Butler
25c4fceeca Merge d51b01e559 into cdd5dff337 2023-06-03 10:16:03 -04:00
XMRig
cdd5dff337 v6.19.4-dev 2023-06-03 21:14:26 +07:00
XMRig
bc5fe8f456 Merge branch 'master' into dev 2023-06-03 21:13:51 +07:00
XMRig
0bc87345c4 v6.19.3 2023-06-03 19:59:18 +07:00
XMRig
f17d31e61a Merge branch 'dev' 2023-06-03 19:57:36 +07:00
xmrig
e6bf4c0077 Update CHANGELOG.md 2023-06-02 22:12:18 +07:00
xmrig
ff79b8fce4 Merge pull request #3280 from SChernykh/dev
Updated example scripts
2023-06-02 17:47:13 +07:00
SChernykh
af87369e4f Updated example scripts
- Hashvault is top 1 pool now, so changed it to a smaller pool
- node.xmr.to doesn't exist anymore
2023-06-02 09:34:26 +02:00
xmrig
65fc16d5ac Merge pull request #3275 from SChernykh/dev
RandomX: fixed `jccErratum` list
2023-05-26 18:25:57 +07:00
SChernykh
826e23b4c4 Fixed jccErratum list 2023-05-26 12:46:59 +02:00
Tony Butler
d51b01e559 Make AMD assembly completely optional through WITH_ASM_AMD (default ON) 2023-05-23 16:49:52 -06:00
xmrig
02d45834e1 Merge pull request #3273 from SChernykh/dev
RandomX: fixed undefined behavior
2023-05-23 20:18:32 +07:00
SChernykh
1252a4710e RandomX: fixed undefined behavior
Using an inactive member of a `union` is an undefined behavior in C++
2023-05-23 14:40:12 +02:00
xmrig
5891f1f06b Merge pull request #3271 from SChernykh/opt_genprog
RandomX: optimized program generation
2023-05-22 05:25:32 +07:00
SChernykh
5dcbab7e3a RandomX: optimized program generation 2023-05-21 17:44:20 +02:00
xmrig
7b51e23aa0 Merge pull request #3254 from SChernykh/dev
Tweaked auto-tuning for Intel CPUs
2023-04-19 12:29:58 +07:00
SChernykh
7f7fc363e1 Tweaked auto-tuning for Intel CPUs
Alder Lake and newer CPUs have exclusive L3 cache and benefit from more threads until L3+L2 is filled.
2023-04-18 21:20:45 +02:00
XMRig
c4e1363148 #3245 Improved algorithm negotiation for donation rounds by sending extra information about current mining job. 2023-04-07 23:35:05 +07:00
XMRig
a2e9b3456d v6.19.3-dev 2023-04-04 00:34:54 +07:00
XMRig
4790318685 Merge branch 'master' into dev 2023-04-04 00:34:22 +07:00
24 changed files with 293 additions and 124 deletions

View File

@@ -1,3 +1,11 @@
# v6.19.3
- [#3245](https://github.com/xmrig/xmrig/issues/3245) Improved algorithm negotiation for donation rounds by sending extra information about current mining job.
- [#3254](https://github.com/xmrig/xmrig/pull/3254) Tweaked auto-tuning for Intel CPUs.
- [#3271](https://github.com/xmrig/xmrig/pull/3271) RandomX: optimized program generation.
- [#3273](https://github.com/xmrig/xmrig/pull/3273) RandomX: fixed undefined behavior.
- [#3275](https://github.com/xmrig/xmrig/pull/3275) RandomX: fixed `jccErratum` list.
- [#3280](https://github.com/xmrig/xmrig/pull/3280) Updated example scripts.
# v6.19.2
- [#3230](https://github.com/xmrig/xmrig/pull/3230) Fixed parsing of `TX_EXTRA_MERGE_MINING_TAG`.
- [#3232](https://github.com/xmrig/xmrig/pull/3232) Added new `X-Hash-Difficulty` HTTP header.
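
The undefined behavior fixed in #3273 above comes from reading a `union` member other than the one last written, which C++ does not allow even though it often appears to work. A minimal sketch of the pattern and a well-defined replacement, using hypothetical types rather than the project's actual code:

#include <cstdint>
#include <cstring>

union Bits {
    float    f;
    uint32_t u;
};

uint32_t bits_of_bad(float x) {
    Bits b;
    b.f = x;
    return b.u;                        // reads the inactive member: undefined behavior in C++
}

uint32_t bits_of_good(float x) {
    uint32_t u;
    std::memcpy(&u, &x, sizeof(u));    // well-defined byte-wise reinterpretation
    return u;                          // (or std::bit_cast<uint32_t>(x) in C++20)
}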

View File

@@ -14,7 +14,9 @@ option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON)
option(WITH_ASM_AMD "Enable ASM for AMD processors" ON)
option(WITH_MSR "Enable MSR mod" ON)
option(WITH_MSR_ZEN "Enable MSR mod for AMD Zen-based processors" ON)
option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" ON)
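
Both new switches default to ON, so existing builds are unaffected. A hedged example of opting out at configure time (illustrative out-of-tree invocation, not taken from the project's build docs):

cmake .. -DWITH_ASM_AMD=OFF -DWITH_MSR_ZEN=OFF
cmake --build . --config Release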

View File

@@ -44,9 +44,17 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
add_definitions(/DXMRIG_FEATURE_ASM)
if (WITH_ASM_AMD)
add_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=ON (+amd)")
else()
message("-- WITH_ASM=ON (-amd)")
endif()
else()
set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "")
remove_definitions(/DXMRIG_FEATURE_ASM)
remove_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=OFF")
endif()

View File

@@ -104,8 +104,13 @@ if (WITH_RANDOMX)
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
add_definitions(/DXMRIG_FEATURE_MSR)
add_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON")
if (WITH_MSR_ZEN)
add_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (+zen)")
else()
remove_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (-zen)")
endif()
if (XMRIG_OS_WIN)
list(APPEND SOURCES_CRYPTO

View File

@@ -16,5 +16,5 @@
:: Smaller pools also often have smaller fees/payout limits.
cd /d "%~dp0"
xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
xmrig.exe -o xmrpool.eu:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
pause

View File

@@ -12,5 +12,5 @@
:: But you will only get a payout when you find a block which can take more than a year for a single low-end PC.
cd /d "%~dp0"
xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
xmrig.exe -o YOUR_NODE_IP:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
pause

View File

@@ -296,7 +296,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
m_jccErratum =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
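
The `model` and `stepping` values being compared are taken from CPUID leaf 1. A minimal sketch of reading them with GCC/Clang on x86-64 (assumes the <cpuid.h> builtin; the miner's own detection code is more elaborate):

#include <cpuid.h>
#include <cstdint>
#include <cstdio>

int main() {
    unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return 1;                                   // CPUID leaf 1 not available
    }
    const uint32_t stepping   = eax & 0xF;
    const uint32_t base_model = (eax >> 4) & 0xF;
    const uint32_t family     = (eax >> 8) & 0xF;
    const uint32_t ext_model  = (eax >> 16) & 0xF;
    // For family 6 (and 15) the display model combines the extended and base model fields.
    const uint32_t model = (family == 6 || family == 15) ? ((ext_model << 4) | base_model) : base_model;
    std::printf("model 0x%X, stepping 0x%X\n", model, stepping);
    return 0;
}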

View File

@@ -298,8 +298,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
cores.reserve(m_cores);
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
const bool L3_exclusive = isCacheExclusive(cache);
# ifdef XMRIG_ALGO_GHOSTRIDER
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
// Don't use E-cores on Alder Lake
cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
@@ -311,7 +313,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
# endif
size_t L3 = cache->attr->cache.size;
const bool L3_exclusive = isCacheExclusive(cache);
size_t L2 = 0;
int L2_associativity = 0;
size_t extra = 0;
@@ -349,6 +350,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
}
# ifdef XMRIG_ALGO_RANDOMX
if ((algorithm.family() == Algorithm::RANDOM_X) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
// Use all L3+L2 on latest Intel CPUs with P-cores, E-cores and exclusive L3 cache
cacheHashes = (L3 + L2) / scratchpad;
}
if (extra == 0 && algorithm.l2() > 0) {
cacheHashes = std::min<size_t>(std::max<size_t>(L2 / algorithm.l2(), cores.size()), cacheHashes);
}
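
As a rough illustration of the sizing change with made-up numbers: an exclusive 30 MB L3 plus a combined 14 MB of L2 below it, divided by the 2 MB RandomX scratchpad, allows 22 cache-sized hashes instead of the 15 that L3 alone would give. A minimal sketch of the same arithmetic (standalone and simplified; the real code also checks for the hybrid P-core/E-core topology shown above):

#include <cstddef>
#include <cstdio>

int main() {
    // Illustrative sizes only; the miner reads real values from the hwloc topology.
    const size_t L3         = 30u * 1024 * 1024;   // exclusive L3 cache
    const size_t L2         = 14u * 1024 * 1024;   // sum of L2 caches under that L3
    const size_t scratchpad =  2u * 1024 * 1024;   // RandomX (rx/0) scratchpad size

    size_t cacheHashes = L3 / scratchpad;           // old rule: L3 only -> 15
    const bool L3_exclusive = true;                 // assume an Alder-Lake-like CPU
    if (L3_exclusive) {
        cacheHashes = (L3 + L2) / scratchpad;       // new rule: L3 + L2 -> 22
    }
    std::printf("%zu cache-sized hashes\n", cacheHashes);
    return 0;
}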

View File

@@ -94,7 +94,13 @@ static inline const std::string &usage()
# ifdef XMRIG_ALGO_RANDOMX
u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n";
# endif
# ifdef XMRIG_FEATURE_ASM
# ifdef XMRIG_FEATURE_ASM_AMD
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
# else
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel\n";
# endif
# endif
# if defined(__x86_64__) || defined(_M_AMD64)
u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";

View File

@@ -55,6 +55,7 @@ bool cn_vaes_enabled = false;
#ifdef XMRIG_FEATURE_ASM
#ifdef XMRIG_FEATURE_ASM_AMD
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \
@@ -63,34 +64,50 @@ bool cn_vaes_enabled = false;
m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \
m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \
} while (0)
#else
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_DOUBLE][Assembly::INTEL] = cryptonight_double_hash_asm<algo, Assembly::INTEL>; \
} while (0)
#endif
namespace xmrig {
cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
@@ -160,31 +177,41 @@ static void patchAsmVariants()
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
# endif
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000);
# ifdef XMRIG_ALGO_CN_PICO
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
# endif
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
# endif
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000);
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000);
# endif
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000);
cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000);
cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000);
# endif
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000);
# ifdef XMRIG_ALGO_CN_PICO
cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000);
cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000);
# endif
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
# endif
@@ -220,8 +247,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}
@@ -231,8 +260,10 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask();
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
}
@@ -241,8 +272,10 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();
patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
}
# endif
@@ -251,8 +284,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations();
patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}
@@ -260,8 +295,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations();
patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}

View File

@@ -852,12 +852,16 @@ extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
#endif
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
#endif
namespace xmrig {
@@ -867,28 +871,38 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_upx2_mainloop_asm;
@@ -964,46 +978,54 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) {
cnv2_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cnv2_mainloop_ryzen_asm(ctx);
}
else {
cnv2_mainloop_bulldozer_asm(ctx);
}
# endif
}
else if (ALGO == Algorithm::CN_HALF) {
if (ASM == Assembly::INTEL) {
cn_half_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_half_mainloop_ryzen_asm(ctx);
}
else {
cn_half_mainloop_bulldozer_asm(ctx);
}
# endif
}
# ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) {
if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx);
}
else {
cn_trtl_mainloop_bulldozer_asm(ctx);
}
# endif
}
else if (ALGO == Algorithm::CN_PICO_TLO) {
if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_tlo_mainloop_ryzen_asm(ctx);
}
else {
cn_tlo_mainloop_bulldozer_asm(ctx);
}
# endif
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
@@ -1013,23 +1035,27 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_zls_mainloop_ryzen_asm(ctx);
}
else {
cn_zls_mainloop_bulldozer_asm(ctx);
}
# endif
}
else if (ALGO == Algorithm::CN_DOUBLE) {
if (ASM == Assembly::INTEL) {
cn_double_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_double_mainloop_ryzen_asm(ctx);
}
else {
cn_double_mainloop_bulldozer_asm(ctx);
}
# endif
}
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
@@ -1094,12 +1120,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
# ifdef XMRIG_FEATURE_ASM_AMD
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx);
}
else {
cn_upx2_double_mainloop_asm(ctx);
}
# else
cn_upx2_double_mainloop_asm(ctx);
# endif
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {

View File

@@ -15,12 +15,16 @@
.global FN_PREFIX(cnv1_double_mainloop_asm)
.global FN_PREFIX(cnv1_quad_mainloop_asm)
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
#endif
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
.global FN_PREFIX(cnv2_rwz_mainloop_asm)
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
#endif
ALIGN(64)
FN_PREFIX(cnv1_single_mainloop_asm):
@@ -58,6 +62,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48
@@ -75,6 +80,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
add rsp, 48
ret 0
mov eax, 3735929054
#endif
ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
@@ -103,6 +109,7 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
sub rsp, 48
@@ -111,6 +118,7 @@ FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
add rsp, 48
ret 0
mov eax, 3735929054
#endif
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits

View File

@@ -5,12 +5,16 @@
.global cnv1_double_mainloop_asm
.global cnv1_quad_mainloop_asm
.global cnv2_mainloop_ivybridge_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm
#endif
.global cnv2_double_mainloop_sandybridge_asm
.global cnv2_rwz_mainloop_asm
.global cnv2_rwz_double_mainloop_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_upx_double_mainloop_zen3_asm
#endif
ALIGN(64)
cnv1_single_mainloop_asm:
@@ -36,6 +40,7 @@ cnv2_mainloop_ivybridge_asm:
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc"
@@ -47,6 +52,7 @@ cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0
mov eax, 3735929054
#endif
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm:
@@ -66,8 +72,10 @@ cnv2_rwz_double_mainloop_asm:
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
cnv2_upx_double_mainloop_zen3_asm:
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
ret 0
mov eax, 3735929054
#endif

View File

@@ -7,8 +7,8 @@
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 tevador <tevador@gmail.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2023 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@
#define XMRIG_KP_HASH_H
#include <stdint.h>
#include <cstdint>
namespace xmrig
@@ -43,16 +43,16 @@ class KPHash
public:
static constexpr uint32_t EPOCH_LENGTH = 7500;
static constexpr uint32_t PERIOD_LENGTH = 3;
static constexpr int CNT_CACHE = 11;
static constexpr int CNT_MATH = 18;
static constexpr uint32_t REGS = 32;
static constexpr uint32_t LANES = 16;
static constexpr int CNT_CACHE = 11;
static constexpr int CNT_MATH = 18;
static constexpr uint32_t REGS = 32;
static constexpr uint32_t LANES = 16;
static void calculate(const KPCache& light_cache, uint32_t block_height, const uint8_t (&header_hash)[32], uint64_t nonce, uint32_t (&output)[8], uint32_t (&mix_hash)[8]);
};
} /* namespace xmrig */
} // namespace xmrig
#endif /* XMRIG_KP_HASH_H */
#endif // XMRIG_KP_HASH_H

View File

@@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "base/tools/Chrono.h"
#include "crypto/randomx/randomx.h"
#include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/instruction.hpp"
#include "crypto/randomx/common.hpp"
#include "crypto/rx/Profiler.h"
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
@@ -165,6 +167,9 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
static constexpr randomx::Instruction inst{ 0xFF, 7, 7, 0xFF, 0xFFFFFFFFU };
alignas(16) static const randomx::Instruction inst_mask[2] = { inst, inst };
template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
@@ -187,32 +192,42 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4);
state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5);
state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6);
state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7);
#define TRANSFORM do { \
state0 = aesdec<softAes>(state0, key0); \
state1 = aesenc<softAes>(state1, key0); \
state2 = aesdec<softAes>(state2, key4); \
state3 = aesenc<softAes>(state3, key4); \
state0 = aesdec<softAes>(state0, key1); \
state1 = aesenc<softAes>(state1, key1); \
state2 = aesdec<softAes>(state2, key5); \
state3 = aesenc<softAes>(state3, key5); \
state0 = aesdec<softAes>(state0, key2); \
state1 = aesenc<softAes>(state1, key2); \
state2 = aesdec<softAes>(state2, key6); \
state3 = aesenc<softAes>(state3, key6); \
state0 = aesdec<softAes>(state0, key3); \
state1 = aesenc<softAes>(state1, key3); \
state2 = aesdec<softAes>(state2, key7); \
state3 = aesenc<softAes>(state3, key7); \
} while (0)
for (int i = 0; i < 2; ++i, outptr += 64) {
TRANSFORM;
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
}
static_assert(sizeof(inst_mask) == sizeof(rx_vec_i128), "Incorrect inst_mask size");
const rx_vec_i128 mask = *reinterpret_cast<const rx_vec_i128*>(inst_mask);
while (outptr < outputEnd) {
TRANSFORM;
rx_store_vec_i128((rx_vec_i128*)outptr + 0, rx_and_vec_i128(state0, mask));
rx_store_vec_i128((rx_vec_i128*)outptr + 1, rx_and_vec_i128(state1, mask));
rx_store_vec_i128((rx_vec_i128*)outptr + 2, rx_and_vec_i128(state2, mask));
rx_store_vec_i128((rx_vec_i128*)outptr + 3, rx_and_vec_i128(state3, mask));
outptr += 64;
}
}
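
A side effect of masking the generated bytes here is visible in the JIT compiler diffs below: `inst_mask` holds 7 in the dst and src byte positions, so those register indices are already reduced to the 0..7 range and the per-instruction `% RegistersCount` operations can be dropped. A small standalone illustration of why an AND with 7 equals a modulo by 8 (illustrative constants, not project code):

#include <cstdint>

constexpr uint32_t RegistersCount = 8;    // RandomX integer register count, a power of two
constexpr uint8_t  raw_byte       = 0xAB; // arbitrary byte from the AES generator output

static_assert((raw_byte & (RegistersCount - 1)) == (raw_byte % RegistersCount),
              "for a power-of-two N, AND with (N - 1) equals modulo N");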

View File

@@ -126,6 +126,7 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
#define rx_xor_vec_f128 _mm_xor_pd
#define rx_and_vec_f128 _mm_and_pd
#define rx_and_vec_i128 _mm_and_si128
#define rx_or_vec_f128 _mm_or_pd
#ifdef __AES__
@@ -278,6 +279,10 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_and(a,b);
}
FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
return (rx_vec_i128)vec_and(a, b);
}
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_or(a,b);
}
@@ -444,6 +449,8 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
#define rx_and_vec_i128 vandq_u8
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
@@ -635,6 +642,13 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return x;
}
FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
rx_vec_i128 x;
x.u64[0] = a.u64[0] & b.u64[0];
x.u64[1] = a.u64[1] & b.u64[1];
return x;
}
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.i.u64[0] = a.i.u64[0] | b.i.u64[0];

View File

@@ -144,8 +144,6 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
for (uint32_t i = 0; i < program.getSize(); ++i)
{
Instruction& instr = program(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
(this->*engine[instr.opcode])(instr, codePos);
}
@@ -204,8 +202,6 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
for (uint32_t i = 0; i < program.getSize(); ++i)
{
Instruction& instr = program(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
(this->*engine[instr.opcode])(instr, codePos);
}

View File

@@ -312,11 +312,19 @@ namespace randomx {
freePagedMemory(allocatedCode, allocatedSize);
}
template<size_t N>
static FORCE_INLINE void prefetch_data(const void* data) {
rx_prefetch_nta(data);
prefetch_data<N - 1>(reinterpret_cast<const char*>(data) + 64);
}
template<> FORCE_INLINE void prefetch_data<0>(const void*) {}
template<typename T> static FORCE_INLINE void prefetch_data(const T& data) { prefetch_data<(sizeof(T) + 63) / 64>(&data); }
void JitCompilerX86::prepare() {
for (size_t i = 0; i < sizeof(engine); i += 64)
rx_prefetch_nta((const char*)(&engine) + i);
for (size_t i = 0; i < sizeof(RandomX_CurrentConfig); i += 64)
rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i);
prefetch_data(engine);
prefetch_data(RandomX_CurrentConfig);
}
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
@@ -748,7 +756,7 @@ namespace randomx {
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
@@ -768,8 +776,8 @@ namespace randomx {
uint32_t pos = codePos;
uint8_t* const p = code + pos;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
const uint32_t dst = instr.dst;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
uint32_t k = 0x048d4f + (dst << 19);
if (dst == RegisterNeedsDisplacement)
@@ -788,8 +796,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@@ -809,8 +817,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
@@ -830,8 +838,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@@ -851,8 +859,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
@@ -871,8 +879,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@@ -892,8 +900,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
@@ -908,8 +916,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
@@ -923,8 +931,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@@ -947,8 +955,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@@ -970,8 +978,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
pos += 8;
@@ -985,8 +993,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@@ -1011,7 +1019,7 @@ namespace randomx {
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t dst = instr.dst;
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
if (imul_rcp_storage_used < 16) {
@@ -1040,7 +1048,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
pos += 3;
@@ -1052,8 +1060,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
@@ -1073,8 +1081,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@@ -1094,8 +1102,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
@@ -1115,8 +1123,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
@@ -1136,8 +1144,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
@@ -1153,7 +1161,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t dst = instr.dst;
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
pos += 5;
@@ -1182,7 +1190,7 @@ namespace randomx {
prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@@ -1214,7 +1222,7 @@ namespace randomx {
prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@@ -1257,7 +1265,7 @@ namespace randomx {
prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
const uint64_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@@ -1307,7 +1315,7 @@ namespace randomx {
uint32_t pos = codePos;
prevCFROUND = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
@@ -1343,7 +1351,7 @@ namespace randomx {
uint32_t pos = codePos;
prevCFROUND = pos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t src = instr.src;
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
@@ -1367,7 +1375,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const int reg = instr.dst % RegistersCount;
const int reg = instr.dst;
int32_t jmp_offset = registerUsage[reg];
// if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now
@@ -1426,7 +1434,7 @@ namespace randomx {
uint32_t pos = codePos;
genAddressRegDst(instr, p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
codePos = pos;
}

View File

@@ -41,10 +41,12 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
flags |= RANDOMX_FLAG_JIT;
}
# ifdef XMRIG_FEATURE_ASM_AMD
const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
flags |= RANDOMX_FLAG_AMD;
}
# endif
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
}

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2019 Howard Chu <https://github.com/hyc>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -292,8 +292,7 @@ void xmrig::Network::setJob(IClient *client, const Job &job, bool donate)
}
if (!donate && m_donate) {
m_donate->setAlgo(job.algorithm());
m_donate->setProxy(client->pool().proxy());
static_cast<DonateStrategy *>(m_donate)->update(client, job);
}
m_controller->miner()->setJob(job, donate);

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2019 Howard Chu <https://github.com/hyc>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -89,7 +89,7 @@ private:
};
} /* namespace xmrig */
} // namespace xmrig
#endif /* XMRIG_NETWORK_H */
#endif // XMRIG_NETWORK_H

View File

@@ -1,6 +1,6 @@
/* XMRig
* Copyright (c) 2018-2022 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2022 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -48,7 +48,7 @@ static const char *kDonateHost = "donate.v2.xmrig.com";
static const char *kDonateHostTls = "donate.ssl.xmrig.com";
#endif
} /* namespace xmrig */
} // namespace xmrig
xmrig::DonateStrategy::DonateStrategy(Controller *controller, IStrategyListener *listener) :
@@ -98,6 +98,17 @@ xmrig::DonateStrategy::~DonateStrategy()
}
void xmrig::DonateStrategy::update(IClient *client, const Job &job)
{
setAlgo(job.algorithm());
setProxy(client->pool().proxy());
m_diff = job.diff();
m_height = job.height();
m_seed = job.seed();
}
int64_t xmrig::DonateStrategy::submit(const JobResult &result)
{
return m_proxy ? m_proxy->submit(result) : m_strategy->submit(result);
@@ -199,13 +210,13 @@ void xmrig::DonateStrategy::onLogin(IClient *, rapidjson::Document &doc, rapidjs
params.AddMember("url", m_pools[0].url().toJSON(), allocator);
# endif
setAlgorithms(doc, params);
setParams(doc, params);
}
void xmrig::DonateStrategy::onLogin(IStrategy *, IClient *, rapidjson::Document &doc, rapidjson::Value &params)
{
setAlgorithms(doc, params);
setParams(doc, params);
}
@@ -270,12 +281,20 @@ void xmrig::DonateStrategy::idle(double min, double max)
}
void xmrig::DonateStrategy::setAlgorithms(rapidjson::Document &doc, rapidjson::Value &params)
void xmrig::DonateStrategy::setJob(IClient *client, const Job &job, const rapidjson::Value &params)
{
if (isActive()) {
m_listener->onJob(this, client, job, params);
}
}
void xmrig::DonateStrategy::setParams(rapidjson::Document &doc, rapidjson::Value &params)
{
using namespace rapidjson;
auto &allocator = doc.GetAllocator();
auto algorithms = m_controller->miner()->algorithms();
Algorithms algorithms = m_controller->miner()->algorithms();
const size_t index = static_cast<size_t>(std::distance(algorithms.begin(), std::find(algorithms.begin(), algorithms.end(), m_algorithm)));
if (index > 0 && index < algorithms.size()) {
std::swap(algorithms[0], algorithms[index]);
@@ -287,14 +306,12 @@ void xmrig::DonateStrategy::setAlgorithms(rapidjson::Document &doc, rapidjson::V
algo.PushBack(StringRef(a.name()), allocator);
}
params.AddMember("algo", algo, allocator);
}
params.AddMember("algo", algo, allocator);
params.AddMember("diff", m_diff, allocator);
params.AddMember("height", m_height, allocator);
void xmrig::DonateStrategy::setJob(IClient *client, const Job &job, const rapidjson::Value &params)
{
if (isActive()) {
m_listener->onJob(this, client, job, params);
if (!m_seed.empty()) {
params.AddMember("seed_hash", Cvt::toHex(m_seed, doc), allocator);
}
}
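
Putting the pieces together, the donation login request now carries the current job's difficulty, height and seed hash alongside the reordered algorithm list. A rough illustration of the resulting `params` object with made-up values (field names taken from the code above; values are placeholders):

{
    "algo": ["rx/0", "cn/r", "cn-heavy/xhv"],
    "diff": 350000000,
    "height": 2900000,
    "seed_hash": "0000000000000000000000000000000000000000000000000000000000000000"
}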

View File

@@ -1,6 +1,6 @@
/* XMRig
* Copyright (c) 2018-2022 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2022 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -20,15 +20,12 @@
#define XMRIG_DONATESTRATEGY_H
#include <vector>
#include "base/kernel/interfaces/IClientListener.h"
#include "base/kernel/interfaces/IStrategy.h"
#include "base/kernel/interfaces/IStrategyListener.h"
#include "base/kernel/interfaces/ITimerListener.h"
#include "base/net/stratum/Pool.h"
#include "base/tools/Object.h"
#include "base/tools/Buffer.h"
namespace xmrig {
@@ -36,7 +33,6 @@ namespace xmrig {
class Client;
class Controller;
class IStrategyListener;
class DonateStrategy : public IStrategy, public IStrategyListener, public ITimerListener, public IClientListener
@@ -47,6 +43,8 @@ public:
DonateStrategy(Controller *controller, IStrategyListener *listener);
~DonateStrategy() override;
void update(IClient *client, const Job &job);
protected:
inline bool isActive() const override { return state() == STATE_ACTIVE; }
inline IClient *client() const override { return m_proxy ? m_proxy : m_strategy->client(); }
@@ -88,13 +86,14 @@ private:
IClient *createProxy();
void idle(double min, double max);
void setAlgorithms(rapidjson::Document &doc, rapidjson::Value &params);
void setJob(IClient *client, const Job &job, const rapidjson::Value &params);
void setParams(rapidjson::Document &doc, rapidjson::Value &params);
void setResult(IClient *client, const SubmitResult &result, const char *error);
void setState(State state);
Algorithm m_algorithm;
bool m_tls = false;
Buffer m_seed;
char m_userId[65] = { 0 };
const uint64_t m_donateTime;
const uint64_t m_idleTime;
@@ -105,12 +104,14 @@ private:
State m_state = STATE_NEW;
std::vector<Pool> m_pools;
Timer *m_timer = nullptr;
uint64_t m_diff = 0;
uint64_t m_height = 0;
uint64_t m_now = 0;
uint64_t m_timestamp = 0;
};
} /* namespace xmrig */
} // namespace xmrig
#endif /* XMRIG_DONATESTRATEGY_H */
#endif // XMRIG_DONATESTRATEGY_H

View File

@@ -22,7 +22,7 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
#define APP_VERSION "6.19.2"
#define APP_VERSION "6.19.4-dev"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2023 xmrig.com"
@@ -30,7 +30,7 @@
#define APP_VER_MAJOR 6
#define APP_VER_MINOR 19
#define APP_VER_PATCH 2
#define APP_VER_PATCH 4
#ifdef _MSC_VER
# if (_MSC_VER >= 1930)