Merge a776ebf394 into e855723cd9

Make AMD assembly completely optional through WITH_ASM_AMD (default ON)
2026-06-29 14:02:39 -04:00 · 2023-09-01 13:28:19 -07:00 · 2023-07-12 02:06:53 -06:00
22 changed files with 830 additions and 895 deletions
@@ -1,13 +1,3 @@
-# v6.21.0
- [#3302](https://github.com/xmrig/xmrig/pull/3302) [#3312](https://github.com/xmrig/xmrig/pull/3312) Enabled keepalive for Windows (>= Vista).
- [#3320](https://github.com/xmrig/xmrig/pull/3320) Added "built for OS/architecture/bits" to "ABOUT".
- [#3339](https://github.com/xmrig/xmrig/pull/3339) Added SNI option for TLS connections.
- [#3342](https://github.com/xmrig/xmrig/pull/3342) Update `cn_main_loop.asm`.
- [#3346](https://github.com/xmrig/xmrig/pull/3346) ARM64 JIT: don't use `x18` register.
- [#3348](https://github.com/xmrig/xmrig/pull/3348) Update to latest `sse2neon.h`.
- [#3356](https://github.com/xmrig/xmrig/pull/3356) Updated pricing record size for **Zephyr** solo mining.
- [#3358](https://github.com/xmrig/xmrig/pull/3358) **Zephyr** solo mining: handle multiple outputs.
-
 # v6.20.0
 - Added new ARM CPU names.
 - [#2394](https://github.com/xmrig/xmrig/pull/2394) Added new CMake options `ARM_V8` and `ARM_V7`.
@@ -14,7 +14,9 @@ option(WITH_HTTP            "Enable HTTP protocol support (client/server)" ON)
 option(WITH_DEBUG_LOG       "Enable debug log output" OFF)
 option(WITH_TLS             "Enable OpenSSL support" ON)
 option(WITH_ASM             "Enable ASM PoW implementations" ON)
-option(WITH_MSR             "Enable MSR mod & 1st-gen Ryzen fix" ON)
+option(WITH_ASM_AMD         "Enable ASM for AMD processors" ON)
+option(WITH_MSR             "Enable MSR mod" ON)
+option(WITH_MSR_ZEN         "Enable MSR mod for AMD Zen-based processors" ON)
 option(WITH_ENV_VARS        "Enable environment variables support in config file" ON)
 option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
 option(WITH_OPENCL          "Enable OpenCL backend" ON)
@@ -44,9 +44,17 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
    set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)

    add_definitions(/DXMRIG_FEATURE_ASM)
+    if (WITH_ASM_AMD)
+        add_definitions(/DXMRIG_FEATURE_ASM_AMD)
+        message("-- WITH_ASM=ON (+amd)")
+    else()
+        message("-- WITH_ASM=ON (-amd)")
+    endif()
 else()
    set(XMRIG_ASM_SOURCES "")
    set(XMRIG_ASM_LIBRARY "")

    remove_definitions(/DXMRIG_FEATURE_ASM)
+    remove_definitions(/DXMRIG_FEATURE_ASM_AMD)
+    message("-- WITH_ASM=OFF")
 endif()
@@ -104,8 +104,13 @@ if (WITH_RANDOMX)

    if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
        add_definitions(/DXMRIG_FEATURE_MSR)
-        add_definitions(/DXMRIG_FIX_RYZEN)
-        message("-- WITH_MSR=ON")
+        if (WITH_MSR_ZEN)
+            add_definitions(/DXMRIG_FIX_RYZEN)
+            message("-- WITH_MSR=ON (+zen)")
+        else()
+            remove_definitions(/DXMRIG_FIX_RYZEN)
+            message("-- WITH_MSR=ON (-zen)")
+        endif()

        if (XMRIG_OS_WIN)
            list(APPEND SOURCES_CRYPTO
@@ -589,7 +589,7 @@ void xmrig::Client::handshake()
    if (isTLS()) {
        m_expire = Chrono::steadyMSecs() + kResponseTimeout;

-        m_tls->handshake(m_pool.isSNI() ? m_pool.host().data() : nullptr);
+        m_tls->handshake();
    }
    else
 #   endif
@@ -77,7 +77,6 @@ const char *Pool::kSelfSelect             = "self-select";
 const char *Pool::kSOCKS5                 = "socks5";
 const char *Pool::kSubmitToOrigin         = "submit-to-origin";
 const char *Pool::kTls                    = "tls";
-const char *Pool::kSni                    = "sni";
 const char *Pool::kUrl                    = "url";
 const char *Pool::kUser                   = "user";
 const char *Pool::kSpendSecretKey         = "spend-secret-key";
@@ -138,7 +137,6 @@ xmrig::Pool::Pool(const rapidjson::Value &object) :
    m_flags.set(FLAG_ENABLED,  Json::getBool(object, kEnabled, true));
    m_flags.set(FLAG_NICEHASH, Json::getBool(object, kNicehash) || m_url.host().contains(kNicehashHost));
    m_flags.set(FLAG_TLS,      Json::getBool(object, kTls) || m_url.isTLS());
-    m_flags.set(FLAG_SNI,      Json::getBool(object, kSni));

    setKeepAlive(Json::getValue(object, kKeepalive));

@@ -301,7 +299,6 @@ rapidjson::Value xmrig::Pool::toJSON(rapidjson::Document &doc) const

    obj.AddMember(StringRef(kEnabled),      m_flags.test(FLAG_ENABLED), allocator);
    obj.AddMember(StringRef(kTls),          isTLS(), allocator);
-    obj.AddMember(StringRef(kSni),          isSNI(), allocator);
    obj.AddMember(StringRef(kFingerprint),  m_fingerprint.toJSON(), allocator);
    obj.AddMember(StringRef(kDaemon),       m_mode == MODE_DAEMON, allocator);
    obj.AddMember(StringRef(kSOCKS5),       m_proxy.toJSON(doc), allocator);
@@ -70,7 +70,6 @@ public:
    static const char *kSOCKS5;
    static const char *kSubmitToOrigin;
    static const char *kTls;
-    static const char* kSni;
    static const char *kUrl;
    static const char *kUser;
    static const char* kSpendSecretKey;
@@ -96,7 +95,6 @@ public:

    inline bool isNicehash() const                      { return m_flags.test(FLAG_NICEHASH); }
    inline bool isTLS() const                           { return m_flags.test(FLAG_TLS) || m_url.isTLS(); }
-    inline bool isSNI() const                           { return m_flags.test(FLAG_SNI); }
    inline bool isValid() const                         { return m_url.isValid(); }
    inline const Algorithm &algorithm() const           { return m_algorithm; }
    inline const Coin &coin() const                     { return m_coin; }
@@ -140,7 +138,6 @@ private:
        FLAG_ENABLED,
        FLAG_NICEHASH,
        FLAG_TLS,
-        FLAG_SNI,
        FLAG_MAX
    };

@@ -60,7 +60,7 @@ xmrig::Client::Tls::~Tls()
 }


-bool xmrig::Client::Tls::handshake(const char* servername)
+bool xmrig::Client::Tls::handshake()
 {
    m_ssl = SSL_new(m_ctx);
    assert(m_ssl != nullptr);
@@ -69,10 +69,6 @@ bool xmrig::Client::Tls::handshake(const char* servername)
        return false;
    }

-    if (servername) {
-        SSL_set_tlsext_host_name(m_ssl, servername);
-    }
-
    SSL_set_connect_state(m_ssl);
    SSL_set_bio(m_ssl, m_read, m_write);
    SSL_do_handshake(m_ssl);
@@ -42,7 +42,7 @@ public:
    Tls(Client *client);
    ~Tls();

-    bool handshake(const char* servername);
+    bool handshake();
    bool send(const char *data, size_t size);
    const char *fingerprint() const;
    const char *version() const;
@@ -198,7 +198,7 @@ bool xmrig::BlockTemplate::parse(bool hashes)
    }

    if (m_coin == Coin::ZEPHYR) {
-        uint8_t pricing_record[120];
+        uint8_t pricing_record[24];
        ar(pricing_record);
    }

@@ -225,12 +225,8 @@ bool xmrig::BlockTemplate::parse(bool hashes)
    ar(m_height);
    ar(m_numOutputs);

-    if (m_coin == Coin::ZEPHYR) {
-        if (m_numOutputs < 2) {
-            return false;
-        }
-    }
-    else if (m_numOutputs != 1) {
+    const uint64_t expected_outputs = (m_coin == Coin::ZEPHYR) ? 2 : 1;
+    if (m_numOutputs != expected_outputs) {
        return false;
    }

@@ -256,25 +252,23 @@ bool xmrig::BlockTemplate::parse(bool hashes)
        ar.skip(asset_type_len);
        ar(m_viewTag);

-        for (uint64_t k = 1; k < m_numOutputs; ++k) {
-            uint64_t amount2;
-            ar(amount2);
+        uint64_t amount2;
+        ar(amount2);

-            uint8_t output_type2;
-            ar(output_type2);
-            if (output_type2 != 2) {
-                return false;
-            }
-
-            Span key2;
-            ar(key2, kKeySize);
-
-            ar(asset_type_len);
-            ar.skip(asset_type_len);
-
-            uint8_t view_tag2;
-            ar(view_tag2);
+        uint8_t output_type2;
+        ar(output_type2);
+        if (output_type2 != 2) {
+            return false;
        }
+
+        Span key2;
+        ar(key2, kKeySize);
+
+        ar(asset_type_len);
+        ar.skip(asset_type_len);
+
+        uint8_t view_tag2;
+        ar(view_tag2);
    }
    else if (m_outputType == 3) {
        ar(m_viewTag);
@@ -94,7 +94,13 @@ static inline const std::string &usage()
 #   ifdef XMRIG_ALGO_RANDOMX
    u += "      --huge-pages-jit          enable huge pages support for RandomX JIT code\n";
 #   endif
+#   ifdef XMRIG_FEATURE_ASM
+#   ifdef XMRIG_FEATURE_ASM_AMD
    u += "      --asm=ASM                 ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
+#   else
+    u += "      --asm=ASM                 ASM optimizations, possible values: auto, none, intel\n";
+#   endif
+#   endif

 #   if defined(__x86_64__) || defined(_M_AMD64)
    u += "      --argon2-impl=IMPL        argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";
@@ -55,6 +55,7 @@ bool cn_vaes_enabled = false;


 #ifdef XMRIG_FEATURE_ASM
+#ifdef XMRIG_FEATURE_ASM_AMD
 #   define ADD_FN_ASM(algo) do {                                                                                    \
        m_map[algo]->data[AV_SINGLE][Assembly::INTEL]     = cryptonight_single_hash_asm<algo, Assembly::INTEL>;     \
        m_map[algo]->data[AV_SINGLE][Assembly::RYZEN]     = cryptonight_single_hash_asm<algo, Assembly::RYZEN>;     \
@@ -63,34 +64,50 @@ bool cn_vaes_enabled = false;
        m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN]     = cryptonight_double_hash_asm<algo, Assembly::RYZEN>;     \
        m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \
    } while (0)
+#else
+#   define ADD_FN_ASM(algo) do {                                                                                    \
+        m_map[algo]->data[AV_SINGLE][Assembly::INTEL]     = cryptonight_single_hash_asm<algo, Assembly::INTEL>;     \
+        m_map[algo]->data[AV_DOUBLE][Assembly::INTEL]     = cryptonight_double_hash_asm<algo, Assembly::INTEL>;     \
+    } while (0)
+#endif


 namespace xmrig {


 cn_mainloop_fun        cn_half_mainloop_ivybridge_asm             = nullptr;
+#ifdef XMRIG_FEATURE_ASM_AMD
 cn_mainloop_fun        cn_half_mainloop_ryzen_asm                 = nullptr;
 cn_mainloop_fun        cn_half_mainloop_bulldozer_asm             = nullptr;
+#endif
 cn_mainloop_fun        cn_half_double_mainloop_sandybridge_asm    = nullptr;

 cn_mainloop_fun        cn_trtl_mainloop_ivybridge_asm             = nullptr;
+#ifdef XMRIG_FEATURE_ASM_AMD
 cn_mainloop_fun        cn_trtl_mainloop_ryzen_asm                 = nullptr;
 cn_mainloop_fun        cn_trtl_mainloop_bulldozer_asm             = nullptr;
+#endif
 cn_mainloop_fun        cn_trtl_double_mainloop_sandybridge_asm    = nullptr;

 cn_mainloop_fun        cn_tlo_mainloop_ivybridge_asm              = nullptr;
+#ifdef XMRIG_FEATURE_ASM_AMD
 cn_mainloop_fun        cn_tlo_mainloop_ryzen_asm                  = nullptr;
 cn_mainloop_fun        cn_tlo_mainloop_bulldozer_asm              = nullptr;
+#endif
 cn_mainloop_fun        cn_tlo_double_mainloop_sandybridge_asm     = nullptr;

 cn_mainloop_fun        cn_zls_mainloop_ivybridge_asm              = nullptr;
+#ifdef XMRIG_FEATURE_ASM_AMD
 cn_mainloop_fun        cn_zls_mainloop_ryzen_asm                  = nullptr;
 cn_mainloop_fun        cn_zls_mainloop_bulldozer_asm              = nullptr;
+#endif
 cn_mainloop_fun        cn_zls_double_mainloop_sandybridge_asm     = nullptr;

 cn_mainloop_fun        cn_double_mainloop_ivybridge_asm           = nullptr;
+#ifdef XMRIG_FEATURE_ASM_AMD
 cn_mainloop_fun        cn_double_mainloop_ryzen_asm               = nullptr;
 cn_mainloop_fun        cn_double_mainloop_bulldozer_asm           = nullptr;
+#endif
 cn_mainloop_fun        cn_double_double_mainloop_sandybridge_asm  = nullptr;

 cn_mainloop_fun        cn_upx2_mainloop_asm                       = nullptr;
@@ -160,31 +177,41 @@ static void patchAsmVariants()
    auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));

    cn_half_mainloop_ivybridge_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x0000);
+#   ifdef XMRIG_FEATURE_ASM_AMD
    cn_half_mainloop_ryzen_asm                  = reinterpret_cast<cn_mainloop_fun>         (base + 0x1000);
    cn_half_mainloop_bulldozer_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x2000);
+#   endif
    cn_half_double_mainloop_sandybridge_asm     = reinterpret_cast<cn_mainloop_fun>         (base + 0x3000);

 #   ifdef XMRIG_ALGO_CN_PICO
    cn_trtl_mainloop_ivybridge_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x4000);
+#   ifdef XMRIG_FEATURE_ASM_AMD
    cn_trtl_mainloop_ryzen_asm                  = reinterpret_cast<cn_mainloop_fun>         (base + 0x5000);
    cn_trtl_mainloop_bulldozer_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x6000);
+#   endif
    cn_trtl_double_mainloop_sandybridge_asm     = reinterpret_cast<cn_mainloop_fun>         (base + 0x7000);
 #   endif

    cn_zls_mainloop_ivybridge_asm               = reinterpret_cast<cn_mainloop_fun>         (base + 0x8000);
+#   ifdef XMRIG_FEATURE_ASM_AMD
    cn_zls_mainloop_ryzen_asm                   = reinterpret_cast<cn_mainloop_fun>         (base + 0x9000);
    cn_zls_mainloop_bulldozer_asm               = reinterpret_cast<cn_mainloop_fun>         (base + 0xA000);
+#   endif
    cn_zls_double_mainloop_sandybridge_asm      = reinterpret_cast<cn_mainloop_fun>         (base + 0xB000);

    cn_double_mainloop_ivybridge_asm            = reinterpret_cast<cn_mainloop_fun>         (base + 0xC000);
+#   ifdef XMRIG_FEATURE_ASM_AMD
    cn_double_mainloop_ryzen_asm                = reinterpret_cast<cn_mainloop_fun>         (base + 0xD000);
    cn_double_mainloop_bulldozer_asm            = reinterpret_cast<cn_mainloop_fun>         (base + 0xE000);
+#   endif
    cn_double_double_mainloop_sandybridge_asm   = reinterpret_cast<cn_mainloop_fun>         (base + 0xF000);

 #   ifdef XMRIG_ALGO_CN_PICO
    cn_tlo_mainloop_ivybridge_asm               = reinterpret_cast<cn_mainloop_fun>         (base + 0x10000);
+#   ifdef XMRIG_FEATURE_ASM_AMD
    cn_tlo_mainloop_ryzen_asm                   = reinterpret_cast<cn_mainloop_fun>         (base + 0x11000);
    cn_tlo_mainloop_bulldozer_asm               = reinterpret_cast<cn_mainloop_fun>         (base + 0x12000);
+#   endif
    cn_tlo_double_mainloop_sandybridge_asm      = reinterpret_cast<cn_mainloop_fun>         (base + 0x13000);
 #   endif

@@ -220,8 +247,10 @@ static void patchAsmVariants()
        constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();

        patchCode(cn_half_mainloop_ivybridge_asm,            cnv2_mainloop_ivybridge_asm,           ITER);
+#       ifdef XMRIG_FEATURE_ASM_AMD
        patchCode(cn_half_mainloop_ryzen_asm,                cnv2_mainloop_ryzen_asm,               ITER);
        patchCode(cn_half_mainloop_bulldozer_asm,            cnv2_mainloop_bulldozer_asm,           ITER);
+#       endif
        patchCode(cn_half_double_mainloop_sandybridge_asm,   cnv2_double_mainloop_sandybridge_asm,  ITER);
    }

@@ -231,8 +260,10 @@ static void patchAsmVariants()
        constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask();

        patchCode(cn_trtl_mainloop_ivybridge_asm,            cnv2_mainloop_ivybridge_asm,           ITER,   MASK);
+#       ifdef XMRIG_FEATURE_ASM_AMD
        patchCode(cn_trtl_mainloop_ryzen_asm,                cnv2_mainloop_ryzen_asm,               ITER,   MASK);
        patchCode(cn_trtl_mainloop_bulldozer_asm,            cnv2_mainloop_bulldozer_asm,           ITER,   MASK);
+#       endif
        patchCode(cn_trtl_double_mainloop_sandybridge_asm,   cnv2_double_mainloop_sandybridge_asm,  ITER,   MASK);
    }

@@ -241,8 +272,10 @@ static void patchAsmVariants()
        constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();

        patchCode(cn_tlo_mainloop_ivybridge_asm,             cnv2_mainloop_ivybridge_asm,           ITER,   MASK);
+#       ifdef XMRIG_FEATURE_ASM_AMD
        patchCode(cn_tlo_mainloop_ryzen_asm,                 cnv2_mainloop_ryzen_asm,               ITER,   MASK);
        patchCode(cn_tlo_mainloop_bulldozer_asm,             cnv2_mainloop_bulldozer_asm,           ITER,   MASK);
+#       endif
        patchCode(cn_tlo_double_mainloop_sandybridge_asm,    cnv2_double_mainloop_sandybridge_asm,  ITER,   MASK);
    }
 #   endif
@@ -251,8 +284,10 @@ static void patchAsmVariants()
        constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations();

        patchCode(cn_zls_mainloop_ivybridge_asm,             cnv2_mainloop_ivybridge_asm,           ITER);
+#       ifdef XMRIG_FEATURE_ASM_AMD
        patchCode(cn_zls_mainloop_ryzen_asm,                 cnv2_mainloop_ryzen_asm,               ITER);
        patchCode(cn_zls_mainloop_bulldozer_asm,             cnv2_mainloop_bulldozer_asm,           ITER);
+#       endif
        patchCode(cn_zls_double_mainloop_sandybridge_asm,    cnv2_double_mainloop_sandybridge_asm,  ITER);
    }

@@ -260,8 +295,10 @@ static void patchAsmVariants()
        constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations();

        patchCode(cn_double_mainloop_ivybridge_asm,          cnv2_mainloop_ivybridge_asm,           ITER);
+#       ifdef XMRIG_FEATURE_ASM_AMD
        patchCode(cn_double_mainloop_ryzen_asm,              cnv2_mainloop_ryzen_asm,               ITER);
        patchCode(cn_double_mainloop_bulldozer_asm,          cnv2_mainloop_bulldozer_asm,           ITER);
+#       endif
        patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm,  ITER);
    }

@@ -852,12 +852,16 @@ extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
 extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
 extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
 extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
 extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
+#endif
 extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
 extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
 extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
+#endif


 namespace xmrig {
@@ -867,28 +871,38 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);


 extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern cn_mainloop_fun cn_half_mainloop_ryzen_asm;
 extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
+#endif
 extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;

 extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
 extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
+#endif
 extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;

 extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
 extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
+#endif
 extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;

 extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
 extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
+#endif
 extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;

 extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
+#ifdef XMRIG_FEATURE_ASM_AMD
 extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
 extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
+#endif
 extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;

 extern cn_mainloop_fun cn_upx2_mainloop_asm;
@@ -964,46 +978,54 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
        if (ASM == Assembly::INTEL) {
            cnv2_mainloop_ivybridge_asm(ctx);
        }
+#       ifdef XMRIG_FEATURE_ASM_AMD
        else if (ASM == Assembly::RYZEN) {
            cnv2_mainloop_ryzen_asm(ctx);
        }
        else {
            cnv2_mainloop_bulldozer_asm(ctx);
        }
+#       endif
    }
    else if (ALGO == Algorithm::CN_HALF) {
        if (ASM == Assembly::INTEL) {
            cn_half_mainloop_ivybridge_asm(ctx);
        }
+#       ifdef XMRIG_FEATURE_ASM_AMD
        else if (ASM == Assembly::RYZEN) {
            cn_half_mainloop_ryzen_asm(ctx);
        }
        else {
            cn_half_mainloop_bulldozer_asm(ctx);
        }
+#       endif
    }
 #   ifdef XMRIG_ALGO_CN_PICO
    else if (ALGO == Algorithm::CN_PICO_0) {
        if (ASM == Assembly::INTEL) {
            cn_trtl_mainloop_ivybridge_asm(ctx);
        }
+#       ifdef XMRIG_FEATURE_ASM_AMD
        else if (ASM == Assembly::RYZEN) {
            cn_trtl_mainloop_ryzen_asm(ctx);
        }
        else {
            cn_trtl_mainloop_bulldozer_asm(ctx);
        }
+#       endif
    }
    else if (ALGO == Algorithm::CN_PICO_TLO) {
        if (ASM == Assembly::INTEL) {
            cn_tlo_mainloop_ivybridge_asm(ctx);
        }
+#       ifdef XMRIG_FEATURE_ASM_AMD
        else if (ASM == Assembly::RYZEN) {
            cn_tlo_mainloop_ryzen_asm(ctx);
        }
        else {
            cn_tlo_mainloop_bulldozer_asm(ctx);
        }
+#       endif
    }
 #   endif
    else if (ALGO == Algorithm::CN_RWZ) {
@@ -1013,23 +1035,27 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
        if (ASM == Assembly::INTEL) {
            cn_zls_mainloop_ivybridge_asm(ctx);
        }
+#       ifdef XMRIG_FEATURE_ASM_AMD
        else if (ASM == Assembly::RYZEN) {
            cn_zls_mainloop_ryzen_asm(ctx);
        }
        else {
            cn_zls_mainloop_bulldozer_asm(ctx);
        }
+#       endif
    }
    else if (ALGO == Algorithm::CN_DOUBLE) {
        if (ASM == Assembly::INTEL) {
            cn_double_mainloop_ivybridge_asm(ctx);
        }
+#       ifdef XMRIG_FEATURE_ASM_AMD
        else if (ASM == Assembly::RYZEN) {
            cn_double_mainloop_ryzen_asm(ctx);
        }
        else {
            cn_double_mainloop_bulldozer_asm(ctx);
        }
+#       endif
    }
 #   ifdef XMRIG_ALGO_CN_FEMTO
    else if (ALGO == Algorithm::CN_UPX2) {
@@ -1094,12 +1120,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
 #   endif
 #   ifdef XMRIG_ALGO_CN_FEMTO
    else if (ALGO == Algorithm::CN_UPX2) {
+#       ifdef XMRIG_FEATURE_ASM_AMD
        if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
            cnv2_upx_double_mainloop_zen3_asm(ctx);
        }
        else {
            cn_upx2_double_mainloop_asm(ctx);
        }
+#       else
+        cn_upx2_double_mainloop_asm(ctx);
+#       endif
    }
 #   endif
    else if (ALGO == Algorithm::CN_RWZ) {
@@ -15,12 +15,16 @@
 .global FN_PREFIX(cnv1_double_mainloop_asm)
 .global FN_PREFIX(cnv1_quad_mainloop_asm)
 .global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
+#ifdef XMRIG_FEATURE_ASM_AMD
 .global FN_PREFIX(cnv2_mainloop_ryzen_asm)
 .global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
+#endif
 .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
 .global FN_PREFIX(cnv2_rwz_mainloop_asm)
 .global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
+#ifdef XMRIG_FEATURE_ASM_AMD
 .global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
+#endif

 ALIGN(64)
 FN_PREFIX(cnv1_single_mainloop_asm):
@@ -58,6 +62,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
 	ret 0
 	mov eax, 3735929054

+#ifdef XMRIG_FEATURE_ASM_AMD
 ALIGN(64)
 FN_PREFIX(cnv2_mainloop_ryzen_asm):
 	sub rsp, 48
@@ -75,6 +80,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
 	add rsp, 48
 	ret 0
 	mov eax, 3735929054
+#endif

 ALIGN(64)
 FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
@@ -103,6 +109,7 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
 	ret 0
 	mov eax, 3735929054

+#ifdef XMRIG_FEATURE_ASM_AMD
 ALIGN(64)
 FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
 	sub rsp, 48
@@ -111,6 +118,7 @@ FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
 	add rsp, 48
 	ret 0
 	mov eax, 3735929054
+#endif

 #if defined(__linux__) && defined(__ELF__)
 .section .note.GNU-stack,"",%progbits
@@ -8,7 +8,6 @@ PUBLIC cnv2_mainloop_bulldozer_asm
 PUBLIC cnv2_double_mainloop_sandybridge_asm
 PUBLIC cnv2_rwz_mainloop_asm
 PUBLIC cnv2_rwz_double_mainloop_asm
-PUBLIC cnv2_upx_double_mainloop_zen3_asm

 ALIGN(64)
 cnv1_single_mainloop_asm PROC
@@ -5,12 +5,16 @@
 .global cnv1_double_mainloop_asm
 .global cnv1_quad_mainloop_asm
 .global cnv2_mainloop_ivybridge_asm
+#ifdef XMRIG_FEATURE_ASM_AMD
 .global cnv2_mainloop_ryzen_asm
 .global cnv2_mainloop_bulldozer_asm
+#endif
 .global cnv2_double_mainloop_sandybridge_asm
 .global cnv2_rwz_mainloop_asm
 .global cnv2_rwz_double_mainloop_asm
+#ifdef XMRIG_FEATURE_ASM_AMD
 .global cnv2_upx_double_mainloop_zen3_asm
+#endif

 ALIGN(64)
 cnv1_single_mainloop_asm:
@@ -36,6 +40,7 @@ cnv2_mainloop_ivybridge_asm:
 	ret 0
 	mov eax, 3735929054

+#ifdef XMRIG_FEATURE_ASM_AMD
 ALIGN(64)
 cnv2_mainloop_ryzen_asm:
 	#include "../cn2/cnv2_main_loop_ryzen.inc"
@@ -47,6 +52,7 @@ cnv2_mainloop_bulldozer_asm:
 	#include "../cn2/cnv2_main_loop_bulldozer.inc"
 	ret 0
 	mov eax, 3735929054
+#endif

 ALIGN(64)
 cnv2_double_mainloop_sandybridge_asm:
@@ -66,8 +72,10 @@ cnv2_rwz_double_mainloop_asm:
 	ret 0
 	mov eax, 3735929054

+#ifdef XMRIG_FEATURE_ASM_AMD
 ALIGN(64)
 cnv2_upx_double_mainloop_zen3_asm:
 	#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
 	ret 0
 	mov eax, 3735929054
+#endif
@@ -8,7 +8,6 @@ PUBLIC cnv2_mainloop_bulldozer_asm
 PUBLIC cnv2_double_mainloop_sandybridge_asm
 PUBLIC cnv2_rwz_mainloop_asm
 PUBLIC cnv2_rwz_double_mainloop_asm
-PUBLIC cnv2_upx_double_mainloop_zen3_asm

 ALIGN(64)
 cnv1_single_mainloop_asm PROC
@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	// and w16, w10, ScratchpadL3Mask64
 	emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);

-	// and w17, w20, ScratchpadL3Mask64
-	emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
+	// and w17, w18, ScratchpadL3Mask64
+	emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);

 	codePos = PrologueSize;
 	literalPos = ImulRcpLiteralsEnd;
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	}

 	// Update spMix2
-	// eor w20, config.readReg2, config.readReg3
-	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
+	// eor w18, config.readReg2, config.readReg3
+	emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);

 	// Jump back to the main loop
 	const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
 	emit32(ARMV8A::B | (offset / 4), code, codePos);

-	// and w20, w20, CacheLineAlignMask
+	// and w18, w18, CacheLineAlignMask
 	codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
-	emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
+	emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);

 	// and w10, w10, CacheLineAlignMask
 	codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
 	// and w16, w10, ScratchpadL3Mask64
 	emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);

-	// and w17, w20, ScratchpadL3Mask64
-	emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
+	// and w17, w18, ScratchpadL3Mask64
+	emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);

 	codePos = PrologueSize;
 	literalPos = ImulRcpLiteralsEnd;
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
 	}

 	// Update spMix2
-	// eor w20, config.readReg2, config.readReg3
-	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
+	// eor w18, config.readReg2, config.readReg3
+	emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);

 	// Jump back to the main loop
 	const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
 	}
 	else
 	{
-		constexpr uint32_t tmp_reg = 20;
+		constexpr uint32_t tmp_reg = 18;
 		emitMovImmediate(tmp_reg, imm, code, k);

 		// add dst, src, tmp_reg
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
 	uint32_t k = codePos;

 	uint32_t imm = instr.getImm32();
-	constexpr uint32_t tmp_reg = 19;
+	constexpr uint32_t tmp_reg = 18;

 	imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
 	emitAddImmediate(tmp_reg, src, imm, code, k);
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// add dst, dst, tmp_reg
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// sub dst, dst, tmp_reg
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)

 	if (src == dst)
 	{
-		src = 20;
+		src = 18;
 		emitMovImmediate(src, instr.getImm32(), code, k);
 	}

@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// sub dst, dst, tmp_reg
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// umulh dst, dst, tmp_reg
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// smulh dst, dst, tmp_reg
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)

 	uint32_t k = codePos;

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	const uint32_t dst = IntRegMap[instr.dst];

 	constexpr uint64_t N = 1ULL << 63;
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
 	literalPos -= sizeof(uint64_t);
 	*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);

-	if (literal_id < 12)
+	if (literal_id < 13)
 	{
-		static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
+		static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };

 		// mul dst, dst, literal_reg
 		emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)

 	if (src == dst)
 	{
-		src = 20;
+		src = 18;
 		emitMovImmediate(src, instr.getImm32(), code, k);
 	}

@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// eor dst, dst, tmp_reg
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)

 	if (src != dst)
 	{
-		constexpr uint32_t tmp_reg = 20;
+		constexpr uint32_t tmp_reg = 18;

 		// sub tmp_reg, xzr, src
 		emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)

 	uint32_t k = codePos;

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
 	emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
 	emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)

 	const uint32_t src = IntRegMap[instr.src];

-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;
 	constexpr uint32_t fpcr_tmp_reg = 8;

 	// ror tmp_reg, src, imm
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)

 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];
-	constexpr uint32_t tmp_reg = 20;
+	constexpr uint32_t tmp_reg = 18;

 	uint32_t imm = instr.getImm32();

@@ -72,9 +72,9 @@
 # x15 -> "r7"
 # x16 -> spAddr0
 # x17 -> spAddr1
-# x18 -> unused (platform register, don't touch it)
+# x18 -> temporary
 # x19 -> temporary
-# x20 -> temporary
+# x20 -> literal for IMUL_RCP
 # x21 -> literal for IMUL_RCP
 # x22 -> literal for IMUL_RCP
 # x23 -> literal for IMUL_RCP
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
 	# Save callee-saved registers
 	sub	sp, sp, 192
 	stp	x16, x17, [sp]
-	str	x19, [sp, 16]
+	stp	x18, x19, [sp, 16]
 	stp	x20, x21, [sp, 32]
 	stp	x22, x23, [sp, 48]
 	stp	x24, x25, [sp, 64]
@@ -164,6 +164,7 @@ DECL(randomx_program_aarch64):
 	# Read literals
 	ldr	x0, literal_x0
 	ldr	x11, literal_x11
+	ldr	x20, literal_x20
 	ldr	x21, literal_x21
 	ldr	x22, literal_x22
 	ldr	x23, literal_x23
@@ -195,11 +196,11 @@ DECL(randomx_program_aarch64):
 DECL(randomx_program_aarch64_main_loop):
 	# spAddr0 = spMix1 & ScratchpadL3Mask64;
 	# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
-	lsr	x20, x10, 32
+	lsr	x18, x10, 32

 	# Actual mask will be inserted by JIT compiler
 	and	w16, w10, 1
-	and	w17, w20, 1
+	and	w17, w18, 1

 	# x16 = scratchpad + spAddr0
 	# x17 = scratchpad + spAddr1
@@ -207,31 +208,31 @@ DECL(randomx_program_aarch64_main_loop):
 	add	x17, x17, x2

 	# xor integer registers with scratchpad data (spAddr0)
-	ldp	x20, x19, [x16]
-	eor	x4, x4, x20
+	ldp	x18, x19, [x16]
+	eor	x4, x4, x18
 	eor	x5, x5, x19
-	ldp	x20, x19, [x16, 16]
-	eor	x6, x6, x20
+	ldp	x18, x19, [x16, 16]
+	eor	x6, x6, x18
 	eor	x7, x7, x19
-	ldp	x20, x19, [x16, 32]
-	eor	x12, x12, x20
+	ldp	x18, x19, [x16, 32]
+	eor	x12, x12, x18
 	eor	x13, x13, x19
-	ldp	x20, x19, [x16, 48]
-	eor	x14, x14, x20
+	ldp	x18, x19, [x16, 48]
+	eor	x14, x14, x18
 	eor	x15, x15, x19

 	# Load group F registers (spAddr1)
-	ldpsw	x20, x19, [x17]
-	ins	v16.d[0], x20
+	ldpsw	x18, x19, [x17]
+	ins	v16.d[0], x18
 	ins	v16.d[1], x19
-	ldpsw	x20, x19, [x17, 8]
-	ins	v17.d[0], x20
+	ldpsw	x18, x19, [x17, 8]
+	ins	v17.d[0], x18
 	ins	v17.d[1], x19
-	ldpsw	x20, x19, [x17, 16]
-	ins	v18.d[0], x20
+	ldpsw	x18, x19, [x17, 16]
+	ins	v18.d[0], x18
 	ins	v18.d[1], x19
-	ldpsw	x20, x19, [x17, 24]
-	ins	v19.d[0], x20
+	ldpsw	x18, x19, [x17, 24]
+	ins	v19.d[0], x18
 	ins	v19.d[1], x19
 	scvtf	v16.2d, v16.2d
 	scvtf	v17.2d, v17.2d
@@ -239,17 +240,17 @@ DECL(randomx_program_aarch64_main_loop):
 	scvtf	v19.2d, v19.2d

 	# Load group E registers (spAddr1)
-	ldpsw	x20, x19, [x17, 32]
-	ins	v20.d[0], x20
+	ldpsw	x18, x19, [x17, 32]
+	ins	v20.d[0], x18
 	ins	v20.d[1], x19
-	ldpsw	x20, x19, [x17, 40]
-	ins	v21.d[0], x20
+	ldpsw	x18, x19, [x17, 40]
+	ins	v21.d[0], x18
 	ins	v21.d[1], x19
-	ldpsw	x20, x19, [x17, 48]
-	ins	v22.d[0], x20
+	ldpsw	x18, x19, [x17, 48]
+	ins	v22.d[0], x18
 	ins	v22.d[1], x19
-	ldpsw	x20, x19, [x17, 56]
-	ins	v23.d[0], x20
+	ldpsw	x18, x19, [x17, 56]
+	ins	v23.d[0], x18
 	ins	v23.d[1], x19
 	scvtf	v20.2d, v20.2d
 	scvtf	v21.2d, v21.2d
@@ -272,6 +273,7 @@ DECL(randomx_program_aarch64_vm_instructions):

 literal_x0:  .fill 1,8,0
 literal_x11: .fill 1,8,0
+literal_x20: .fill 1,8,0
 literal_x21: .fill 1,8,0
 literal_x22: .fill 1,8,0
 literal_x23: .fill 1,8,0
@@ -307,17 +309,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
 	lsr	x10, x9, 32

 	# mx ^= r[readReg2] ^ r[readReg3];
-	eor	x9, x9, x20
+	eor	x9, x9, x18

 	# Calculate dataset pointer for dataset prefetch
-	mov	w20, w9
+	mov	w18, w9
 DECL(randomx_program_aarch64_cacheline_align_mask1):
 	# Actual mask will be inserted by JIT compiler
-	and	x20, x20, 1
-	add	x20, x20, x1
+	and	x18, x18, 1
+	add	x18, x18, x1

 	# Prefetch dataset data
-	prfm	pldl2strm, [x20]
+	prfm	pldl2strm, [x18]

 	# mx <-> ma
 	ror	x9, x9, 32
@@ -329,17 +331,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):

 DECL(randomx_program_aarch64_xor_with_dataset_line):
 	# xor integer registers with dataset data
-	ldp	x20, x19, [x10]
-	eor	x4, x4, x20
+	ldp	x18, x19, [x10]
+	eor	x4, x4, x18
 	eor	x5, x5, x19
-	ldp	x20, x19, [x10, 16]
-	eor	x6, x6, x20
+	ldp	x18, x19, [x10, 16]
+	eor	x6, x6, x18
 	eor	x7, x7, x19
-	ldp	x20, x19, [x10, 32]
-	eor	x12, x12, x20
+	ldp	x18, x19, [x10, 32]
+	eor	x12, x12, x18
 	eor	x13, x13, x19
-	ldp	x20, x19, [x10, 48]
-	eor	x14, x14, x20
+	ldp	x18, x19, [x10, 48]
+	eor	x14, x14, x18
 	eor	x15, x15, x19

 DECL(randomx_program_aarch64_update_spMix1):
@@ -382,7 +384,7 @@ DECL(randomx_program_aarch64_update_spMix1):

 	# Restore callee-saved registers
 	ldp	x16, x17, [sp]
-	ldr	x19, [sp, 16]
+	ldp	x18, x19, [sp, 16]
 	ldp	x20, x21, [sp, 32]
 	ldp	x22, x23, [sp, 48]
 	ldp	x24, x25, [sp, 64]
@@ -403,7 +405,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
 	stp	x2, x30, [sp, 80]

 	# mx ^= r[readReg2] ^ r[readReg3];
-	eor	x9, x9, x20
+	eor	x9, x9, x18

 	# mx <-> ma
 	ror	x9, x9, 32
@@ -445,8 +447,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
 # x3 -> end item

 DECL(randomx_init_dataset_aarch64):
-	# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
-	stp	x20, x30, [sp, -16]!
+	# Save x30 (return address)
+	str	x30, [sp, -16]!

 	# Load pointer to cache memory
 	ldr	x0, [x0]
@@ -458,8 +460,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
 	cmp	x2, x3
 	bne	DECL(randomx_init_dataset_aarch64_main_loop)

-	# Restore x20 and x30
-	ldp	x20, x30, [sp], 16
+	# Restore x30 (return address)
+	ldr	x30, [sp], 16

 	ret

@@ -41,10 +41,12 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
        flags |= RANDOMX_FLAG_JIT;
    }

+#   ifdef XMRIG_FEATURE_ASM_AMD
    const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
    if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
        flags |= RANDOMX_FLAG_AMD;
    }
+#   endif

    return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
 }
@@ -22,15 +22,15 @@
 #define APP_ID        "xmrig"
 #define APP_NAME      "XMRig"
 #define APP_DESC      "XMRig miner"
-#define APP_VERSION   "6.21.0"
+#define APP_VERSION   "6.20.1-dev"
 #define APP_DOMAIN    "xmrig.com"
 #define APP_SITE      "www.xmrig.com"
 #define APP_COPYRIGHT "Copyright (C) 2016-2023 xmrig.com"
 #define APP_KIND      "miner"

 #define APP_VER_MAJOR  6
-#define APP_VER_MINOR  21
-#define APP_VER_PATCH  0
+#define APP_VER_MINOR  20
+#define APP_VER_PATCH  1

 #ifdef _MSC_VER
 #   if (_MSC_VER >= 1930)
Author	SHA1	Message	Date
Tony Butler	10f1994c91	Merge `a776ebf394` into `e855723cd9`	2023-09-01 13:28:19 -07:00
Tony Butler	a776ebf394	Make AMD assembly completely optional through WITH_ASM_AMD (default ON)	2023-07-12 02:06:53 -06:00