mirror of https://github.com/xmrig/xmrig.git synced 2025-12-09 08:42:40 -05:00

Compare commits


5 Commits

Author SHA1 Message Date
Tony Butler
8aa110d182 Merge 862280f28c into fee51b20fa 2023-10-21 13:30:07 -07:00
xmrig
fee51b20fa Merge pull request #3346 from SChernykh/dev
ARM64 JIT: don't use `x18` register
2023-10-20 07:36:12 +07:00
SChernykh
5e66efabcf ARM64 JIT: don't use x18 register
From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms
> The platforms reserve register x18. Don’t use this register.

This PR fixes invalid hashes when running on Apple silicon with the latest macOS SDK.
2023-10-19 17:45:15 +02:00
Tony Butler
862280f28c How about this way 2023-07-12 02:06:53 -06:00
Tony Butler
814e1de2a6 CN: Consistency cleanup 2023-07-12 02:06:53 -06:00
4 changed files with 211 additions and 246 deletions
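
For context on the x18 change (commit 5e66efabcf above): AAPCS64 designates x18 as the platform register and Apple's arm64 ABI reserves it outright, so a JIT that hands out scratch registers has to keep x18 out of its pool; anything written there can be clobbered by the platform at any time, which is how silently wrong results such as invalid hashes appear. A minimal, self-contained C++ sketch of that constraint (illustrative constants only, not xmrig's actual register allocator):

// --- illustrative sketch, not part of the diff ---
#include <cstdint>

constexpr uint32_t kPlatformReg = 18;          // reserved on Apple (and Windows) arm64
constexpr uint32_t kTempRegs[]  = { 19, 20 };  // temporaries a JIT may hand out instead

constexpr bool poolAvoidsPlatformReg()
{
    for (uint32_t r : kTempRegs) {
        if (r == kPlatformReg) {
            return false;
        }
    }
    return true;
}

static_assert(poolAvoidsPlatformReg(), "never hand out x18 as a JIT temporary");
// --- end sketch ---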

View File

@@ -37,14 +37,36 @@ class CnAlgo
public:
constexpr CnAlgo() {};
constexpr inline Algorithm::Id base() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::base(ALGO); }
constexpr inline bool isHeavy() const { return Algorithm::family(ALGO) == Algorithm::CN_HEAVY; }
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; }
constexpr inline size_t memory() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::l3(ALGO); }
constexpr inline uint32_t iterations() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
# define ASSERT_CN static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm")
constexpr inline Algorithm::Id base() const { ASSERT_CN; return Algorithm::base(ALGO); }
constexpr inline size_t memory() const { ASSERT_CN; return Algorithm::l3(ALGO); }
constexpr inline uint32_t iterations() const { ASSERT_CN; return CN_ITER; }
constexpr inline uint32_t mask() const { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); }
constexpr inline uint32_t half_mem() const { return mask() < memory() / 2; }
constexpr inline bool isBase1() const { ASSERT_CN; return Algorithm::base(ALGO) == Algorithm::CN_1; }
constexpr inline bool isBase2() const { ASSERT_CN; return Algorithm::base(ALGO) == Algorithm::CN_2; }
constexpr inline bool is2() const { return ALGO == Algorithm::CN_2; }
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; }
constexpr inline bool isHalf() const { return ALGO == Algorithm::CN_HALF; }
constexpr inline bool isRTO() const { return ALGO == Algorithm::CN_RTO; }
constexpr inline bool isRWZ() const { return ALGO == Algorithm::CN_RWZ; }
constexpr inline bool isZLS() const { return ALGO == Algorithm::CN_ZLS; }
constexpr inline bool isDouble() const { return ALGO == Algorithm::CN_DOUBLE; }
constexpr inline bool isCCX() const { return ALGO == Algorithm::CN_CCX; }
constexpr inline bool isHeavy() const { ASSERT_CN; return Algorithm::family(ALGO) == Algorithm::CN_HEAVY; }
constexpr inline bool isHeavyTube() const { return ALGO == Algorithm::CN_HEAVY_TUBE; }
constexpr inline bool isHeavyXHV() const { return ALGO == Algorithm::CN_HEAVY_XHV; }
constexpr inline bool isPico0() const { return ALGO == Algorithm::CN_PICO_0; }
constexpr inline bool isPicoTLO() const { return ALGO == Algorithm::CN_PICO_TLO; }
constexpr inline bool isUPX2() const { return ALGO == Algorithm::CN_UPX2; }
constexpr inline bool isGR0() const { return ALGO == Algorithm::CN_GR_0; }
constexpr inline bool isGR1() const { return ALGO == Algorithm::CN_GR_1; }
constexpr inline bool isGR2() const { return ALGO == Algorithm::CN_GR_2; }
constexpr inline bool isGR3() const { return ALGO == Algorithm::CN_GR_3; }
constexpr inline bool isGR4() const { return ALGO == Algorithm::CN_GR_4; }
constexpr inline bool isGR5() const { return ALGO == Algorithm::CN_GR_5; }
inline static uint32_t iterations(Algorithm::Id algo)
{
switch (algo) {
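
The two cleanup commits (814e1de2a6, 862280f28c) replace scattered ALGO == Algorithm::X comparisons in the hash code with the named constexpr predicates added above, so every call site reads the same way and the CRYPTONIGHT static_assert sits behind a single macro. A small self-contained sketch of the pattern (stand-in types, not the real CnAlgo):

// --- illustrative sketch, not part of the diff ---
namespace sketch {

enum class Id { CN_2, CN_RWZ, CN_UPX2 };

template<Id ALGO>
struct CnAlgo {
    constexpr bool isRWZ()  const { return ALGO == Id::CN_RWZ; }
    constexpr bool isUPX2() const { return ALGO == Id::CN_UPX2; }
};

template<Id ALGO>
constexpr int shuffleOffset()
{
    constexpr CnAlgo<ALGO> props;
    // before the cleanup: (((ALGO == Id::CN_RWZ) || (ALGO == Id::CN_UPX2)) ? 1 : 0)
    return (props.isRWZ() || props.isUPX2()) ? 1 : 0;
}

static_assert(shuffleOffset<Id::CN_RWZ>() == 1 && shuffleOffset<Id::CN_2>() == 0,
              "predicates and direct comparisons agree");

} // namespace sketch
// --- end sketch ---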

View File

@@ -603,7 +603,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
constexpr CnAlgo<ALGO> props;
if (props.base() == Algorithm::CN_2) {
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
_mm_store_si128(reinterpret_cast<__m128i *>(mem_out), _mm_xor_si128(bx0, cx));
} else {
__m128i tmp = _mm_xor_si128(bx0, cx);
@@ -665,15 +665,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 32);
return;
}
@@ -694,10 +687,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
V4_Instruction code[256];
const int code_size = v4_random_math_init<ALGO>(code, height);
if (ALGO == Algorithm::CN_R) {
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
}
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
ctx[0]->generated_code_data = { ALGO, height };
}
@@ -718,26 +708,26 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10]));
__m128 conc_var;
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
conc_var = _mm_setzero_ps();
RESTORE_ROUNDING_MODE();
}
for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
if (props.isHeavyTube() || !SOFT_AES) {
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
cryptonight_conceal_tweak(cx, conc_var);
}
}
const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0));
if (IS_CN_HEAVY_TUBE) {
if (props.isHeavyTube()) {
cx = aes_round_tweak_div(cx, ax0);
}
else if (SOFT_AES) {
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
@@ -750,7 +740,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = _mm_aesenc_si128(cx, ax0);
}
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
if (props.isBase1() || props.isBase2()) {
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else {
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
@@ -762,13 +752,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
if (BASE == Algorithm::CN_2) {
if (props.isBase2()) {
if (props.isR()) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
if (ALGO == Algorithm::CN_R) {
al0 ^= r0[2] | (static_cast<uint64_t>(r0[3]) << 32);
ah0 ^= r0[0] | (static_cast<uint64_t>(r0[1]) << 32);
}
al0 ^= r0[2] | (static_cast<uint64_t>(r0[3]) << 32);
ah0 ^= r0[0] | (static_cast<uint64_t>(r0[1]) << 32);
} else {
VARIANT2_INTEGER_MATH(0, cl, cx);
}
@@ -776,11 +764,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
lo = __umul128(idx0, cl, &hi);
if (BASE == Algorithm::CN_2) {
if (ALGO == Algorithm::CN_R) {
if (props.isBase2()) {
if (props.isR()) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
} else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
}
}
@@ -789,9 +777,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
if (props.isHeavyTube() || props.isRTO()) {
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (BASE == Algorithm::CN_1) {
} else if (props.isBase1()) {
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
} else {
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
@@ -819,7 +807,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) {
if (props.isHeavyXHV()) {
d = ~d;
}
@@ -827,7 +815,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
# endif
if (BASE == Algorithm::CN_2) {
if (props.isBase2()) {
bx1 = bx0;
}
@@ -960,7 +948,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
if (ALGO == Algorithm::CN_2) {
if (props.is2()) {
if (ASM == Assembly::INTEL) {
cnv2_mainloop_ivybridge_asm(ctx);
}
@@ -971,7 +959,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cnv2_mainloop_bulldozer_asm(ctx);
}
}
else if (ALGO == Algorithm::CN_HALF) {
else if (props.isHalf()) {
if (ASM == Assembly::INTEL) {
cn_half_mainloop_ivybridge_asm(ctx);
}
@@ -983,7 +971,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
}
# ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) {
else if (props.isPico0()) {
if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx);
}
@@ -994,7 +982,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_trtl_mainloop_bulldozer_asm(ctx);
}
}
else if (ALGO == Algorithm::CN_PICO_TLO) {
else if (props.isPicoTLO()) {
if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx);
}
@@ -1006,10 +994,10 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
else if (props.isRWZ()) {
cnv2_rwz_mainloop_asm(ctx);
}
else if (ALGO == Algorithm::CN_ZLS) {
else if (props.isZLS()) {
if (ASM == Assembly::INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx);
}
@@ -1020,7 +1008,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_zls_mainloop_bulldozer_asm(ctx);
}
}
else if (ALGO == Algorithm::CN_DOUBLE) {
else if (props.isDouble()) {
if (ASM == Assembly::INTEL) {
cn_double_mainloop_ivybridge_asm(ctx);
}
@@ -1032,7 +1020,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
}
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
else if (props.isUPX2()) {
cn_upx2_mainloop_asm(ctx);
}
# endif
@@ -1078,22 +1066,22 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
}
if (ALGO == Algorithm::CN_2) {
if (props.is2()) {
cnv2_double_mainloop_sandybridge_asm(ctx);
}
else if (ALGO == Algorithm::CN_HALF) {
else if (props.isHalf()){
cn_half_double_mainloop_sandybridge_asm(ctx);
}
# ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) {
else if (props.isPico0()) {
cn_trtl_double_mainloop_sandybridge_asm(ctx);
}
else if (ALGO == Algorithm::CN_PICO_TLO) {
else if (props.isPicoTLO()) {
cn_tlo_double_mainloop_sandybridge_asm(ctx);
}
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
else if (props.isUPX2()) {
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx);
}
@@ -1102,13 +1090,13 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
}
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
else if (props.isRWZ()) {
cnv2_rwz_double_mainloop_asm(ctx);
}
else if (ALGO == Algorithm::CN_ZLS) {
else if (props.isZLS()) {
cn_zls_double_mainloop_sandybridge_asm(ctx);
}
else if (ALGO == Algorithm::CN_DOUBLE) {
else if (props.isDouble()) {
cn_double_double_mainloop_sandybridge_asm(ctx);
}
else if (props.isR()) {
@@ -1146,9 +1134,8 @@ template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 32);
return;
}
@@ -1163,12 +1150,12 @@ static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict_
VARIANT1_INIT(0);
ctx[0]->tweak1_2 = tweak1_2_0;
ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_single_mainloop_asm(ctx);
if (props.isGR0()) cn_gr0_single_mainloop_asm(ctx);
if (props.isGR1()) cn_gr1_single_mainloop_asm(ctx);
if (props.isGR2()) cn_gr2_single_mainloop_asm(ctx);
if (props.isGR3()) cn_gr3_single_mainloop_asm(ctx);
if (props.isGR4()) cn_gr4_single_mainloop_asm(ctx);
if (props.isGR5()) cn_gr5_single_mainloop_asm(ctx);
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
@@ -1180,9 +1167,8 @@ template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 64);
return;
}
@@ -1196,7 +1182,7 @@ static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict_
}
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
if (cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
@@ -1214,15 +1200,15 @@ static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict_
ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_double_mainloop_asm(ctx);
if (props.isGR0()) cn_gr0_double_mainloop_asm(ctx);
if (props.isGR1()) cn_gr1_double_mainloop_asm(ctx);
if (props.isGR2()) cn_gr2_double_mainloop_asm(ctx);
if (props.isGR3()) cn_gr3_double_mainloop_asm(ctx);
if (props.isGR4()) cn_gr4_double_mainloop_asm(ctx);
if (props.isGR5()) cn_gr5_double_mainloop_asm(ctx);
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
if (cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
@@ -1267,15 +1253,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 64);
return;
}
@@ -1323,7 +1302,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
__m128 conc_var0, conc_var1;
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
RESTORE_ROUNDING_MODE();
@@ -1334,10 +1313,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx0, cx1;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
if (props.isHeavyTube() || !SOFT_AES) {
cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK]));
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
}
@@ -1345,12 +1324,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
if (IS_CN_HEAVY_TUBE) {
if (props.isHeavyTube()) {
cx0 = aes_round_tweak_div(cx0, ax0);
cx1 = aes_round_tweak_div(cx1, ax1);
}
else if (SOFT_AES) {
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
cryptonight_conceal_tweak(cx0, conc_var0);
@@ -1368,7 +1347,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = _mm_aesenc_si128(cx1, ax1);
}
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
if (props.isBase1() || props.isBase2()) {
cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
} else {
@@ -1383,13 +1362,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
if (BASE == Algorithm::CN_2) {
if (props.isBase2()) {
if (props.isR()) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
if (ALGO == Algorithm::CN_R) {
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
}
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
} else {
VARIANT2_INTEGER_MATH(0, cl, cx0);
}
@@ -1397,11 +1374,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
lo = __umul128(idx0, cl, &hi);
if (BASE == Algorithm::CN_2) {
if (ALGO == Algorithm::CN_R) {
if (props.isBase2()) {
if (props.isR()) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
} else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
}
}
@@ -1410,9 +1387,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
if (props.isHeavyTube() || props.isRTO()) {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (BASE == Algorithm::CN_1) {
} else if (props.isBase1()) {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
} else {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
@@ -1430,7 +1407,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) {
if (props.isHeavyXHV()) {
d = ~d;
}
@@ -1441,13 +1418,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
if (BASE == Algorithm::CN_2) {
if (props.isBase2()) {
if (props.isR()) {
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
if (ALGO == Algorithm::CN_R) {
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
}
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
} else {
VARIANT2_INTEGER_MATH(1, cl, cx1);
}
@@ -1455,11 +1430,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
lo = __umul128(idx1, cl, &hi);
if (BASE == Algorithm::CN_2) {
if (ALGO == Algorithm::CN_R) {
if (props.isBase2()) {
if (props.isR()) {
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
} else {
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
}
}
@@ -1468,9 +1443,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
if (props.isHeavyTube() || props.isRTO()) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
} else if (BASE == Algorithm::CN_1) {
} else if (props.isBase1()) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
} else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
@@ -1488,7 +1463,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) {
if (props.isHeavyXHV()) {
d = ~d;
}
@@ -1496,7 +1471,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
}
# endif
if (BASE == Algorithm::CN_2) {
if (props.isBase2()) {
bx01 = bx00;
bx11 = bx10;
}
@@ -1529,9 +1504,8 @@ template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 32 * 4);
return;
}
@@ -1549,7 +1523,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
}
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
if (cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
}
@@ -1569,15 +1543,15 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_quad_mainloop_asm(ctx);
if (props.isGR0()) cn_gr0_quad_mainloop_asm(ctx);
if (props.isGR1()) cn_gr1_quad_mainloop_asm(ctx);
if (props.isGR2()) cn_gr2_quad_mainloop_asm(ctx);
if (props.isGR3()) cn_gr3_quad_mainloop_asm(ctx);
if (props.isGR4()) cn_gr4_quad_mainloop_asm(ctx);
if (props.isGR5()) cn_gr5_quad_mainloop_asm(ctx);
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
if (cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
}
@@ -1606,14 +1580,14 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
c = _mm_load_si128(ptr); \
if (ALGO == Algorithm::CN_CCX) { \
if (props.isCCX()) { \
cryptonight_conceal_tweak(c, conc_var); \
}
#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
if (IS_CN_HEAVY_TUBE) { \
if (props.isHeavyTube()) { \
c = aes_round_tweak_div(c, a); \
} \
else if (SOFT_AES) { \
@@ -1622,7 +1596,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
c = _mm_aesenc_si128(c, a); \
} \
\
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) { \
if (props.isBase1() || props.isBase2()) { \
cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
} else { \
_mm_store_si128(ptr, _mm_xor_si128(b0, c)); \
@@ -1638,36 +1612,34 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \
uint64_t al##part, ah##part; \
if (BASE == Algorithm::CN_2) { \
if (props.isBase2()) { \
if (props.isR()) { \
al##part = _mm_cvtsi128_si64(a); \
ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \
VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \
if (ALGO == Algorithm::CN_R) { \
al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \
ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
} \
al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \
ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
} else { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \
} \
} \
lo = __umul128(idx, cl##part, &hi); \
if (BASE == Algorithm::CN_2) { \
if (ALGO == Algorithm::CN_R) { \
if (props.isBase2()) { \
if (props.isR()) { \
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
} else { \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0)); \
} \
} \
if (ALGO == Algorithm::CN_R) { \
if (props.isR()) { \
a = _mm_set_epi64x(ah##part, al##part); \
} \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
\
if (BASE == Algorithm::CN_1) { \
if (props.isBase1()) { \
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
\
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) { \
if (props.isHeavyTube() || props.isRTO()) { \
((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \
} \
} else { \
@@ -1681,13 +1653,13 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
int64_t q = n / (d | 0x5); \
((int64_t*)&l[idx & MASK])[0] = n ^ q; \
if (IS_CN_HEAVY_XHV) { \
if (props.isHeavyXHV()) { \
d = ~d; \
} \
\
idx = d ^ q; \
} \
if (BASE == Algorithm::CN_2) { \
if (props.isBase2()) { \
b1 = b0; \
} \
b0 = c;
@@ -1697,11 +1669,11 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
__m128i mc##n; \
__m128i division_result_xmm_##n; \
__m128i sqrt_result_xmm_##n; \
if (BASE == Algorithm::CN_1) { \
if (props.isBase1()) { \
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
} \
if (BASE == Algorithm::CN_2) { \
if (props.isBase2()) { \
division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \
sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \
} \
@@ -1710,7 +1682,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
__m128i cx##n = _mm_setzero_si128(); \
__m128 conc_var##n; \
if (ALGO == Algorithm::CN_CCX) { \
if (props.isCCX()) { \
conc_var##n = _mm_setzero_ps(); \
} \
VARIANT4_RANDOM_MATH_INIT(n);
@@ -1721,17 +1693,8 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
{
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
constexpr bool IS_CN_HEAVY_XHV = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 32 * 3);
return;
}
@@ -1755,7 +1718,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
RESTORE_ROUNDING_MODE();
}
@@ -1819,17 +1782,8 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
constexpr bool IS_CN_HEAVY_XHV = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 32 * 4);
return;
}
@@ -1869,7 +1823,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3);
VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
RESTORE_ROUNDING_MODE();
}
@@ -1930,17 +1884,8 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
{
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
constexpr bool IS_CN_HEAVY_XHV = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
if (props.isBase1() && size < 43) {
memset(output, 0, 32 * 5);
return;
}
@@ -1970,7 +1915,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
CONST_INIT(ctx[3], 3);
CONST_INIT(ctx[4], 4);
VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) {
if (props.isCCX()) {
RESTORE_ROUNDING_MODE();
}

View File

@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w18, w18, CacheLineAlignMask
// and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
}
else
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos;
uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k);
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 13)
if (literal_id < 12)
{
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst)
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
// sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
uint32_t imm = instr.getImm32();
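
A note on the emit32 edits above: each call assembles an A64 instruction by OR-ing register numbers into a base encoding, with the destination register in bits 0-4 and source registers in the bit 5-9 and bit 16-20 fields, so retargeting the temporary from x18 to x20 (and to x19 for the FP loads) only changes the small constants being OR-ed in, e.g. (18 << 5) becomes (20 << 5). Using x20 as a temporary also costs one IMUL_RCP literal register, which is why literal_regs shrinks from 13 to 12 entries. A tiny sketch of the field packing (hypothetical helper, not xmrig's API):

// --- illustrative sketch, not part of the diff ---
#include <cstdint>

constexpr uint32_t encodeRdRn(uint32_t base, uint32_t rd, uint32_t rn)
{
    return base | (rd & 31u) | ((rn & 31u) << 5);  // rd -> bits 0-4, rn -> bits 5-9
}

// Same value as the "and w17, w20, ScratchpadL3Mask64" encoding above, before
// the JIT ORs the mask field (bits 10 and up) into it.
static_assert(encodeRdRn(0x121A0000u, 17, 20) == (0x121A0000u | 17u | (20u << 5)),
              "field packing matches the diff");
// --- end sketch ---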

View File

@@ -72,9 +72,9 @@
# x15 -> "r7"
# x16 -> spAddr0
# x17 -> spAddr1
# x18 -> temporary
# x18 -> unused (platform register, don't touch it)
# x19 -> temporary
# x20 -> literal for IMUL_RCP
# x20 -> temporary
# x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers
sub sp, sp, 192
stp x16, x17, [sp]
stp x18, x19, [sp, 16]
str x19, [sp, 16]
stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64]
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
# Read literals
ldr x0, literal_x0
ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21
ldr x22, literal_x22
ldr x23, literal_x23
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32
lsr x20, x10, 32
# Actual mask will be inserted by JIT compiler
and w16, w10, 1
and w17, w18, 1
and w17, w20, 1
# x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0)
ldp x18, x19, [x16]
eor x4, x4, x18
ldp x20, x19, [x16]
eor x4, x4, x20
eor x5, x5, x19
ldp x18, x19, [x16, 16]
eor x6, x6, x18
ldp x20, x19, [x16, 16]
eor x6, x6, x20
eor x7, x7, x19
ldp x18, x19, [x16, 32]
eor x12, x12, x18
ldp x20, x19, [x16, 32]
eor x12, x12, x20
eor x13, x13, x19
ldp x18, x19, [x16, 48]
eor x14, x14, x18
ldp x20, x19, [x16, 48]
eor x14, x14, x20
eor x15, x15, x19
# Load group F registers (spAddr1)
ldpsw x18, x19, [x17]
ins v16.d[0], x18
ldpsw x20, x19, [x17]
ins v16.d[0], x20
ins v16.d[1], x19
ldpsw x18, x19, [x17, 8]
ins v17.d[0], x18
ldpsw x20, x19, [x17, 8]
ins v17.d[0], x20
ins v17.d[1], x19
ldpsw x18, x19, [x17, 16]
ins v18.d[0], x18
ldpsw x20, x19, [x17, 16]
ins v18.d[0], x20
ins v18.d[1], x19
ldpsw x18, x19, [x17, 24]
ins v19.d[0], x18
ldpsw x20, x19, [x17, 24]
ins v19.d[0], x20
ins v19.d[1], x19
scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d
# Load group E registers (spAddr1)
ldpsw x18, x19, [x17, 32]
ins v20.d[0], x18
ldpsw x20, x19, [x17, 32]
ins v20.d[0], x20
ins v20.d[1], x19
ldpsw x18, x19, [x17, 40]
ins v21.d[0], x18
ldpsw x20, x19, [x17, 40]
ins v21.d[0], x20
ins v21.d[1], x19
ldpsw x18, x19, [x17, 48]
ins v22.d[0], x18
ldpsw x20, x19, [x17, 48]
ins v22.d[0], x20
ins v22.d[1], x19
ldpsw x18, x19, [x17, 56]
ins v23.d[0], x18
ldpsw x20, x19, [x17, 56]
ins v23.d[0], x20
ins v23.d[1], x19
scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch
mov w18, w9
mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler
and x18, x18, 1
add x18, x18, x1
and x20, x20, 1
add x20, x20, x1
# Prefetch dataset data
prfm pldl2strm, [x18]
prfm pldl2strm, [x20]
# mx <-> ma
ror x9, x9, 32
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line):
# xor integer registers with dataset data
ldp x18, x19, [x10]
eor x4, x4, x18
ldp x20, x19, [x10]
eor x4, x4, x20
eor x5, x5, x19
ldp x18, x19, [x10, 16]
eor x6, x6, x18
ldp x20, x19, [x10, 16]
eor x6, x6, x20
eor x7, x7, x19
ldp x18, x19, [x10, 32]
eor x12, x12, x18
ldp x20, x19, [x10, 32]
eor x12, x12, x20
eor x13, x13, x19
ldp x18, x19, [x10, 48]
eor x14, x14, x18
ldp x20, x19, [x10, 48]
eor x14, x14, x20
eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1):
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers
ldp x16, x17, [sp]
ldp x18, x19, [sp, 16]
ldr x19, [sp, 16]
ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64]
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
eor x9, x9, x20
# mx <-> ma
ror x9, x9, 32
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item
DECL(randomx_init_dataset_aarch64):
# Save x30 (return address)
str x30, [sp, -16]!
# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
stp x20, x30, [sp, -16]!
# Load pointer to cache memory
ldr x0, [x0]
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x30 (return address)
ldr x30, [sp], 16
# Restore x20 and x30
ldp x20, x30, [sp], 16
ret