mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-09 08:42:40 -05:00
Compare commits
5 Commits
7872b22677
...
8aa110d182
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8aa110d182 | ||
|
|
fee51b20fa | ||
|
|
5e66efabcf | ||
|
|
862280f28c | ||
|
|
814e1de2a6 |
@@ -37,14 +37,36 @@ class CnAlgo
|
||||
public:
|
||||
constexpr CnAlgo() {};
|
||||
|
||||
constexpr inline Algorithm::Id base() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::base(ALGO); }
|
||||
constexpr inline bool isHeavy() const { return Algorithm::family(ALGO) == Algorithm::CN_HEAVY; }
|
||||
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; }
|
||||
constexpr inline size_t memory() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::l3(ALGO); }
|
||||
constexpr inline uint32_t iterations() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
|
||||
# define ASSERT_CN static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm")
|
||||
constexpr inline Algorithm::Id base() const { ASSERT_CN; return Algorithm::base(ALGO); }
|
||||
constexpr inline size_t memory() const { ASSERT_CN; return Algorithm::l3(ALGO); }
|
||||
constexpr inline uint32_t iterations() const { ASSERT_CN; return CN_ITER; }
|
||||
constexpr inline uint32_t mask() const { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); }
|
||||
constexpr inline uint32_t half_mem() const { return mask() < memory() / 2; }
|
||||
|
||||
constexpr inline bool isBase1() const { ASSERT_CN; return Algorithm::base(ALGO) == Algorithm::CN_1; }
|
||||
constexpr inline bool isBase2() const { ASSERT_CN; return Algorithm::base(ALGO) == Algorithm::CN_2; }
|
||||
constexpr inline bool is2() const { return ALGO == Algorithm::CN_2; }
|
||||
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; }
|
||||
constexpr inline bool isHalf() const { return ALGO == Algorithm::CN_HALF; }
|
||||
constexpr inline bool isRTO() const { return ALGO == Algorithm::CN_RTO; }
|
||||
constexpr inline bool isRWZ() const { return ALGO == Algorithm::CN_RWZ; }
|
||||
constexpr inline bool isZLS() const { return ALGO == Algorithm::CN_ZLS; }
|
||||
constexpr inline bool isDouble() const { return ALGO == Algorithm::CN_DOUBLE; }
|
||||
constexpr inline bool isCCX() const { return ALGO == Algorithm::CN_CCX; }
|
||||
constexpr inline bool isHeavy() const { ASSERT_CN; return Algorithm::family(ALGO) == Algorithm::CN_HEAVY; }
|
||||
constexpr inline bool isHeavyTube() const { return ALGO == Algorithm::CN_HEAVY_TUBE; }
|
||||
constexpr inline bool isHeavyXHV() const { return ALGO == Algorithm::CN_HEAVY_XHV; }
|
||||
constexpr inline bool isPico0() const { return ALGO == Algorithm::CN_PICO_0; }
|
||||
constexpr inline bool isPicoTLO() const { return ALGO == Algorithm::CN_PICO_TLO; }
|
||||
constexpr inline bool isUPX2() const { return ALGO == Algorithm::CN_UPX2; }
|
||||
constexpr inline bool isGR0() const { return ALGO == Algorithm::CN_GR_0; }
|
||||
constexpr inline bool isGR1() const { return ALGO == Algorithm::CN_GR_1; }
|
||||
constexpr inline bool isGR2() const { return ALGO == Algorithm::CN_GR_2; }
|
||||
constexpr inline bool isGR3() const { return ALGO == Algorithm::CN_GR_3; }
|
||||
constexpr inline bool isGR4() const { return ALGO == Algorithm::CN_GR_4; }
|
||||
constexpr inline bool isGR5() const { return ALGO == Algorithm::CN_GR_5; }
|
||||
|
||||
inline static uint32_t iterations(Algorithm::Id algo)
|
||||
{
|
||||
switch (algo) {
|
||||
|
||||
@@ -603,7 +603,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
if (props.base() == Algorithm::CN_2) {
|
||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(mem_out), _mm_xor_si128(bx0, cx));
|
||||
} else {
|
||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||
@@ -665,15 +665,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||
# else
|
||||
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
@@ -694,10 +687,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init<ALGO>(code, height);
|
||||
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
|
||||
}
|
||||
|
||||
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
|
||||
ctx[0]->generated_code_data = { ALGO, height };
|
||||
}
|
||||
|
||||
@@ -718,26 +708,26 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
__m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10]));
|
||||
|
||||
__m128 conc_var;
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
conc_var = _mm_setzero_ps();
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx;
|
||||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
if (props.isHeavyTube() || !SOFT_AES) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
}
|
||||
}
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0));
|
||||
if (IS_CN_HEAVY_TUBE) {
|
||||
if (props.isHeavyTube()) {
|
||||
cx = aes_round_tweak_div(cx, ax0);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
@@ -750,7 +740,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
}
|
||||
|
||||
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
||||
if (props.isBase1() || props.isBase2()) {
|
||||
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
||||
} else {
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
|
||||
@@ -762,13 +752,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
|
||||
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isBase2()) {
|
||||
if (props.isR()) {
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
al0 ^= r0[2] | (static_cast<uint64_t>(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | (static_cast<uint64_t>(r0[1]) << 32);
|
||||
}
|
||||
al0 ^= r0[2] | (static_cast<uint64_t>(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | (static_cast<uint64_t>(r0[1]) << 32);
|
||||
} else {
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx);
|
||||
}
|
||||
@@ -776,11 +764,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
if (props.isBase2()) {
|
||||
if (props.isR()) {
|
||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -789,9 +777,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
|
||||
|
||||
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||
if (props.isHeavyTube() || props.isRTO()) {
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||
} else if (BASE == Algorithm::CN_1) {
|
||||
} else if (props.isBase1()) {
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
|
||||
} else {
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
|
||||
@@ -819,7 +807,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
|
||||
|
||||
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||
if (props.isHeavyXHV()) {
|
||||
d = ~d;
|
||||
}
|
||||
|
||||
@@ -827,7 +815,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isBase2()) {
|
||||
bx1 = bx0;
|
||||
}
|
||||
|
||||
@@ -960,7 +948,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
if (props.is2()) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cnv2_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
@@ -971,7 +959,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
cnv2_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_HALF) {
|
||||
else if (props.isHalf()) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_half_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
@@ -983,7 +971,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
}
|
||||
}
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
else if (ALGO == Algorithm::CN_PICO_0) {
|
||||
else if (props.isPico0()) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_trtl_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
@@ -994,7 +982,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
cn_trtl_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_PICO_TLO) {
|
||||
else if (props.isPicoTLO()) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_tlo_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
@@ -1006,10 +994,10 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
}
|
||||
}
|
||||
# endif
|
||||
else if (ALGO == Algorithm::CN_RWZ) {
|
||||
else if (props.isRWZ()) {
|
||||
cnv2_rwz_mainloop_asm(ctx);
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_ZLS) {
|
||||
else if (props.isZLS()) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_zls_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
@@ -1020,7 +1008,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
cn_zls_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_DOUBLE) {
|
||||
else if (props.isDouble()) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_double_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
@@ -1032,7 +1020,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
}
|
||||
}
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
else if (ALGO == Algorithm::CN_UPX2) {
|
||||
else if (props.isUPX2()) {
|
||||
cn_upx2_mainloop_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
@@ -1078,22 +1066,22 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
if (props.is2()) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_HALF) {
|
||||
else if (props.isHalf()){
|
||||
cn_half_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
else if (ALGO == Algorithm::CN_PICO_0) {
|
||||
else if (props.isPico0()) {
|
||||
cn_trtl_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_PICO_TLO) {
|
||||
else if (props.isPicoTLO()) {
|
||||
cn_tlo_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
else if (ALGO == Algorithm::CN_UPX2) {
|
||||
else if (props.isUPX2()) {
|
||||
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
|
||||
cnv2_upx_double_mainloop_zen3_asm(ctx);
|
||||
}
|
||||
@@ -1102,13 +1090,13 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
}
|
||||
}
|
||||
# endif
|
||||
else if (ALGO == Algorithm::CN_RWZ) {
|
||||
else if (props.isRWZ()) {
|
||||
cnv2_rwz_double_mainloop_asm(ctx);
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_ZLS) {
|
||||
else if (props.isZLS()) {
|
||||
cn_zls_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_DOUBLE) {
|
||||
else if (props.isDouble()) {
|
||||
cn_double_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
else if (props.isR()) {
|
||||
@@ -1146,9 +1134,8 @@ template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
@@ -1163,12 +1150,12 @@ static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict_
|
||||
VARIANT1_INIT(0);
|
||||
ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_single_mainloop_asm(ctx);
|
||||
if (props.isGR0()) cn_gr0_single_mainloop_asm(ctx);
|
||||
if (props.isGR1()) cn_gr1_single_mainloop_asm(ctx);
|
||||
if (props.isGR2()) cn_gr2_single_mainloop_asm(ctx);
|
||||
if (props.isGR3()) cn_gr3_single_mainloop_asm(ctx);
|
||||
if (props.isGR4()) cn_gr4_single_mainloop_asm(ctx);
|
||||
if (props.isGR5()) cn_gr5_single_mainloop_asm(ctx);
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
@@ -1180,9 +1167,8 @@ template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
@@ -1196,7 +1182,7 @@ static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict_
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
if (cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
@@ -1214,15 +1200,15 @@ static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict_
|
||||
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_double_mainloop_asm(ctx);
|
||||
if (props.isGR0()) cn_gr0_double_mainloop_asm(ctx);
|
||||
if (props.isGR1()) cn_gr1_double_mainloop_asm(ctx);
|
||||
if (props.isGR2()) cn_gr2_double_mainloop_asm(ctx);
|
||||
if (props.isGR3()) cn_gr3_double_mainloop_asm(ctx);
|
||||
if (props.isGR4()) cn_gr4_double_mainloop_asm(ctx);
|
||||
if (props.isGR5()) cn_gr5_double_mainloop_asm(ctx);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
if (cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
@@ -1267,15 +1253,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||
# else
|
||||
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
@@ -1323,7 +1302,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
__m128 conc_var0, conc_var1;
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
conc_var0 = _mm_setzero_ps();
|
||||
conc_var1 = _mm_setzero_ps();
|
||||
RESTORE_ROUNDING_MODE();
|
||||
@@ -1334,10 +1313,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx0, cx1;
|
||||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
if (props.isHeavyTube() || !SOFT_AES) {
|
||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
cryptonight_conceal_tweak(cx0, conc_var0);
|
||||
cryptonight_conceal_tweak(cx1, conc_var1);
|
||||
}
|
||||
@@ -1345,12 +1324,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
if (IS_CN_HEAVY_TUBE) {
|
||||
if (props.isHeavyTube()) {
|
||||
cx0 = aes_round_tweak_div(cx0, ax0);
|
||||
cx1 = aes_round_tweak_div(cx1, ax1);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||
cryptonight_conceal_tweak(cx0, conc_var0);
|
||||
@@ -1368,7 +1347,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
}
|
||||
|
||||
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
||||
if (props.isBase1() || props.isBase2()) {
|
||||
cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
|
||||
cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
|
||||
} else {
|
||||
@@ -1383,13 +1362,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isBase2()) {
|
||||
if (props.isR()) {
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
}
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
} else {
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx0);
|
||||
}
|
||||
@@ -1397,11 +1374,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
if (props.isBase2()) {
|
||||
if (props.isR()) {
|
||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1410,9 +1387,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
||||
|
||||
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||
if (props.isHeavyTube() || props.isRTO()) {
|
||||
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||
} else if (BASE == Algorithm::CN_1) {
|
||||
} else if (props.isBase1()) {
|
||||
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||
} else {
|
||||
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
|
||||
@@ -1430,7 +1407,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
||||
|
||||
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||
if (props.isHeavyXHV()) {
|
||||
d = ~d;
|
||||
}
|
||||
|
||||
@@ -1441,13 +1418,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isBase2()) {
|
||||
if (props.isR()) {
|
||||
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||
}
|
||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||
} else {
|
||||
VARIANT2_INTEGER_MATH(1, cl, cx1);
|
||||
}
|
||||
@@ -1455,11 +1430,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (ALGO == Algorithm::CN_R) {
|
||||
if (props.isBase2()) {
|
||||
if (props.isR()) {
|
||||
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1468,9 +1443,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
|
||||
|
||||
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||
if (props.isHeavyTube() || props.isRTO()) {
|
||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
|
||||
} else if (BASE == Algorithm::CN_1) {
|
||||
} else if (props.isBase1()) {
|
||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
|
||||
} else {
|
||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
|
||||
@@ -1488,7 +1463,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
|
||||
|
||||
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||
if (props.isHeavyXHV()) {
|
||||
d = ~d;
|
||||
}
|
||||
|
||||
@@ -1496,7 +1471,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isBase2()) {
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
}
|
||||
@@ -1529,9 +1504,8 @@ template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 32 * 4);
|
||||
return;
|
||||
}
|
||||
@@ -1549,7 +1523,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
if (cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
@@ -1569,15 +1543,15 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_quad_mainloop_asm(ctx);
|
||||
if (props.isGR0()) cn_gr0_quad_mainloop_asm(ctx);
|
||||
if (props.isGR1()) cn_gr1_quad_mainloop_asm(ctx);
|
||||
if (props.isGR2()) cn_gr2_quad_mainloop_asm(ctx);
|
||||
if (props.isGR3()) cn_gr3_quad_mainloop_asm(ctx);
|
||||
if (props.isGR4()) cn_gr4_quad_mainloop_asm(ctx);
|
||||
if (props.isGR5()) cn_gr5_quad_mainloop_asm(ctx);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
if (cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
@@ -1606,14 +1580,14 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
|
||||
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
|
||||
c = _mm_load_si128(ptr); \
|
||||
if (ALGO == Algorithm::CN_CCX) { \
|
||||
if (props.isCCX()) { \
|
||||
cryptonight_conceal_tweak(c, conc_var); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
|
||||
if (IS_CN_HEAVY_TUBE) { \
|
||||
if (props.isHeavyTube()) { \
|
||||
c = aes_round_tweak_div(c, a); \
|
||||
} \
|
||||
else if (SOFT_AES) { \
|
||||
@@ -1622,7 +1596,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
c = _mm_aesenc_si128(c, a); \
|
||||
} \
|
||||
\
|
||||
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) { \
|
||||
if (props.isBase1() || props.isBase2()) { \
|
||||
cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
|
||||
} else { \
|
||||
_mm_store_si128(ptr, _mm_xor_si128(b0, c)); \
|
||||
@@ -1638,36 +1612,34 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
|
||||
#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \
|
||||
uint64_t al##part, ah##part; \
|
||||
if (BASE == Algorithm::CN_2) { \
|
||||
if (props.isBase2()) { \
|
||||
if (props.isR()) { \
|
||||
al##part = _mm_cvtsi128_si64(a); \
|
||||
ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \
|
||||
VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \
|
||||
if (ALGO == Algorithm::CN_R) { \
|
||||
al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \
|
||||
ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
|
||||
} \
|
||||
al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \
|
||||
ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
|
||||
} else { \
|
||||
VARIANT2_INTEGER_MATH(part, cl##part, c); \
|
||||
} \
|
||||
} \
|
||||
lo = __umul128(idx, cl##part, &hi); \
|
||||
if (BASE == Algorithm::CN_2) { \
|
||||
if (ALGO == Algorithm::CN_R) { \
|
||||
if (props.isBase2()) { \
|
||||
if (props.isR()) { \
|
||||
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
|
||||
} else { \
|
||||
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); \
|
||||
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0)); \
|
||||
} \
|
||||
} \
|
||||
if (ALGO == Algorithm::CN_R) { \
|
||||
if (props.isR()) { \
|
||||
a = _mm_set_epi64x(ah##part, al##part); \
|
||||
} \
|
||||
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
|
||||
\
|
||||
if (BASE == Algorithm::CN_1) { \
|
||||
if (props.isBase1()) { \
|
||||
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
|
||||
\
|
||||
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) { \
|
||||
if (props.isHeavyTube() || props.isRTO()) { \
|
||||
((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \
|
||||
} \
|
||||
} else { \
|
||||
@@ -1681,13 +1653,13 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
|
||||
int64_t q = n / (d | 0x5); \
|
||||
((int64_t*)&l[idx & MASK])[0] = n ^ q; \
|
||||
if (IS_CN_HEAVY_XHV) { \
|
||||
if (props.isHeavyXHV()) { \
|
||||
d = ~d; \
|
||||
} \
|
||||
\
|
||||
idx = d ^ q; \
|
||||
} \
|
||||
if (BASE == Algorithm::CN_2) { \
|
||||
if (props.isBase2()) { \
|
||||
b1 = b0; \
|
||||
} \
|
||||
b0 = c;
|
||||
@@ -1697,11 +1669,11 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
__m128i mc##n; \
|
||||
__m128i division_result_xmm_##n; \
|
||||
__m128i sqrt_result_xmm_##n; \
|
||||
if (BASE == Algorithm::CN_1) { \
|
||||
if (props.isBase1()) { \
|
||||
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
|
||||
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
|
||||
} \
|
||||
if (BASE == Algorithm::CN_2) { \
|
||||
if (props.isBase2()) { \
|
||||
division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \
|
||||
sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \
|
||||
} \
|
||||
@@ -1710,7 +1682,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
|
||||
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
|
||||
__m128i cx##n = _mm_setzero_si128(); \
|
||||
__m128 conc_var##n; \
|
||||
if (ALGO == Algorithm::CN_CCX) { \
|
||||
if (props.isCCX()) { \
|
||||
conc_var##n = _mm_setzero_ps(); \
|
||||
} \
|
||||
VARIANT4_RANDOM_MATH_INIT(n);
|
||||
@@ -1721,17 +1693,8 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
|
||||
# else
|
||||
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||
constexpr bool IS_CN_HEAVY_XHV = false;
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 32 * 3);
|
||||
return;
|
||||
}
|
||||
@@ -1755,7 +1718,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
||||
CONST_INIT(ctx[1], 1);
|
||||
CONST_INIT(ctx[2], 2);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
@@ -1819,17 +1782,8 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
|
||||
# else
|
||||
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||
constexpr bool IS_CN_HEAVY_XHV = false;
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 32 * 4);
|
||||
return;
|
||||
}
|
||||
@@ -1869,7 +1823,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
||||
CONST_INIT(ctx[2], 2);
|
||||
CONST_INIT(ctx[3], 3);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
@@ -1930,17 +1884,8 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
|
||||
# else
|
||||
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||
constexpr bool IS_CN_HEAVY_XHV = false;
|
||||
# endif
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
if (props.isBase1() && size < 43) {
|
||||
memset(output, 0, 32 * 5);
|
||||
return;
|
||||
}
|
||||
@@ -1970,7 +1915,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
||||
CONST_INIT(ctx[3], 3);
|
||||
CONST_INIT(ctx[4], 4);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
if (props.isCCX()) {
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
|
||||
@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
// and w17, w18, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
// and w17, w20, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
}
|
||||
|
||||
// Update spMix2
|
||||
// eor w18, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
// eor w20, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
|
||||
// Jump back to the main loop
|
||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
|
||||
// and w18, w18, CacheLineAlignMask
|
||||
// and w20, w20, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
|
||||
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
|
||||
// and w10, w10, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
|
||||
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
// and w17, w18, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
// and w17, w20, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||
}
|
||||
|
||||
// Update spMix2
|
||||
// eor w18, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
// eor w20, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
|
||||
// Jump back to the main loop
|
||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
|
||||
}
|
||||
else
|
||||
{
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMovImmediate(tmp_reg, imm, code, k);
|
||||
|
||||
// add dst, src, tmp_reg
|
||||
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
|
||||
uint32_t k = codePos;
|
||||
|
||||
uint32_t imm = instr.getImm32();
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 19;
|
||||
|
||||
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// add dst, dst, tmp_reg
|
||||
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// sub dst, dst, tmp_reg
|
||||
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
if (src == dst)
|
||||
{
|
||||
src = 18;
|
||||
src = 20;
|
||||
emitMovImmediate(src, instr.getImm32(), code, k);
|
||||
}
|
||||
|
||||
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// sub dst, dst, tmp_reg
|
||||
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// umulh dst, dst, tmp_reg
|
||||
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// smulh dst, dst, tmp_reg
|
||||
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
uint32_t k = codePos;
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint64_t N = 1ULL << 63;
|
||||
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
||||
literalPos -= sizeof(uint64_t);
|
||||
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
|
||||
|
||||
if (literal_id < 13)
|
||||
if (literal_id < 12)
|
||||
{
|
||||
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
|
||||
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
|
||||
|
||||
// mul dst, dst, literal_reg
|
||||
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
|
||||
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
if (src == dst)
|
||||
{
|
||||
src = 18;
|
||||
src = 20;
|
||||
emitMovImmediate(src, instr.getImm32(), code, k);
|
||||
}
|
||||
|
||||
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// eor dst, dst, tmp_reg
|
||||
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
if (src != dst)
|
||||
{
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
|
||||
// sub tmp_reg, xzr, src
|
||||
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
|
||||
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
uint32_t k = codePos;
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
|
||||
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
|
||||
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
|
||||
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
constexpr uint32_t fpcr_tmp_reg = 8;
|
||||
|
||||
// ror tmp_reg, src, imm
|
||||
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
|
||||
uint32_t imm = instr.getImm32();
|
||||
|
||||
|
||||
@@ -72,9 +72,9 @@
|
||||
# x15 -> "r7"
|
||||
# x16 -> spAddr0
|
||||
# x17 -> spAddr1
|
||||
# x18 -> temporary
|
||||
# x18 -> unused (platform register, don't touch it)
|
||||
# x19 -> temporary
|
||||
# x20 -> literal for IMUL_RCP
|
||||
# x20 -> temporary
|
||||
# x21 -> literal for IMUL_RCP
|
||||
# x22 -> literal for IMUL_RCP
|
||||
# x23 -> literal for IMUL_RCP
|
||||
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
|
||||
# Save callee-saved registers
|
||||
sub sp, sp, 192
|
||||
stp x16, x17, [sp]
|
||||
stp x18, x19, [sp, 16]
|
||||
str x19, [sp, 16]
|
||||
stp x20, x21, [sp, 32]
|
||||
stp x22, x23, [sp, 48]
|
||||
stp x24, x25, [sp, 64]
|
||||
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
|
||||
# Read literals
|
||||
ldr x0, literal_x0
|
||||
ldr x11, literal_x11
|
||||
ldr x20, literal_x20
|
||||
ldr x21, literal_x21
|
||||
ldr x22, literal_x22
|
||||
ldr x23, literal_x23
|
||||
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
|
||||
DECL(randomx_program_aarch64_main_loop):
|
||||
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
||||
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
|
||||
lsr x18, x10, 32
|
||||
lsr x20, x10, 32
|
||||
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and w16, w10, 1
|
||||
and w17, w18, 1
|
||||
and w17, w20, 1
|
||||
|
||||
# x16 = scratchpad + spAddr0
|
||||
# x17 = scratchpad + spAddr1
|
||||
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
|
||||
add x17, x17, x2
|
||||
|
||||
# xor integer registers with scratchpad data (spAddr0)
|
||||
ldp x18, x19, [x16]
|
||||
eor x4, x4, x18
|
||||
ldp x20, x19, [x16]
|
||||
eor x4, x4, x20
|
||||
eor x5, x5, x19
|
||||
ldp x18, x19, [x16, 16]
|
||||
eor x6, x6, x18
|
||||
ldp x20, x19, [x16, 16]
|
||||
eor x6, x6, x20
|
||||
eor x7, x7, x19
|
||||
ldp x18, x19, [x16, 32]
|
||||
eor x12, x12, x18
|
||||
ldp x20, x19, [x16, 32]
|
||||
eor x12, x12, x20
|
||||
eor x13, x13, x19
|
||||
ldp x18, x19, [x16, 48]
|
||||
eor x14, x14, x18
|
||||
ldp x20, x19, [x16, 48]
|
||||
eor x14, x14, x20
|
||||
eor x15, x15, x19
|
||||
|
||||
# Load group F registers (spAddr1)
|
||||
ldpsw x18, x19, [x17]
|
||||
ins v16.d[0], x18
|
||||
ldpsw x20, x19, [x17]
|
||||
ins v16.d[0], x20
|
||||
ins v16.d[1], x19
|
||||
ldpsw x18, x19, [x17, 8]
|
||||
ins v17.d[0], x18
|
||||
ldpsw x20, x19, [x17, 8]
|
||||
ins v17.d[0], x20
|
||||
ins v17.d[1], x19
|
||||
ldpsw x18, x19, [x17, 16]
|
||||
ins v18.d[0], x18
|
||||
ldpsw x20, x19, [x17, 16]
|
||||
ins v18.d[0], x20
|
||||
ins v18.d[1], x19
|
||||
ldpsw x18, x19, [x17, 24]
|
||||
ins v19.d[0], x18
|
||||
ldpsw x20, x19, [x17, 24]
|
||||
ins v19.d[0], x20
|
||||
ins v19.d[1], x19
|
||||
scvtf v16.2d, v16.2d
|
||||
scvtf v17.2d, v17.2d
|
||||
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
|
||||
scvtf v19.2d, v19.2d
|
||||
|
||||
# Load group E registers (spAddr1)
|
||||
ldpsw x18, x19, [x17, 32]
|
||||
ins v20.d[0], x18
|
||||
ldpsw x20, x19, [x17, 32]
|
||||
ins v20.d[0], x20
|
||||
ins v20.d[1], x19
|
||||
ldpsw x18, x19, [x17, 40]
|
||||
ins v21.d[0], x18
|
||||
ldpsw x20, x19, [x17, 40]
|
||||
ins v21.d[0], x20
|
||||
ins v21.d[1], x19
|
||||
ldpsw x18, x19, [x17, 48]
|
||||
ins v22.d[0], x18
|
||||
ldpsw x20, x19, [x17, 48]
|
||||
ins v22.d[0], x20
|
||||
ins v22.d[1], x19
|
||||
ldpsw x18, x19, [x17, 56]
|
||||
ins v23.d[0], x18
|
||||
ldpsw x20, x19, [x17, 56]
|
||||
ins v23.d[0], x20
|
||||
ins v23.d[1], x19
|
||||
scvtf v20.2d, v20.2d
|
||||
scvtf v21.2d, v21.2d
|
||||
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
|
||||
|
||||
literal_x0: .fill 1,8,0
|
||||
literal_x11: .fill 1,8,0
|
||||
literal_x20: .fill 1,8,0
|
||||
literal_x21: .fill 1,8,0
|
||||
literal_x22: .fill 1,8,0
|
||||
literal_x23: .fill 1,8,0
|
||||
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
|
||||
lsr x10, x9, 32
|
||||
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x18
|
||||
eor x9, x9, x20
|
||||
|
||||
# Calculate dataset pointer for dataset prefetch
|
||||
mov w18, w9
|
||||
mov w20, w9
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x18, x18, 1
|
||||
add x18, x18, x1
|
||||
and x20, x20, 1
|
||||
add x20, x20, x1
|
||||
|
||||
# Prefetch dataset data
|
||||
prfm pldl2strm, [x18]
|
||||
prfm pldl2strm, [x20]
|
||||
|
||||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
|
||||
DECL(randomx_program_aarch64_xor_with_dataset_line):
|
||||
# xor integer registers with dataset data
|
||||
ldp x18, x19, [x10]
|
||||
eor x4, x4, x18
|
||||
ldp x20, x19, [x10]
|
||||
eor x4, x4, x20
|
||||
eor x5, x5, x19
|
||||
ldp x18, x19, [x10, 16]
|
||||
eor x6, x6, x18
|
||||
ldp x20, x19, [x10, 16]
|
||||
eor x6, x6, x20
|
||||
eor x7, x7, x19
|
||||
ldp x18, x19, [x10, 32]
|
||||
eor x12, x12, x18
|
||||
ldp x20, x19, [x10, 32]
|
||||
eor x12, x12, x20
|
||||
eor x13, x13, x19
|
||||
ldp x18, x19, [x10, 48]
|
||||
eor x14, x14, x18
|
||||
ldp x20, x19, [x10, 48]
|
||||
eor x14, x14, x20
|
||||
eor x15, x15, x19
|
||||
|
||||
DECL(randomx_program_aarch64_update_spMix1):
|
||||
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
|
||||
|
||||
# Restore callee-saved registers
|
||||
ldp x16, x17, [sp]
|
||||
ldp x18, x19, [sp, 16]
|
||||
ldr x19, [sp, 16]
|
||||
ldp x20, x21, [sp, 32]
|
||||
ldp x22, x23, [sp, 48]
|
||||
ldp x24, x25, [sp, 64]
|
||||
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
|
||||
stp x2, x30, [sp, 80]
|
||||
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x18
|
||||
eor x9, x9, x20
|
||||
|
||||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
|
||||
# x3 -> end item
|
||||
|
||||
DECL(randomx_init_dataset_aarch64):
|
||||
# Save x30 (return address)
|
||||
str x30, [sp, -16]!
|
||||
# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
|
||||
stp x20, x30, [sp, -16]!
|
||||
|
||||
# Load pointer to cache memory
|
||||
ldr x0, [x0]
|
||||
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
|
||||
cmp x2, x3
|
||||
bne DECL(randomx_init_dataset_aarch64_main_loop)
|
||||
|
||||
# Restore x30 (return address)
|
||||
ldr x30, [sp], 16
|
||||
# Restore x20 and x30
|
||||
ldp x20, x30, [sp], 16
|
||||
|
||||
ret
|
||||
|
||||
|
||||
Reference in New Issue
Block a user