1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-07 07:55:04 -05:00

Compare commits

...

6 Commits

Author SHA1 Message Date
Tony Butler
0a54c642d3 Merge 862280f28c into 16ecb8f085 2025-01-22 14:39:04 +00:00
XMRig
16ecb8f085 Allow use of the previous CUDA plugin version with a warning. 2024-12-23 23:14:06 +07:00
xmrig
0229c65232 Merge pull request #3605 from SChernykh/dev
CUDA backend: update RandomX dataset when it changes
2024-12-18 22:36:08 +07:00
SChernykh
4a13a8a75c CUDA backend: update RandomX dataset when it changes 2024-12-18 13:45:10 +01:00
Tony Butler
862280f28c How about this way 2023-07-12 02:06:53 -06:00
Tony Butler
814e1de2a6 CN: Consistency cleanup 2023-07-12 02:06:53 -06:00
7 changed files with 179 additions and 182 deletions

View File

@@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com> * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh> * Copyright 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -22,7 +22,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "backend/cuda/runners/CudaRxRunner.h" #include "backend/cuda/runners/CudaRxRunner.h"
#include "backend/cuda/CudaLaunchData.h" #include "backend/cuda/CudaLaunchData.h"
#include "backend/cuda/wrappers/CudaLib.h" #include "backend/cuda/wrappers/CudaLib.h"
@@ -55,12 +54,21 @@ bool xmrig::CudaRxRunner::run(uint32_t startNonce, uint32_t *rescount, uint32_t
bool xmrig::CudaRxRunner::set(const Job &job, uint8_t *blob) bool xmrig::CudaRxRunner::set(const Job &job, uint8_t *blob)
{ {
if (!m_datasetHost && (m_seed != job.seed())) {
m_seed = job.seed();
if (m_ready) {
const auto *dataset = Rx::dataset(job, 0);
callWrapper(CudaLib::rxUpdateDataset(m_ctx, dataset->raw(), dataset->size(false)));
}
}
const bool rc = CudaBaseRunner::set(job, blob); const bool rc = CudaBaseRunner::set(job, blob);
if (!rc || m_ready) { if (!rc || m_ready) {
return rc; return rc;
} }
auto dataset = Rx::dataset(job, 0); const auto *dataset = Rx::dataset(job, 0);
m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_datasetHost, m_intensity)); m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_datasetHost, m_intensity));
return m_ready; return m_ready;

View File

@@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com> * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh> * Copyright 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -27,6 +27,7 @@
#include "backend/cuda/runners/CudaBaseRunner.h" #include "backend/cuda/runners/CudaBaseRunner.h"
#include "base/tools/Buffer.h"
namespace xmrig { namespace xmrig {
@@ -46,6 +47,7 @@ protected:
private: private:
bool m_ready = false; bool m_ready = false;
const bool m_datasetHost = false; const bool m_datasetHost = false;
Buffer m_seed;
size_t m_intensity = 0; size_t m_intensity = 0;
}; };

View File

@@ -19,10 +19,10 @@
#include <stdexcept> #include <stdexcept>
#include <uv.h> #include <uv.h>
#include "backend/cuda/wrappers/CudaLib.h" #include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/Env.h" #include "base/io/Env.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "base/io/log/Tags.h"
#include "base/kernel/Process.h" #include "base/kernel/Process.h"
#include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxAlgo.h"
@@ -68,6 +68,7 @@ static const char *kPluginVersion = "pluginVersion";
static const char *kRelease = "release"; static const char *kRelease = "release";
static const char *kRxHash = "rxHash"; static const char *kRxHash = "rxHash";
static const char *kRxPrepare = "rxPrepare"; static const char *kRxPrepare = "rxPrepare";
static const char *kRxUpdateDataset = "rxUpdateDataset";
static const char *kSetJob = "setJob"; static const char *kSetJob = "setJob";
static const char *kSetJob_v2 = "setJob_v2"; static const char *kSetJob_v2 = "setJob_v2";
static const char *kVersion = "version"; static const char *kVersion = "version";
@@ -92,6 +93,7 @@ using pluginVersion_t = const char * (*)();
using release_t = void (*)(nvid_ctx *); using release_t = void (*)(nvid_ctx *);
using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *); using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *);
using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t); using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t);
using rxUpdateDataset_t = bool (*)(nvid_ctx *, const void *, size_t);
using setJob_t = bool (*)(nvid_ctx *, const void *, size_t, uint32_t); using setJob_t = bool (*)(nvid_ctx *, const void *, size_t, uint32_t);
using setJob_v2_t = bool (*)(nvid_ctx *, const void *, size_t, const char *); using setJob_v2_t = bool (*)(nvid_ctx *, const void *, size_t, const char *);
using version_t = uint32_t (*)(Version); using version_t = uint32_t (*)(Version);
@@ -116,6 +118,7 @@ static pluginVersion_t pPluginVersion = nullptr;
static release_t pRelease = nullptr; static release_t pRelease = nullptr;
static rxHash_t pRxHash = nullptr; static rxHash_t pRxHash = nullptr;
static rxPrepare_t pRxPrepare = nullptr; static rxPrepare_t pRxPrepare = nullptr;
static rxUpdateDataset_t pRxUpdateDataset = nullptr;
static setJob_t pSetJob = nullptr; static setJob_t pSetJob = nullptr;
static setJob_v2_t pSetJob_v2 = nullptr; static setJob_v2_t pSetJob_v2 = nullptr;
static version_t pVersion = nullptr; static version_t pVersion = nullptr;
@@ -202,10 +205,26 @@ bool xmrig::CudaLib::rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target,
bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept
{ {
# ifdef XMRIG_ALGO_RANDOMX
if (!pRxUpdateDataset) {
LOG_WARN("%s" YELLOW_BOLD("CUDA plugin is outdated. Please update to the latest version"), Tags::randomx());
}
# endif
return pRxPrepare(ctx, dataset, datasetSize, dataset_host, batchSize); return pRxPrepare(ctx, dataset, datasetSize, dataset_host, batchSize);
} }
bool xmrig::CudaLib::rxUpdateDataset(nvid_ctx *ctx, const void *dataset, size_t datasetSize) noexcept
{
if (pRxUpdateDataset) {
return pRxUpdateDataset(ctx, dataset, datasetSize);
}
return true;
}
bool xmrig::CudaLib::kawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t *skipped_hashes) noexcept bool xmrig::CudaLib::kawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t *skipped_hashes) noexcept
{ {
return pKawPowHash(ctx, job_blob, target, rescount, resnonce, skipped_hashes); return pKawPowHash(ctx, job_blob, target, rescount, resnonce, skipped_hashes);
@@ -401,5 +420,7 @@ void xmrig::CudaLib::load()
DLSYM(SetJob_v2); DLSYM(SetJob_v2);
} }
uv_dlsym(&cudaLib, kRxUpdateDataset, reinterpret_cast<void**>(&pRxUpdateDataset));
pInit(); pInit();
} }

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -71,6 +71,7 @@ public:
static bool deviceInit(nvid_ctx *ctx) noexcept; static bool deviceInit(nvid_ctx *ctx) noexcept;
static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept; static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept;
static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept; static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept;
static bool rxUpdateDataset(nvid_ctx *ctx, const void *dataset, size_t datasetSize) noexcept;
static bool kawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t *skipped_hashes) noexcept; static bool kawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t *skipped_hashes) noexcept;
static bool kawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, const void* dag_precalc, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept; static bool kawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, const void* dag_precalc, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept;
static bool kawPowStopHash(nvid_ctx *ctx) noexcept; static bool kawPowStopHash(nvid_ctx *ctx) noexcept;

View File

@@ -37,14 +37,36 @@ class CnAlgo
public: public:
constexpr CnAlgo() {}; constexpr CnAlgo() {};
constexpr inline Algorithm::Id base() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::base(ALGO); } # define ASSERT_CN static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm")
constexpr inline bool isHeavy() const { return Algorithm::family(ALGO) == Algorithm::CN_HEAVY; } constexpr inline Algorithm::Id base() const { ASSERT_CN; return Algorithm::base(ALGO); }
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; } constexpr inline size_t memory() const { ASSERT_CN; return Algorithm::l3(ALGO); }
constexpr inline size_t memory() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::l3(ALGO); } constexpr inline uint32_t iterations() const { ASSERT_CN; return CN_ITER; }
constexpr inline uint32_t iterations() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
constexpr inline uint32_t mask() const { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); } constexpr inline uint32_t mask() const { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); }
constexpr inline uint32_t half_mem() const { return mask() < memory() / 2; } constexpr inline uint32_t half_mem() const { return mask() < memory() / 2; }
constexpr inline bool isBase1() const { ASSERT_CN; return Algorithm::base(ALGO) == Algorithm::CN_1; }
constexpr inline bool isBase2() const { ASSERT_CN; return Algorithm::base(ALGO) == Algorithm::CN_2; }
constexpr inline bool is2() const { return ALGO == Algorithm::CN_2; }
constexpr inline bool isR() const { return ALGO == Algorithm::CN_R; }
constexpr inline bool isHalf() const { return ALGO == Algorithm::CN_HALF; }
constexpr inline bool isRTO() const { return ALGO == Algorithm::CN_RTO; }
constexpr inline bool isRWZ() const { return ALGO == Algorithm::CN_RWZ; }
constexpr inline bool isZLS() const { return ALGO == Algorithm::CN_ZLS; }
constexpr inline bool isDouble() const { return ALGO == Algorithm::CN_DOUBLE; }
constexpr inline bool isCCX() const { return ALGO == Algorithm::CN_CCX; }
constexpr inline bool isHeavy() const { ASSERT_CN; return Algorithm::family(ALGO) == Algorithm::CN_HEAVY; }
constexpr inline bool isHeavyTube() const { return ALGO == Algorithm::CN_HEAVY_TUBE; }
constexpr inline bool isHeavyXHV() const { return ALGO == Algorithm::CN_HEAVY_XHV; }
constexpr inline bool isPico0() const { return ALGO == Algorithm::CN_PICO_0; }
constexpr inline bool isPicoTLO() const { return ALGO == Algorithm::CN_PICO_TLO; }
constexpr inline bool isUPX2() const { return ALGO == Algorithm::CN_UPX2; }
constexpr inline bool isGR0() const { return ALGO == Algorithm::CN_GR_0; }
constexpr inline bool isGR1() const { return ALGO == Algorithm::CN_GR_1; }
constexpr inline bool isGR2() const { return ALGO == Algorithm::CN_GR_2; }
constexpr inline bool isGR3() const { return ALGO == Algorithm::CN_GR_3; }
constexpr inline bool isGR4() const { return ALGO == Algorithm::CN_GR_4; }
constexpr inline bool isGR5() const { return ALGO == Algorithm::CN_GR_5; }
inline static uint32_t iterations(Algorithm::Id algo) inline static uint32_t iterations(Algorithm::Id algo)
{ {
switch (algo) { switch (algo) {

View File

@@ -603,7 +603,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
if (props.base() == Algorithm::CN_2) { if (props.base() == Algorithm::CN_2) {
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
_mm_store_si128(reinterpret_cast<__m128i *>(mem_out), _mm_xor_si128(bx0, cx)); _mm_store_si128(reinterpret_cast<__m128i *>(mem_out), _mm_xor_si128(bx0, cx));
} else { } else {
__m128i tmp = _mm_xor_si128(bx0, cx); __m128i tmp = _mm_xor_si128(bx0, cx);
@@ -665,15 +665,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask(); constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY if (props.isBase1() && size < 43) {
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 32); memset(output, 0, 32);
return; return;
} }
@@ -694,10 +687,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
V4_Instruction code[256]; V4_Instruction code[256];
const int code_size = v4_random_math_init<ALGO>(code, height); const int code_size = v4_random_math_init<ALGO>(code, height);
if (ALGO == Algorithm::CN_R) { v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), Assembly::NONE);
}
ctx[0]->generated_code_data = { ALGO, height }; ctx[0]->generated_code_data = { ALGO, height };
} }
@@ -718,26 +708,26 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10])); __m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10]));
__m128 conc_var; __m128 conc_var;
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
conc_var = _mm_setzero_ps(); conc_var = _mm_setzero_ps();
RESTORE_ROUNDING_MODE(); RESTORE_ROUNDING_MODE();
} }
for (size_t i = 0; i < props.iterations(); i++) { for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx; __m128i cx;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) { if (props.isHeavyTube() || !SOFT_AES) {
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)])); cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
cryptonight_conceal_tweak(cx, conc_var); cryptonight_conceal_tweak(cx, conc_var);
} }
} }
const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0)); const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0));
if (IS_CN_HEAVY_TUBE) { if (props.isHeavyTube()) {
cx = aes_round_tweak_div(cx, ax0); cx = aes_round_tweak_div(cx, ax0);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)])); cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
cryptonight_conceal_tweak(cx, conc_var); cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table)); cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
@@ -750,7 +740,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = _mm_aesenc_si128(cx, ax0); cx = _mm_aesenc_si128(cx, ax0);
} }
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) { if (props.isBase1() || props.isBase2()) {
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx); cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else { } else {
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx)); _mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
@@ -762,13 +752,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0]; cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1]; ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
if (props.isR()) { if (props.isR()) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1); VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
if (ALGO == Algorithm::CN_R) { al0 ^= r0[2] | (static_cast<uint64_t>(r0[3]) << 32);
al0 ^= r0[2] | (static_cast<uint64_t>(r0[3]) << 32); ah0 ^= r0[0] | (static_cast<uint64_t>(r0[1]) << 32);
ah0 ^= r0[0] | (static_cast<uint64_t>(r0[1]) << 32);
}
} else { } else {
VARIANT2_INTEGER_MATH(0, cl, cx); VARIANT2_INTEGER_MATH(0, cl, cx);
} }
@@ -776,11 +764,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
lo = __umul128(idx0, cl, &hi); lo = __umul128(idx0, cl, &hi);
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
if (ALGO == Algorithm::CN_R) { if (props.isR()) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0); VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
} else { } else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
} }
} }
@@ -789,9 +777,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0; reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) { if (props.isHeavyTube() || props.isRTO()) {
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0; reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (BASE == Algorithm::CN_1) { } else if (props.isBase1()) {
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0; reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
} else { } else {
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0; reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
@@ -819,7 +807,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q; ((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) { if (props.isHeavyXHV()) {
d = ~d; d = ~d;
} }
@@ -827,7 +815,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
} }
# endif # endif
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
bx1 = bx0; bx1 = bx0;
} }
@@ -960,7 +948,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
} }
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]); cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
if (ALGO == Algorithm::CN_2) { if (props.is2()) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cnv2_mainloop_ivybridge_asm(ctx); cnv2_mainloop_ivybridge_asm(ctx);
} }
@@ -971,7 +959,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cnv2_mainloop_bulldozer_asm(ctx); cnv2_mainloop_bulldozer_asm(ctx);
} }
} }
else if (ALGO == Algorithm::CN_HALF) { else if (props.isHalf()) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_half_mainloop_ivybridge_asm(ctx); cn_half_mainloop_ivybridge_asm(ctx);
} }
@@ -983,7 +971,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
} }
} }
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) { else if (props.isPico0()) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx); cn_trtl_mainloop_ivybridge_asm(ctx);
} }
@@ -994,7 +982,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_trtl_mainloop_bulldozer_asm(ctx); cn_trtl_mainloop_bulldozer_asm(ctx);
} }
} }
else if (ALGO == Algorithm::CN_PICO_TLO) { else if (props.isPicoTLO()) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx); cn_tlo_mainloop_ivybridge_asm(ctx);
} }
@@ -1006,10 +994,10 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
} }
} }
# endif # endif
else if (ALGO == Algorithm::CN_RWZ) { else if (props.isRWZ()) {
cnv2_rwz_mainloop_asm(ctx); cnv2_rwz_mainloop_asm(ctx);
} }
else if (ALGO == Algorithm::CN_ZLS) { else if (props.isZLS()) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx); cn_zls_mainloop_ivybridge_asm(ctx);
} }
@@ -1020,7 +1008,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_zls_mainloop_bulldozer_asm(ctx); cn_zls_mainloop_bulldozer_asm(ctx);
} }
} }
else if (ALGO == Algorithm::CN_DOUBLE) { else if (props.isDouble()) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_double_mainloop_ivybridge_asm(ctx); cn_double_mainloop_ivybridge_asm(ctx);
} }
@@ -1032,7 +1020,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
} }
} }
# ifdef XMRIG_ALGO_CN_FEMTO # ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) { else if (props.isUPX2()) {
cn_upx2_mainloop_asm(ctx); cn_upx2_mainloop_asm(ctx);
} }
# endif # endif
@@ -1078,22 +1066,22 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]); cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
} }
if (ALGO == Algorithm::CN_2) { if (props.is2()) {
cnv2_double_mainloop_sandybridge_asm(ctx); cnv2_double_mainloop_sandybridge_asm(ctx);
} }
else if (ALGO == Algorithm::CN_HALF) { else if (props.isHalf()){
cn_half_double_mainloop_sandybridge_asm(ctx); cn_half_double_mainloop_sandybridge_asm(ctx);
} }
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) { else if (props.isPico0()) {
cn_trtl_double_mainloop_sandybridge_asm(ctx); cn_trtl_double_mainloop_sandybridge_asm(ctx);
} }
else if (ALGO == Algorithm::CN_PICO_TLO) { else if (props.isPicoTLO()) {
cn_tlo_double_mainloop_sandybridge_asm(ctx); cn_tlo_double_mainloop_sandybridge_asm(ctx);
} }
# endif # endif
# ifdef XMRIG_ALGO_CN_FEMTO # ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) { else if (props.isUPX2()) {
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) { if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx); cnv2_upx_double_mainloop_zen3_asm(ctx);
} }
@@ -1102,13 +1090,13 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
} }
} }
# endif # endif
else if (ALGO == Algorithm::CN_RWZ) { else if (props.isRWZ()) {
cnv2_rwz_double_mainloop_asm(ctx); cnv2_rwz_double_mainloop_asm(ctx);
} }
else if (ALGO == Algorithm::CN_ZLS) { else if (props.isZLS()) {
cn_zls_double_mainloop_sandybridge_asm(ctx); cn_zls_double_mainloop_sandybridge_asm(ctx);
} }
else if (ALGO == Algorithm::CN_DOUBLE) { else if (props.isDouble()) {
cn_double_double_mainloop_sandybridge_asm(ctx); cn_double_double_mainloop_sandybridge_asm(ctx);
} }
else if (props.isR()) { else if (props.isR()) {
@@ -1146,9 +1134,8 @@ template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height) static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
{ {
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) { if (props.isBase1() && size < 43) {
memset(output, 0, 32); memset(output, 0, 32);
return; return;
} }
@@ -1163,12 +1150,12 @@ static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict_
VARIANT1_INIT(0); VARIANT1_INIT(0);
ctx[0]->tweak1_2 = tweak1_2_0; ctx[0]->tweak1_2 = tweak1_2_0;
ctx[0]->tweak1_table = tweak1_table; ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_single_mainloop_asm(ctx); if (props.isGR0()) cn_gr0_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_single_mainloop_asm(ctx); if (props.isGR1()) cn_gr1_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_single_mainloop_asm(ctx); if (props.isGR2()) cn_gr2_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_single_mainloop_asm(ctx); if (props.isGR3()) cn_gr3_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_single_mainloop_asm(ctx); if (props.isGR4()) cn_gr4_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_single_mainloop_asm(ctx); if (props.isGR5()) cn_gr5_single_mainloop_asm(ctx);
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]); cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24); keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
@@ -1180,9 +1167,8 @@ template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{ {
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) { if (props.isBase1() && size < 43) {
memset(output, 0, 64); memset(output, 0, 64);
return; return;
} }
@@ -1196,7 +1182,7 @@ static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict_
} }
# ifdef XMRIG_VAES # ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) { if (cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem()); cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
} }
else else
@@ -1214,15 +1200,15 @@ static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict_
ctx[0]->tweak1_table = tweak1_table; ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_double_mainloop_asm(ctx); if (props.isGR0()) cn_gr0_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_double_mainloop_asm(ctx); if (props.isGR1()) cn_gr1_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_double_mainloop_asm(ctx); if (props.isGR2()) cn_gr2_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_double_mainloop_asm(ctx); if (props.isGR3()) cn_gr3_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_double_mainloop_asm(ctx); if (props.isGR4()) cn_gr4_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_double_mainloop_asm(ctx); if (props.isGR5()) cn_gr5_double_mainloop_asm(ctx);
# ifdef XMRIG_VAES # ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) { if (cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem()); cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
} }
else else
@@ -1267,15 +1253,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask(); constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY if (props.isBase1() && size < 43) {
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 64); memset(output, 0, 64);
return; return;
} }
@@ -1323,7 +1302,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
__m128 conc_var0, conc_var1; __m128 conc_var0, conc_var1;
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
conc_var0 = _mm_setzero_ps(); conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps(); conc_var1 = _mm_setzero_ps();
RESTORE_ROUNDING_MODE(); RESTORE_ROUNDING_MODE();
@@ -1334,10 +1313,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
for (size_t i = 0; i < props.iterations(); i++) { for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx0, cx1; __m128i cx0, cx1;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) { if (props.isHeavyTube() || !SOFT_AES) {
cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK])); cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK])); cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK]));
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
cryptonight_conceal_tweak(cx0, conc_var0); cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1); cryptonight_conceal_tweak(cx1, conc_var1);
} }
@@ -1345,12 +1324,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
const __m128i ax0 = _mm_set_epi64x(ah0, al0); const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1); const __m128i ax1 = _mm_set_epi64x(ah1, al1);
if (IS_CN_HEAVY_TUBE) { if (props.isHeavyTube()) {
cx0 = aes_round_tweak_div(cx0, ax0); cx0 = aes_round_tweak_div(cx0, ax0);
cx1 = aes_round_tweak_div(cx1, ax1); cx1 = aes_round_tweak_div(cx1, ax1);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK])); cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK])); cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
cryptonight_conceal_tweak(cx0, conc_var0); cryptonight_conceal_tweak(cx0, conc_var0);
@@ -1368,7 +1347,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = _mm_aesenc_si128(cx1, ax1); cx1 = _mm_aesenc_si128(cx1, ax1);
} }
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) { if (props.isBase1() || props.isBase2()) {
cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0); cryptonight_monero_tweak<ALGO>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1); cryptonight_monero_tweak<ALGO>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
} else { } else {
@@ -1383,13 +1362,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l0[idx0 & MASK])[0]; cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1]; ch = ((uint64_t*) &l0[idx0 & MASK])[1];
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
if (props.isR()) { if (props.isR()) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01); VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
if (ALGO == Algorithm::CN_R) { al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
}
} else { } else {
VARIANT2_INTEGER_MATH(0, cl, cx0); VARIANT2_INTEGER_MATH(0, cl, cx0);
} }
@@ -1397,11 +1374,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
lo = __umul128(idx0, cl, &hi); lo = __umul128(idx0, cl, &hi);
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
if (ALGO == Algorithm::CN_R) { if (props.isR()) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0); VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
} else { } else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
} }
} }
@@ -1410,9 +1387,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l0[idx0 & MASK])[0] = al0; ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) { if (props.isHeavyTube() || props.isRTO()) {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (BASE == Algorithm::CN_1) { } else if (props.isBase1()) {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
} else { } else {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0; ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
@@ -1430,7 +1407,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) { if (props.isHeavyXHV()) {
d = ~d; d = ~d;
} }
@@ -1441,13 +1418,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l1[idx1 & MASK])[0]; cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1]; ch = ((uint64_t*) &l1[idx1 & MASK])[1];
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
if (props.isR()) { if (props.isR()) {
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11); VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
if (ALGO == Algorithm::CN_R) { al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32); ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
}
} else { } else {
VARIANT2_INTEGER_MATH(1, cl, cx1); VARIANT2_INTEGER_MATH(1, cl, cx1);
} }
@@ -1455,11 +1430,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
lo = __umul128(idx1, cl, &hi); lo = __umul128(idx1, cl, &hi);
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
if (ALGO == Algorithm::CN_R) { if (props.isR()) {
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0); VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
} else { } else {
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0));
} }
} }
@@ -1468,9 +1443,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l1[idx1 & MASK])[0] = al1; ((uint64_t*)&l1[idx1 & MASK])[0] = al1;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) { if (props.isHeavyTube() || props.isRTO()) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
} else if (BASE == Algorithm::CN_1) { } else if (props.isBase1()) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1; ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
} else { } else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1; ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
@@ -1488,7 +1463,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) { if (props.isHeavyXHV()) {
d = ~d; d = ~d;
} }
@@ -1496,7 +1471,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
} }
# endif # endif
if (BASE == Algorithm::CN_2) { if (props.isBase2()) {
bx01 = bx00; bx01 = bx00;
bx11 = bx10; bx11 = bx10;
} }
@@ -1529,9 +1504,8 @@ template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height) static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
{ {
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) { if (props.isBase1() && size < 43) {
memset(output, 0, 32 * 4); memset(output, 0, 32 * 4);
return; return;
} }
@@ -1549,7 +1523,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
} }
# ifdef XMRIG_VAES # ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) { if (cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem()); cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem()); cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
} }
@@ -1569,15 +1543,15 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
ctx[0]->tweak1_table = tweak1_table; ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_quad_mainloop_asm(ctx); if (props.isGR0()) cn_gr0_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_quad_mainloop_asm(ctx); if (props.isGR1()) cn_gr1_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_quad_mainloop_asm(ctx); if (props.isGR2()) cn_gr2_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_quad_mainloop_asm(ctx); if (props.isGR3()) cn_gr3_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_quad_mainloop_asm(ctx); if (props.isGR4()) cn_gr4_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_quad_mainloop_asm(ctx); if (props.isGR5()) cn_gr5_quad_mainloop_asm(ctx);
# ifdef XMRIG_VAES # ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) { if (cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem()); cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem()); cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
} }
@@ -1606,14 +1580,14 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \ #define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
c = _mm_load_si128(ptr); \ c = _mm_load_si128(ptr); \
if (ALGO == Algorithm::CN_CCX) { \ if (props.isCCX()) { \
cryptonight_conceal_tweak(c, conc_var); \ cryptonight_conceal_tweak(c, conc_var); \
} }
#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \ #define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
if (IS_CN_HEAVY_TUBE) { \ if (props.isHeavyTube()) { \
c = aes_round_tweak_div(c, a); \ c = aes_round_tweak_div(c, a); \
} \ } \
else if (SOFT_AES) { \ else if (SOFT_AES) { \
@@ -1622,7 +1596,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
c = _mm_aesenc_si128(c, a); \ c = _mm_aesenc_si128(c, a); \
} \ } \
\ \
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) { \ if (props.isBase1() || props.isBase2()) { \
cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \ cryptonight_monero_tweak<ALGO>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
} else { \ } else { \
_mm_store_si128(ptr, _mm_xor_si128(b0, c)); \ _mm_store_si128(ptr, _mm_xor_si128(b0, c)); \
@@ -1638,36 +1612,34 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \ #define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \
uint64_t al##part, ah##part; \ uint64_t al##part, ah##part; \
if (BASE == Algorithm::CN_2) { \ if (props.isBase2()) { \
if (props.isR()) { \ if (props.isR()) { \
al##part = _mm_cvtsi128_si64(a); \ al##part = _mm_cvtsi128_si64(a); \
ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \ ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \
VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \ VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \
if (ALGO == Algorithm::CN_R) { \ al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \
al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \ ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
} \
} else { \ } else { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \ VARIANT2_INTEGER_MATH(part, cl##part, c); \
} \ } \
} \ } \
lo = __umul128(idx, cl##part, &hi); \ lo = __umul128(idx, cl##part, &hi); \
if (BASE == Algorithm::CN_2) { \ if (props.isBase2()) { \
if (ALGO == Algorithm::CN_R) { \ if (props.isR()) { \
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \ VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
} else { \ } else { \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); \ VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, ((props.isRWZ() || props.isUPX2()) ? 1 : 0)); \
} \ } \
} \ } \
if (ALGO == Algorithm::CN_R) { \ if (props.isR()) { \
a = _mm_set_epi64x(ah##part, al##part); \ a = _mm_set_epi64x(ah##part, al##part); \
} \ } \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
\ \
if (BASE == Algorithm::CN_1) { \ if (props.isBase1()) { \
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
\ \
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) { \ if (props.isHeavyTube() || props.isRTO()) { \
((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \ ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \
} \ } \
} else { \ } else { \
@@ -1681,13 +1653,13 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
int32_t d = ((int32_t*)&l[idx & MASK])[2]; \ int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
int64_t q = n / (d | 0x5); \ int64_t q = n / (d | 0x5); \
((int64_t*)&l[idx & MASK])[0] = n ^ q; \ ((int64_t*)&l[idx & MASK])[0] = n ^ q; \
if (IS_CN_HEAVY_XHV) { \ if (props.isHeavyXHV()) { \
d = ~d; \ d = ~d; \
} \ } \
\ \
idx = d ^ q; \ idx = d ^ q; \
} \ } \
if (BASE == Algorithm::CN_2) { \ if (props.isBase2()) { \
b1 = b0; \ b1 = b0; \
} \ } \
b0 = c; b0 = c;
@@ -1697,11 +1669,11 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
__m128i mc##n; \ __m128i mc##n; \
__m128i division_result_xmm_##n; \ __m128i division_result_xmm_##n; \
__m128i sqrt_result_xmm_##n; \ __m128i sqrt_result_xmm_##n; \
if (BASE == Algorithm::CN_1) { \ if (props.isBase1()) { \
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \ mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \ *(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
} \ } \
if (BASE == Algorithm::CN_2) { \ if (props.isBase2()) { \
division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \ division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \
sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \ sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \
} \ } \
@@ -1710,7 +1682,7 @@ static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \ __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
__m128i cx##n = _mm_setzero_si128(); \ __m128i cx##n = _mm_setzero_si128(); \
__m128 conc_var##n; \ __m128 conc_var##n; \
if (ALGO == Algorithm::CN_CCX) { \ if (props.isCCX()) { \
conc_var##n = _mm_setzero_ps(); \ conc_var##n = _mm_setzero_ps(); \
} \ } \
VARIANT4_RANDOM_MATH_INIT(n); VARIANT4_RANDOM_MATH_INIT(n);
@@ -1721,17 +1693,8 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
{ {
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask(); constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY if (props.isBase1() && size < 43) {
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
constexpr bool IS_CN_HEAVY_XHV = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 32 * 3); memset(output, 0, 32 * 3);
return; return;
} }
@@ -1755,7 +1718,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
CONST_INIT(ctx[1], 1); CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2); CONST_INIT(ctx[2], 2);
VARIANT2_SET_ROUNDING_MODE(); VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
RESTORE_ROUNDING_MODE(); RESTORE_ROUNDING_MODE();
} }
@@ -1819,17 +1782,8 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask(); constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY if (props.isBase1() && size < 43) {
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
constexpr bool IS_CN_HEAVY_XHV = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 32 * 4); memset(output, 0, 32 * 4);
return; return;
} }
@@ -1869,7 +1823,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
CONST_INIT(ctx[2], 2); CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3); CONST_INIT(ctx[3], 3);
VARIANT2_SET_ROUNDING_MODE(); VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
RESTORE_ROUNDING_MODE(); RESTORE_ROUNDING_MODE();
} }
@@ -1930,17 +1884,8 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
{ {
constexpr CnAlgo<ALGO> props; constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask(); constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
# ifdef XMRIG_ALGO_CN_HEAVY if (props.isBase1() && size < 43) {
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
constexpr bool IS_CN_HEAVY_XHV = ALGO == Algorithm::CN_HEAVY_XHV;
# else
constexpr bool IS_CN_HEAVY_TUBE = false;
constexpr bool IS_CN_HEAVY_XHV = false;
# endif
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 32 * 5); memset(output, 0, 32 * 5);
return; return;
} }
@@ -1970,7 +1915,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
CONST_INIT(ctx[3], 3); CONST_INIT(ctx[3], 3);
CONST_INIT(ctx[4], 4); CONST_INIT(ctx[4], 4);
VARIANT2_SET_ROUNDING_MODE(); VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) { if (props.isCCX()) {
RESTORE_ROUNDING_MODE(); RESTORE_ROUNDING_MODE();
} }

View File

@@ -1,7 +1,7 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2019 tevador <tevador@gmail.com> * Copyright (c) 2018-2019 tevador <tevador@gmail.com>
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "crypto/rx/RxBasicStorage.h" #include "crypto/rx/RxBasicStorage.h"
#include "backend/common/Tags.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "base/io/log/Tags.h" #include "base/io/log/Tags.h"
#include "base/tools/Chrono.h" #include "base/tools/Chrono.h"