mirror of
https://github.com/xmrig/xmrig.git
synced 2026-02-01 01:42:59 -05:00
Merge pull request #3769 from SChernykh/dev
RandomX v2 (initial support)
This commit is contained in:
@@ -194,9 +194,9 @@ if (WITH_RANDOMX)
|
||||
if (WITH_VAES)
|
||||
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/randomx/aes_hash_vaes512.cpp)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "/O2 /Ob2 /Oi /Ot /arch:AVX512")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512")
|
||||
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx512f -mvaes")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mvaes")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
|
||||
@@ -48,6 +48,24 @@ static const std::map<int, std::map<uint32_t, uint64_t> > hashCheck = {
|
||||
{ 9000000U, 0x323935102AB6B45CULL },
|
||||
{ 10000000U, 0xB5231262E2792B26ULL }
|
||||
}},
|
||||
{ Algorithm::RX_V2, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000U, 0x57d2051d099613a4ULL },
|
||||
{ 20000U, 0x0bae0155cc797f01ULL },
|
||||
# endif
|
||||
{ 250000U, 0x18cf741a71484072ULL },
|
||||
{ 500000U, 0xcd8c3e6ec31b2faeULL },
|
||||
{ 1000000U, 0x88d6b8fb70cd479dULL },
|
||||
{ 2000000U, 0x0e16828d236a1a63ULL },
|
||||
{ 3000000U, 0x2739bdd0f25b83a6ULL },
|
||||
{ 4000000U, 0x32f42d9006d2d34bULL },
|
||||
{ 5000000U, 0x16d9c6286cb82251ULL },
|
||||
{ 6000000U, 0x1f916ae19d6bcf07ULL },
|
||||
{ 7000000U, 0x1f474f99a873948fULL },
|
||||
{ 8000000U, 0x8d67e0ddf05476bbULL },
|
||||
{ 9000000U, 0x3ebf37dcd5c4a215ULL },
|
||||
{ 10000000U, 0x7efbddff3f30fb74ULL }
|
||||
}},
|
||||
{ Algorithm::RX_WOW, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000U, 0x6B0918757100B338ULL },
|
||||
@@ -88,6 +106,24 @@ static const std::map<int, std::map<uint32_t, uint64_t> > hashCheck1T = {
|
||||
{ 9000000U, 0xC6D39EF59213A07CULL },
|
||||
{ 10000000U, 0x95E6BAE68DD779CDULL }
|
||||
}},
|
||||
{ Algorithm::RX_V2, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000, 0x90eb7c07cd9e0d90ULL },
|
||||
{ 20000, 0x6523a3658d7d9930ULL },
|
||||
# endif
|
||||
{ 250000, 0xf83b6d9d355ee5b1ULL },
|
||||
{ 500000, 0xbea3c1bf1465e9abULL },
|
||||
{ 1000000, 0x9e16f7cb56b366e1ULL },
|
||||
{ 2000000, 0x3b5e671f47e15e55ULL },
|
||||
{ 3000000, 0xec5819c180df03e2ULL },
|
||||
{ 4000000, 0x19d31b498f86aad4ULL },
|
||||
{ 5000000, 0x2487626c75cd12ccULL },
|
||||
{ 6000000, 0xa323a25a5286c39aULL },
|
||||
{ 7000000, 0xa123b100f3104dfcULL },
|
||||
{ 8000000, 0x602db9d83bfa0ddcULL },
|
||||
{ 9000000, 0x98da909e579765ddULL },
|
||||
{ 10000000, 0x3a45b7247cec9895ULL }
|
||||
}},
|
||||
{ Algorithm::RX_WOW, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000U, 0x9EC1B9B8C8C7F082ULL },
|
||||
|
||||
@@ -256,7 +256,10 @@ void xmrig::CpuWorker<N>::start()
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
bool first = true;
|
||||
alignas(16) uint64_t tempHash[8] = {};
|
||||
alignas(64) uint64_t tempHash[8] = {};
|
||||
|
||||
size_t prev_job_size = 0;
|
||||
alignas(64) uint8_t prev_job[Job::kMaxBlobSize] = {};
|
||||
# endif
|
||||
|
||||
while (!Nonce::isOutdated(Nonce::CPU, m_job.sequence())) {
|
||||
@@ -297,6 +300,11 @@ void xmrig::CpuWorker<N>::start()
|
||||
job.generateMinerSignature(m_job.blob(), job.size(), miner_signature_ptr);
|
||||
}
|
||||
randomx_calculate_hash_first(m_vm, tempHash, m_job.blob(), job.size());
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_COMMITMENT) {
|
||||
prev_job_size = job.size();
|
||||
memcpy(prev_job, m_job.blob(), prev_job_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nextRound()) {
|
||||
@@ -307,7 +315,15 @@ void xmrig::CpuWorker<N>::start()
|
||||
memcpy(miner_signature_saved, miner_signature_ptr, sizeof(miner_signature_saved));
|
||||
job.generateMinerSignature(m_job.blob(), job.size(), miner_signature_ptr);
|
||||
}
|
||||
|
||||
randomx_calculate_hash_next(m_vm, tempHash, m_job.blob(), job.size(), m_hash);
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_COMMITMENT) {
|
||||
memcpy(m_commitment, m_hash, RANDOMX_HASH_SIZE);
|
||||
randomx_calculate_commitment(prev_job, prev_job_size, m_hash, m_hash);
|
||||
prev_job_size = job.size();
|
||||
memcpy(prev_job, m_job.blob(), prev_job_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
# endif
|
||||
|
||||
@@ -83,6 +83,7 @@ private:
|
||||
void allocateCnCtx();
|
||||
void consumeJob();
|
||||
|
||||
alignas(8) uint8_t m_commitment[N * 32]{ 0 };
|
||||
alignas(8) uint8_t m_hash[N * 32]{ 0 };
|
||||
const Algorithm m_algorithm;
|
||||
const Assembly m_assembly;
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#define ALGO_CN_PICO_TLO 0x63120274
|
||||
#define ALGO_CN_UPX2 0x63110200
|
||||
#define ALGO_RX_0 0x72151200
|
||||
#define ALGO_RX_V2 0x72151202
|
||||
#define ALGO_RX_WOW 0x72141177
|
||||
#define ALGO_RX_ARQMA 0x72121061
|
||||
#define ALGO_RX_SFX 0x72151273
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -77,6 +77,7 @@ const char *Algorithm::kCN_UPX2 = "cn/upx2";
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
const char *Algorithm::kRX = "rx";
|
||||
const char *Algorithm::kRX_0 = "rx/0";
|
||||
const char *Algorithm::kRX_V2 = "rx/2";
|
||||
const char *Algorithm::kRX_WOW = "rx/wow";
|
||||
const char *Algorithm::kRX_ARQ = "rx/arq";
|
||||
const char *Algorithm::kRX_GRAFT = "rx/graft";
|
||||
@@ -143,6 +144,7 @@ static const std::map<uint32_t, const char *> kAlgorithmNames = {
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
ALGO_NAME(RX_0),
|
||||
ALGO_NAME(RX_V2),
|
||||
ALGO_NAME(RX_WOW),
|
||||
ALGO_NAME(RX_ARQ),
|
||||
ALGO_NAME(RX_GRAFT),
|
||||
@@ -253,6 +255,8 @@ static const std::map<const char *, Algorithm::Id, aliasCompare> kAlgorithmAlias
|
||||
ALGO_ALIAS(RX_0, "rx/test"),
|
||||
ALGO_ALIAS(RX_0, "randomx"),
|
||||
ALGO_ALIAS(RX_0, "rx"),
|
||||
ALGO_ALIAS_AUTO(RX_V2), ALGO_ALIAS(RX_V2, "randomx/v2"),
|
||||
ALGO_ALIAS(RX_V2, "rx/v2"),
|
||||
ALGO_ALIAS_AUTO(RX_WOW), ALGO_ALIAS(RX_WOW, "randomx/wow"),
|
||||
ALGO_ALIAS(RX_WOW, "randomwow"),
|
||||
ALGO_ALIAS_AUTO(RX_ARQ), ALGO_ALIAS(RX_ARQ, "randomx/arq"),
|
||||
@@ -350,7 +354,7 @@ std::vector<xmrig::Algorithm> xmrig::Algorithm::all(const std::function<bool(con
|
||||
CN_HEAVY_0, CN_HEAVY_TUBE, CN_HEAVY_XHV,
|
||||
CN_PICO_0, CN_PICO_TLO,
|
||||
CN_UPX2,
|
||||
RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_YADA,
|
||||
RX_0, RX_V2, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_YADA,
|
||||
AR2_CHUKWA, AR2_CHUKWA_V2, AR2_WRKZ,
|
||||
KAWPOW_RVN,
|
||||
GHOSTRIDER_RTM
|
||||
|
||||
@@ -73,6 +73,7 @@ public:
|
||||
CN_GR_5 = 0x63120105, // "cn/turtle-lite" GhostRider
|
||||
GHOSTRIDER_RTM = 0x6c150000, // "ghostrider" GhostRider
|
||||
RX_0 = 0x72151200, // "rx/0" RandomX (reference configuration).
|
||||
RX_V2 = 0x72151202, // "rx/2" RandomX (Monero v2).
|
||||
RX_WOW = 0x72141177, // "rx/wow" RandomWOW (Wownero).
|
||||
RX_ARQ = 0x72121061, // "rx/arq" RandomARQ (Arqma).
|
||||
RX_GRAFT = 0x72151267, // "rx/graft" RandomGRAFT (Graft).
|
||||
@@ -139,6 +140,7 @@ public:
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
static const char *kRX;
|
||||
static const char *kRX_0;
|
||||
static const char* kRX_V2;
|
||||
static const char *kRX_WOW;
|
||||
static const char *kRX_ARQ;
|
||||
static const char *kRX_GRAFT;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
;# save VM register values
|
||||
add rsp, 40
|
||||
add rsp, 248
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
|
||||
30
src/crypto/randomx/asm/program_loop_store_hard_aes.inc
Normal file
30
src/crypto/randomx/asm/program_loop_store_hard_aes.inc
Normal file
@@ -0,0 +1,30 @@
|
||||
;# Loop-store variant the JIT copies in when Tweak_V2_AES is set and the CPU has AES instructions.
;# Step 1: write integer registers r8-r15 to the 64-byte block whose pointer is read from [rsp+24].
mov rcx, [rsp+24]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
;# Step 2: rcx = pointer read from [rsp+16]; it is the destination of the xmm0-xmm3 stores at the end.
mov rcx, [rsp+16]
|
||||
;# Step 3: four AES rounds over xmm0-xmm3. Each round uses one key register (xmm4, xmm5, xmm6, xmm7)
;# and applies aesenc to xmm0/xmm2 and aesdec to xmm1/xmm3.
;# round 1: key = xmm4
aesenc xmm0, xmm4
|
||||
aesdec xmm1, xmm4
|
||||
aesenc xmm2, xmm4
|
||||
aesdec xmm3, xmm4
|
||||
;# round 2: key = xmm5
aesenc xmm0, xmm5
|
||||
aesdec xmm1, xmm5
|
||||
aesenc xmm2, xmm5
|
||||
aesdec xmm3, xmm5
|
||||
;# round 3: key = xmm6
aesenc xmm0, xmm6
|
||||
aesdec xmm1, xmm6
|
||||
aesenc xmm2, xmm6
|
||||
aesdec xmm3, xmm6
|
||||
;# round 4: key = xmm7
aesenc xmm0, xmm7
|
||||
aesdec xmm1, xmm7
|
||||
aesenc xmm2, xmm7
|
||||
aesdec xmm3, xmm7
|
||||
;# Step 4: write the mixed xmm0-xmm3 (64 bytes) to the block at rcx.
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
196
src/crypto/randomx/asm/program_loop_store_soft_aes.inc
Normal file
196
src/crypto/randomx/asm/program_loop_store_soft_aes.inc
Normal file
@@ -0,0 +1,196 @@
|
||||
;# Loop-store variant the JIT copies in when Tweak_V2_AES is set and the CPU has no AES instructions.
;# It performs the same four enc/dec/enc/dec rounds as the hard-AES variant, but through the
;# table-driven soft_aes_enc / soft_aes_dec subroutines operating on stack copies of xmm0-xmm7.
;# Step 1: write integer registers r8-r15 to the 64-byte block whose pointer is read from [rsp+24].
mov rcx, [rsp+24]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
|
||||
;# Step 2: spill xmm0-xmm7 to [rsp+40 .. rsp+167]. xmm0-xmm3 (offsets 40-88) are the states that get
;# rewritten in place; xmm4-xmm7 (offsets 104-152) are read as the four round keys.
movapd xmmword ptr [rsp+40], xmm0
|
||||
movapd xmmword ptr [rsp+56], xmm1
|
||||
movapd xmmword ptr [rsp+72], xmm2
|
||||
movapd xmmword ptr [rsp+88], xmm3
|
||||
movapd xmmword ptr [rsp+104], xmm4
|
||||
movapd xmmword ptr [rsp+120], xmm5
|
||||
movapd xmmword ptr [rsp+136], xmm6
|
||||
movapd xmmword ptr [rsp+152], xmm7
|
||||
|
||||
;# Step 3: save the general-purpose registers that the subroutines or the LUT setup below overwrite
;# (rax, rbx, rdx, rbp are scratch in soft_aes_enc/dec; rsi/rdi carry key/state pointers; r8/r9 carry LUTs).
mov [rsp+168], rax
|
||||
mov [rsp+176], rbx
|
||||
mov [rsp+184], rdx
|
||||
mov [rsp+192], rsi
|
||||
mov [rsp+200], rdi
|
||||
mov [rsp+208], rbp
|
||||
mov [rsp+216], r8
|
||||
mov [rsp+224], r9
|
||||
|
||||
;# Step 4: load the lookup-table pointers from the stack slots at [rsp+232] and [rsp+240].
mov r8, [rsp+232] ;# aes_lut_enc
|
||||
mov r9, [rsp+240] ;# aes_lut_dec
|
||||
|
||||
movapd xmm12, xmmword ptr [rsp-8] ;# "call" will overwrite IMUL_RCP's data on stack, so save it
|
||||
|
||||
;# Round 1: rsi -> key at [rsp+104] (spilled xmm4); rdi walks the four states at [rsp+40/56/72/88].
lea rsi, [rsp+104]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Round 2: key at [rsp+120] (spilled xmm5).
lea rsi, [rsp+120]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Round 3: key at [rsp+136] (spilled xmm6).
lea rsi, [rsp+136]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Round 4: key at [rsp+152] (spilled xmm7).
lea rsi, [rsp+152]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Put back the 16 bytes at [rsp-8] that were saved in xmm12 before the calls.
movapd xmmword ptr [rsp-8], xmm12
|
||||
|
||||
;# Skip over the two subroutine bodies that follow inline.
jmp soft_aes_end
|
||||
|
||||
;# soft_aes_enc: one table-driven round applied in place to a 16-byte state.
;#   rsi = pointer to the 16-byte round key (read only)
;#   rdi = pointer to the 16-byte state (read, then overwritten with the result)
;#   r8  = base of the lookup data, read as four 256-entry dword tables at offsets 0/1024/2048/3072
;# Overwrites eax, ebx, ecx, edx and ebp.
soft_aes_enc:
|
||||
;# Start the four output dwords (eax, ebx, ecx, edx) from the round key.
mov eax, dword ptr [rsi+0]
|
||||
mov ebx, dword ptr [rsi+4]
|
||||
mov ecx, dword ptr [rsi+8]
|
||||
mov edx, dword ptr [rsi+12]
|
||||
|
||||
;# State bytes 0-3: byte i indexes table (i mod 4); results go to eax, edx, ecx, ebx.
movzx ebp, byte ptr [rdi+0]
|
||||
xor eax, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+1]
|
||||
xor edx, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+2]
|
||||
xor ecx, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+3]
|
||||
xor ebx, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# State bytes 4-7: results go to ebx, eax, edx, ecx.
movzx ebp, byte ptr [rdi+4]
|
||||
xor ebx, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+5]
|
||||
xor eax, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+6]
|
||||
xor edx, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+7]
|
||||
xor ecx, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# State bytes 8-11: results go to ecx, ebx, eax, edx.
movzx ebp, byte ptr [rdi+8]
|
||||
xor ecx, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+9]
|
||||
xor ebx, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+10]
|
||||
xor eax, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+11]
|
||||
xor edx, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# State bytes 12-15: results go to edx, ecx, ebx, eax.
movzx ebp, byte ptr [rdi+12]
|
||||
xor edx, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+13]
|
||||
xor ecx, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+14]
|
||||
xor ebx, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+15]
|
||||
xor eax, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# Write the new state back only after all 16 input bytes have been read.
mov dword ptr [rdi+0], eax
|
||||
mov dword ptr [rdi+4], ebx
|
||||
mov dword ptr [rdi+8], ecx
|
||||
mov dword ptr [rdi+12], edx
|
||||
|
||||
ret
|
||||
|
||||
;# soft_aes_dec: counterpart of soft_aes_enc that reads the tables based at r9 and uses a different
;# byte-to-output-dword assignment.
;#   rsi = pointer to the 16-byte round key (read only)
;#   rdi = pointer to the 16-byte state (read, then overwritten with the result)
;#   r9  = base of the lookup data, read as four 256-entry dword tables at offsets 0/1024/2048/3072
;# Overwrites eax, ebx, ecx, edx and ebp.
soft_aes_dec:
|
||||
;# Start the four output dwords (eax, ebx, ecx, edx) from the round key.
mov eax, dword ptr [rsi+0]
|
||||
mov ebx, dword ptr [rsi+4]
|
||||
mov ecx, dword ptr [rsi+8]
|
||||
mov edx, dword ptr [rsi+12]
|
||||
|
||||
;# State bytes 0-3: byte i indexes table (i mod 4); results go to eax, ebx, ecx, edx.
movzx ebp, byte ptr [rdi+0]
|
||||
xor eax, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+1]
|
||||
xor ebx, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+2]
|
||||
xor ecx, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+3]
|
||||
xor edx, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# State bytes 4-7: results go to ebx, ecx, edx, eax.
movzx ebp, byte ptr [rdi+4]
|
||||
xor ebx, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+5]
|
||||
xor ecx, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+6]
|
||||
xor edx, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+7]
|
||||
xor eax, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# State bytes 8-11: results go to ecx, edx, eax, ebx.
movzx ebp, byte ptr [rdi+8]
|
||||
xor ecx, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+9]
|
||||
xor edx, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+10]
|
||||
xor eax, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+11]
|
||||
xor ebx, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# State bytes 12-15: results go to edx, eax, ebx, ecx.
movzx ebp, byte ptr [rdi+12]
|
||||
xor edx, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+13]
|
||||
xor eax, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+14]
|
||||
xor ebx, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+15]
|
||||
xor ecx, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# Write the new state back only after all 16 input bytes have been read.
mov dword ptr [rdi+0], eax
|
||||
mov dword ptr [rdi+4], ebx
|
||||
mov dword ptr [rdi+8], ecx
|
||||
mov dword ptr [rdi+12], edx
|
||||
|
||||
ret
|
||||
|
||||
;# soft_aes_end: tail of the soft-AES loop store. Restores the general-purpose registers saved before
;# the rounds, reloads the four processed states into xmm0-xmm3 and stores them to the output block.
soft_aes_end:
|
||||
|
||||
;# Restore saved registers from [rsp+168 .. rsp+224]; rcx is not restored but loaded with the
;# destination pointer read from [rsp+16].
mov rax, [rsp+168]
|
||||
mov rbx, [rsp+176]
|
||||
mov rcx, [rsp+16]
|
||||
mov rdx, [rsp+184]
|
||||
mov rsi, [rsp+192]
|
||||
mov rdi, [rsp+200]
|
||||
mov rbp, [rsp+208]
|
||||
mov r8, [rsp+216]
|
||||
mov r9, [rsp+224]
|
||||
|
||||
;# Reload the four states that soft_aes_enc/dec rewrote in place on the stack.
;# xmm4-xmm7 are not reloaded: the rounds only read their stack copies.
movapd xmm0, xmmword ptr [rsp+40]
|
||||
movapd xmm1, xmmword ptr [rsp+56]
|
||||
movapd xmm2, xmmword ptr [rsp+72]
|
||||
movapd xmm3, xmmword ptr [rsp+88]
|
||||
|
||||
;# Write the mixed xmm0-xmm3 (64 bytes) to the block at rcx.
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
16
src/crypto/randomx/asm/program_read_dataset_v2.inc
Normal file
16
src/crypto/randomx/asm/program_read_dataset_v2.inc
Normal file
@@ -0,0 +1,16 @@
|
||||
;# Dataset read emitted by the JIT in place of program_read_dataset when Tweak_V2_PREFETCH is set.
;# XORs the eight qwords at rdi + (ma AND RANDOMX_DATASET_BASE_MASK) into r8-r15, updates rbp with rax,
;# swaps its two 32-bit halves and prefetches the location addressed by the updated low half.
mov ecx, ebp ;# ecx = ma
|
||||
and ecx, RANDOMX_DATASET_BASE_MASK
|
||||
;# The first qword is consumed before the prefetch below is issued; the other seven follow it.
xor r8, qword ptr [rdi+rcx]
|
||||
xor rbp, rax ;# modify "ma"
|
||||
mov edx, ebp ;# edx = "ma"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
;# Non-temporal prefetch of the location at rdi + edx (edx was taken from the updated rbp above).
prefetchnta byte ptr [rdi+rdx]
|
||||
;# Remaining seven qwords at rdi + rcx, offsets 8..56.
xor r9, qword ptr [rdi+rcx+8]
|
||||
xor r10, qword ptr [rdi+rcx+16]
|
||||
xor r11, qword ptr [rdi+rcx+24]
|
||||
xor r12, qword ptr [rdi+rcx+32]
|
||||
xor r13, qword ptr [rdi+rcx+40]
|
||||
xor r14, qword ptr [rdi+rcx+48]
|
||||
xor r15, qword ptr [rdi+rcx+56]
|
||||
|
||||
@@ -225,7 +225,10 @@ namespace randomx {
|
||||
}
|
||||
|
||||
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
||||
rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast<uint32_t>(ibc.imm)) % 4);
|
||||
uint64_t isrc = rotr64(*ibc.isrc, ibc.imm);
|
||||
if (!RandomX_CurrentConfig.Tweak_V2_CFROUND || ((isrc & 60) == 0)) {
|
||||
rx_set_rounding_mode(isrc % 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
||||
|
||||
@@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define RANDOMX_DATASET_MAX_SIZE 2181038080
|
||||
|
||||
// Increase it if some configs use larger programs
|
||||
#define RANDOMX_PROGRAM_MAX_SIZE 280
|
||||
#define RANDOMX_PROGRAM_MAX_SIZE 384
|
||||
|
||||
// Increase it if some configs use larger scratchpad
|
||||
#define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152
|
||||
|
||||
@@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/reciprocal.h"
|
||||
#include "crypto/randomx/superscalar.hpp"
|
||||
#include "crypto/randomx/virtual_memory.hpp"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#ifdef XMRIG_FIX_RYZEN
|
||||
@@ -116,6 +117,7 @@ namespace randomx {
|
||||
#define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop)
|
||||
#define codeProgramStart ADDR(randomx_program_start)
|
||||
#define codeReadDataset ADDR(randomx_program_read_dataset)
|
||||
#define codeReadDatasetV2 ADDR(randomx_program_read_dataset_v2)
|
||||
#define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
|
||||
#define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
|
||||
#define codeDatasetInit ADDR(randomx_dataset_init)
|
||||
@@ -125,6 +127,8 @@ namespace randomx {
|
||||
#define codeDatasetInitAVX2SshLoad ADDR(randomx_dataset_init_avx2_ssh_load)
|
||||
#define codeDatasetInitAVX2SshPrefetch ADDR(randomx_dataset_init_avx2_ssh_prefetch)
|
||||
#define codeLoopStore ADDR(randomx_program_loop_store)
|
||||
#define codeLoopStoreHardAES ADDR(randomx_program_loop_store_hard_aes)
|
||||
#define codeLoopStoreSoftAES ADDR(randomx_program_loop_store_soft_aes)
|
||||
#define codeLoopEnd ADDR(randomx_program_loop_end)
|
||||
#define codeEpilogue ADDR(randomx_program_epilogue)
|
||||
#define codeProgramEnd ADDR(randomx_program_end)
|
||||
@@ -136,10 +140,13 @@ namespace randomx {
|
||||
#define prologueSize (codeLoopBegin - codePrologue)
|
||||
#define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
|
||||
#define loopLoadXOPSize (codeProgramStart - codeLoopLoadXOP)
|
||||
#define readDatasetSize (codeReadDatasetLightSshInit - codeReadDataset)
|
||||
#define readDatasetSize (codeReadDatasetV2 - codeReadDataset)
|
||||
#define readDatasetV2Size (codeReadDatasetLightSshInit - codeReadDatasetV2)
|
||||
#define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
|
||||
#define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
|
||||
#define loopStoreSize (codeLoopEnd - codeLoopStore)
|
||||
#define loopStoreSize (codeLoopStoreHardAES - codeLoopStore)
|
||||
#define loopStoreHardAESSize (codeLoopStoreSoftAES - codeLoopStoreHardAES)
|
||||
#define loopStoreSoftAESSize (codeLoopEnd - codeLoopStoreSoftAES)
|
||||
#define datasetInitSize (codeDatasetInitAVX2Prologue - codeDatasetInit)
|
||||
#define datasetInitAVX2PrologueSize (codeDatasetInitAVX2LoopEnd - codeDatasetInitAVX2Prologue)
|
||||
#define datasetInitAVX2LoopEndSize (codeDatasetInitAVX2Epilogue - codeDatasetInitAVX2LoopEnd)
|
||||
@@ -223,6 +230,8 @@ namespace randomx {
|
||||
JitCompilerX86::JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable) {
|
||||
BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
|
||||
|
||||
hasAES = xmrig::Cpu::info()->hasAES();
|
||||
|
||||
hasAVX = xmrig::Cpu::info()->hasAVX();
|
||||
hasAVX2 = xmrig::Cpu::info()->hasAVX2();
|
||||
|
||||
@@ -341,7 +350,14 @@ namespace randomx {
|
||||
vm_flags = flags;
|
||||
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_PREFETCH) {
|
||||
emit(codeReadDatasetV2, readDatasetV2Size, code, codePos);
|
||||
}
|
||||
else {
|
||||
emit(codeReadDataset, readDatasetSize, code, codePos);
|
||||
}
|
||||
|
||||
generateProgramEpilogue(prog, pcfg);
|
||||
}
|
||||
|
||||
@@ -424,8 +440,15 @@ namespace randomx {
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
codePos = ADDR(randomx_program_prologue_first_load) - ADDR(randomx_program_prologue);
|
||||
*(uint32_t*)(code + codePos + 4) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(code + codePos + 14) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_AES && !hasAES) {
|
||||
*(uint64_t*)(code + codePos + 9) = reinterpret_cast<uint64_t>(lutEnc);
|
||||
*(uint64_t*)(code + codePos + 27) = reinterpret_cast<uint64_t>(lutDec);
|
||||
}
|
||||
|
||||
*(uint32_t*)(code + codePos + 47) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(code + codePos + 57) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
|
||||
if (hasAVX) {
|
||||
uint32_t* p = (uint32_t*)(code + codePos + 61);
|
||||
*p = (*p & 0xFF000000U) | 0x0077F8C5U; // vzeroupper
|
||||
@@ -476,8 +499,21 @@ namespace randomx {
|
||||
*(uint64_t*)(code + codePos) = 0xc03349c08b49ull + (static_cast<uint64_t>(pcfg.readReg0) << 16) + (static_cast<uint64_t>(pcfg.readReg1) << 40);
|
||||
codePos += 6;
|
||||
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, RandomX_CurrentConfig.codePrefetchScratchpadTweakedSize, code, codePos);
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_AES) {
|
||||
if (hasAES) {
|
||||
memcpy(code + codePos, codeLoopStoreHardAES, loopStoreHardAESSize);
|
||||
codePos += loopStoreHardAESSize;
|
||||
}
|
||||
else {
|
||||
memcpy(code + codePos, codeLoopStoreSoftAES, loopStoreSoftAESSize);
|
||||
codePos += loopStoreSoftAESSize;
|
||||
}
|
||||
}
|
||||
else {
|
||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||
codePos += loopStoreSize;
|
||||
}
|
||||
|
||||
if (BranchesWithin32B) {
|
||||
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
|
||||
@@ -1307,7 +1343,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
int32_t t = prevCFROUND;
|
||||
|
||||
if (t > prevFPOperation) {
|
||||
if ((t > prevFPOperation) && !RandomX_CurrentConfig.Tweak_V2_CFROUND) {
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
memcpy(p + t, NOP26, 26);
|
||||
}
|
||||
@@ -1326,14 +1362,38 @@ namespace randomx {
|
||||
*(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24);
|
||||
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
*(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL;
|
||||
*(uint64_t*)(p + pos + 15) = 0x8900EB0414AE0F0AULL;
|
||||
*(uint32_t*)(p + pos + 23) = 0x202444;
|
||||
pos += 26;
|
||||
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
|
||||
*(uint32_t*)(p + pos + 7) = 0x1375F0A8;
|
||||
pos += 11;
|
||||
}
|
||||
else {
|
||||
*(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL;
|
||||
pos += 14;
|
||||
pos += 7;
|
||||
}
|
||||
*(uint64_t*)(p + pos) = 0x742024443B0CE083ULL;
|
||||
*(uint64_t*)(p + pos + 8) = 0x8900EB0414AE0F0AULL;
|
||||
*(uint32_t*)(p + pos + 16) = 0x202444;
|
||||
pos += 19;
|
||||
}
|
||||
else {
|
||||
pos += 7;
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
|
||||
if (BranchesWithin32B) {
|
||||
const uint32_t branch_begin = static_cast<uint32_t>(pos + 2) & 31;
|
||||
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if (branch_begin >= 30) {
|
||||
const uint32_t alignment_size = 32 - branch_begin;
|
||||
emit(NOPX[alignment_size - 1], alignment_size, code, pos);
|
||||
}
|
||||
}
|
||||
|
||||
*(uint32_t*)(p + pos) = 0x0775F0A8;
|
||||
pos += 4;
|
||||
}
|
||||
|
||||
*(uint64_t*)(p + pos) = 0x0414AE0F0CE083ULL;
|
||||
pos += 7;
|
||||
}
|
||||
|
||||
codePos = pos;
|
||||
@@ -1343,7 +1403,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
int32_t t = prevCFROUND;
|
||||
|
||||
if (t > prevFPOperation) {
|
||||
if ((t > prevFPOperation) && !RandomX_CurrentConfig.Tweak_V2_CFROUND){
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
memcpy(p + t, NOP25, 25);
|
||||
}
|
||||
@@ -1361,14 +1421,38 @@ namespace randomx {
|
||||
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
||||
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
*(uint64_t*)(p + pos + 6) = 0x742024443B0CE083ULL;
|
||||
*(uint64_t*)(p + pos + 14) = 0x8900EB0414AE0F0AULL;
|
||||
*(uint32_t*)(p + pos + 22) = 0x202444;
|
||||
pos += 25;
|
||||
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
|
||||
*(uint32_t*)(p + pos + 6) = 0x1375F0A8;
|
||||
pos += 10;
|
||||
}
|
||||
else {
|
||||
*(uint64_t*)(p + pos + 6) = 0x0414AE0F0CE083ULL;
|
||||
pos += 13;
|
||||
pos += 6;
|
||||
}
|
||||
*(uint64_t*)(p + pos) = 0x742024443B0CE083ULL;
|
||||
*(uint64_t*)(p + pos + 8) = 0x8900EB0414AE0F0AULL;
|
||||
*(uint32_t*)(p + pos + 16) = 0x202444;
|
||||
pos += 19;
|
||||
}
|
||||
else {
|
||||
pos += 6;
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
|
||||
if (BranchesWithin32B) {
|
||||
const uint32_t branch_begin = static_cast<uint32_t>(pos + 2) & 31;
|
||||
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if (branch_begin >= 30) {
|
||||
const uint32_t alignment_size = 32 - branch_begin;
|
||||
emit(NOPX[alignment_size - 1], alignment_size, code, pos);
|
||||
}
|
||||
}
|
||||
|
||||
*(uint32_t*)(p + pos) = 0x0775F0A8;
|
||||
pos += 4;
|
||||
}
|
||||
|
||||
*(uint64_t*)(p + pos) = 0x0414AE0F0CE083ULL;
|
||||
pos += 7;
|
||||
}
|
||||
|
||||
codePos = pos;
|
||||
|
||||
@@ -97,6 +97,7 @@ namespace randomx {
|
||||
# endif
|
||||
|
||||
bool BranchesWithin32B = false;
|
||||
bool hasAES;
|
||||
bool hasAVX;
|
||||
bool hasAVX2;
|
||||
bool initDatasetAVX2;
|
||||
|
||||
@@ -48,9 +48,12 @@
|
||||
.global DECL(randomx_program_loop_load_xop)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_read_dataset_v2)
|
||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||
.global DECL(randomx_program_loop_store)
|
||||
.global DECL(randomx_program_loop_store_hard_aes)
|
||||
.global DECL(randomx_program_loop_store_soft_aes)
|
||||
.global DECL(randomx_program_loop_end)
|
||||
.global DECL(randomx_dataset_init)
|
||||
.global DECL(randomx_dataset_init_avx2_prologue)
|
||||
@@ -101,19 +104,23 @@ DECL(randomx_program_prologue):
|
||||
movapd xmm15, xmmword ptr [scaleMask+rip]
|
||||
|
||||
DECL(randomx_program_prologue_first_load):
|
||||
sub rsp, 248
|
||||
mov rdx, 0x1111111111111111
|
||||
mov [rsp+232], rdx ;# aes_lut_enc
|
||||
mov rdx, 0x1111111111111111
|
||||
mov [rsp+240], rdx ;# aes_lut_dec
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
sub rsp, 40
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
mov dword ptr [rsp], 0x9FC0
|
||||
mov dword ptr [rsp+4], 0xBFC0
|
||||
mov dword ptr [rsp+8], 0xDFC0
|
||||
mov dword ptr [rsp+12], 0xFFC0
|
||||
mov dword ptr [rsp+32], -1
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
jmp DECL(randomx_program_imul_rcp_store)
|
||||
|
||||
.balign 64
|
||||
@@ -139,6 +146,9 @@ DECL(randomx_program_start):
|
||||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_v2):
|
||||
#include "asm/program_read_dataset_v2.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_init):
|
||||
#include "asm/program_read_dataset_sshash_init.inc"
|
||||
|
||||
@@ -148,6 +158,12 @@ DECL(randomx_program_read_dataset_sshash_fin):
|
||||
DECL(randomx_program_loop_store):
|
||||
#include "asm/program_loop_store.inc"
|
||||
|
||||
DECL(randomx_program_loop_store_hard_aes):
|
||||
#include "asm/program_loop_store_hard_aes.inc"
|
||||
|
||||
DECL(randomx_program_loop_store_soft_aes):
|
||||
#include "asm/program_loop_store_soft_aes.inc"
|
||||
|
||||
DECL(randomx_program_loop_end):
|
||||
nop
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_loop_load_xop
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_v2
|
||||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_dataset_init
|
||||
@@ -48,6 +49,8 @@ PUBLIC randomx_dataset_init_avx2_epilogue
|
||||
PUBLIC randomx_dataset_init_avx2_ssh_load
|
||||
PUBLIC randomx_dataset_init_avx2_ssh_prefetch
|
||||
PUBLIC randomx_program_loop_store
|
||||
PUBLIC randomx_program_loop_store_hard_aes
|
||||
PUBLIC randomx_program_loop_store_soft_aes
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_sshash_load
|
||||
@@ -90,19 +93,23 @@ randomx_program_prologue PROC
|
||||
randomx_program_prologue ENDP
|
||||
|
||||
randomx_program_prologue_first_load PROC
|
||||
sub rsp, 248
|
||||
mov rdx, 01111111111111111h
|
||||
mov [rsp+232], rdx ;# aes_lut_enc
|
||||
mov rdx, 01111111111111111h
|
||||
mov [rsp+240], rdx ;# aes_lut_dec
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
sub rsp, 40
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
mov dword ptr [rsp], 9FC0h
|
||||
mov dword ptr [rsp+4], 0BFC0h
|
||||
mov dword ptr [rsp+8], 0DFC0h
|
||||
mov dword ptr [rsp+12], 0FFC0h
|
||||
mov dword ptr [rsp+32], -1
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
jmp randomx_program_imul_rcp_store
|
||||
randomx_program_prologue_first_load ENDP
|
||||
|
||||
@@ -135,6 +142,10 @@ randomx_program_read_dataset PROC
|
||||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_read_dataset_v2 PROC
|
||||
include asm/program_read_dataset_v2.inc
|
||||
randomx_program_read_dataset_v2 ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_init PROC
|
||||
include asm/program_read_dataset_sshash_init.inc
|
||||
randomx_program_read_dataset_sshash_init ENDP
|
||||
@@ -147,6 +158,14 @@ randomx_program_loop_store PROC
|
||||
include asm/program_loop_store.inc
|
||||
randomx_program_loop_store ENDP
|
||||
|
||||
randomx_program_loop_store_hard_aes PROC
|
||||
include asm/program_loop_store_hard_aes.inc
|
||||
randomx_program_loop_store_hard_aes ENDP
|
||||
|
||||
randomx_program_loop_store_soft_aes PROC
|
||||
include asm/program_loop_store_soft_aes.inc
|
||||
randomx_program_loop_store_soft_aes ENDP
|
||||
|
||||
randomx_program_loop_end PROC
|
||||
nop
|
||||
randomx_program_loop_end ENDP
|
||||
|
||||
@@ -40,9 +40,12 @@ extern "C" {
|
||||
void randomx_program_loop_load_xop();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_read_dataset_v2();
|
||||
void randomx_program_read_dataset_sshash_init();
|
||||
void randomx_program_read_dataset_sshash_fin();
|
||||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_store_hard_aes();
|
||||
void randomx_program_loop_store_soft_aes();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_dataset_init();
|
||||
void randomx_dataset_init_avx2_prologue();
|
||||
|
||||
@@ -50,6 +50,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <cassert>
|
||||
|
||||
#include "crypto/rx/Profiler.h"
|
||||
#include "base/net/stratum/Job.h"
|
||||
|
||||
// Monero V2 configuration: starts from the RandomX_ConfigurationBase defaults, then switches on
// all four V2 tweak flags (each is initialised to 0 by the base constructor) and enlarges the program.
RandomX_ConfigurationMoneroV2::RandomX_ConfigurationMoneroV2()
{
	// Feature flags consulted elsewhere through RandomX_CurrentConfig.
	Tweak_V2_COMMITMENT = 1; // worker post-processes each hash with randomx_calculate_commitment()
	Tweak_V2_PREFETCH   = 1; // JIT emits the program_read_dataset_v2 block
	Tweak_V2_AES        = 1; // JIT emits the hard/soft AES loop-store block
	Tweak_V2_CFROUND    = 1; // CFROUND changes the rounding mode only when (isrc & 60) == 0

	// Same value as RANDOMX_PROGRAM_MAX_SIZE.
	ProgramSize = 384;
}
|
||||
|
||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||
{
|
||||
@@ -150,6 +161,10 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||
, RANDOMX_FREQ_CFROUND(1)
|
||||
, RANDOMX_FREQ_ISTORE(16)
|
||||
, RANDOMX_FREQ_NOP(0)
|
||||
, Tweak_V2_CFROUND(0)
|
||||
, Tweak_V2_AES(0)
|
||||
, Tweak_V2_PREFETCH(0)
|
||||
, Tweak_V2_COMMITMENT(0)
|
||||
{
|
||||
fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd);
|
||||
fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450);
|
||||
@@ -367,6 +382,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
||||
}
|
||||
|
||||
RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
||||
RandomX_ConfigurationMoneroV2 RandomX_MoneroConfigV2;
|
||||
RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
||||
RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
||||
RandomX_ConfigurationGraft RandomX_GraftConfig;
|
||||
@@ -614,4 +630,11 @@ extern "C" {
|
||||
machine->hashAndFill(output, tempHash);
|
||||
}
|
||||
|
||||
void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out) {
|
||||
uint8_t buf[xmrig::Job::kMaxBlobSize + RANDOMX_HASH_SIZE];
|
||||
memcpy(buf, input, inputSize);
|
||||
memcpy(buf + inputSize, hash_in, RANDOMX_HASH_SIZE);
|
||||
rx_blake2b_wrapper::run(com_out, RANDOMX_HASH_SIZE, buf, inputSize + RANDOMX_HASH_SIZE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -125,6 +125,11 @@ struct RandomX_ConfigurationBase
|
||||
|
||||
rx_vec_i128 fillAes4Rx4_Key[8];
|
||||
|
||||
uint32_t Tweak_V2_CFROUND : 1;
|
||||
uint32_t Tweak_V2_AES : 1;
|
||||
uint32_t Tweak_V2_PREFETCH : 1;
|
||||
uint32_t Tweak_V2_COMMITMENT : 1;
|
||||
|
||||
uint8_t codeSshPrefetchTweaked[20];
|
||||
uint8_t codePrefetchScratchpadTweaked[28];
|
||||
uint32_t codePrefetchScratchpadTweakedSize;
|
||||
@@ -143,6 +148,7 @@ struct RandomX_ConfigurationBase
|
||||
};
|
||||
|
||||
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
|
||||
struct RandomX_ConfigurationMoneroV2 : public RandomX_ConfigurationBase { RandomX_ConfigurationMoneroV2(); };
|
||||
struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); };
|
||||
struct RandomX_ConfigurationArqma : public RandomX_ConfigurationBase { RandomX_ConfigurationArqma(); };
|
||||
struct RandomX_ConfigurationGraft : public RandomX_ConfigurationBase { RandomX_ConfigurationGraft(); };
|
||||
@@ -150,6 +156,7 @@ struct RandomX_ConfigurationSafex : public RandomX_ConfigurationBase { RandomX_C
|
||||
struct RandomX_ConfigurationYada : public RandomX_ConfigurationBase { RandomX_ConfigurationYada(); };
|
||||
|
||||
extern RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
||||
extern RandomX_ConfigurationMoneroV2 RandomX_MoneroConfigV2;
|
||||
extern RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
||||
extern RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
||||
extern RandomX_ConfigurationGraft RandomX_GraftConfig;
|
||||
@@ -318,6 +325,17 @@ RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *inpu
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
|
||||
|
||||
/**
|
||||
* Calculate a RandomX commitment from a RandomX hash and its input.
|
||||
*
|
||||
* @param input is a pointer to memory that was hashed. Must not be NULL.
|
||||
* @param inputSize is the number of bytes in the input.
|
||||
* @param hash_in is the output from randomx_calculate_hash* (RANDOMX_HASH_SIZE bytes).
|
||||
* @param com_out is a pointer to memory where the commitment will be stored. Must not
|
||||
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -29,15 +29,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
|
||||
alignas(64) uint32_t lutEnc0[256];
|
||||
alignas(64) uint32_t lutEnc1[256];
|
||||
alignas(64) uint32_t lutEnc2[256];
|
||||
alignas(64) uint32_t lutEnc3[256];
|
||||
|
||||
alignas(64) uint32_t lutDec0[256];
|
||||
alignas(64) uint32_t lutDec1[256];
|
||||
alignas(64) uint32_t lutDec2[256];
|
||||
alignas(64) uint32_t lutDec3[256];
|
||||
alignas(64) uint32_t lutEnc[4][256];
|
||||
alignas(64) uint32_t lutDec[4][256];
|
||||
|
||||
alignas(64) uint8_t lutEncIndex[4][32];
|
||||
alignas(64) uint8_t lutDecIndex[4][32];
|
||||
@@ -102,10 +95,10 @@ static struct SAESInitializer
|
||||
p[2] = s;
|
||||
p[3] = mul_gf2(s, 3);
|
||||
|
||||
lutEnc0[i] = w; w = (w << 8) | (w >> 24);
|
||||
lutEnc1[i] = w; w = (w << 8) | (w >> 24);
|
||||
lutEnc2[i] = w; w = (w << 8) | (w >> 24);
|
||||
lutEnc3[i] = w;
|
||||
lutEnc[0][i] = w; w = (w << 8) | (w >> 24);
|
||||
lutEnc[1][i] = w; w = (w << 8) | (w >> 24);
|
||||
lutEnc[2][i] = w; w = (w << 8) | (w >> 24);
|
||||
lutEnc[3][i] = w;
|
||||
|
||||
s = sbox_reverse[i];
|
||||
p[0] = mul_gf2(s, 0xe);
|
||||
@@ -113,10 +106,10 @@ static struct SAESInitializer
|
||||
p[2] = mul_gf2(s, 0xd);
|
||||
p[3] = mul_gf2(s, 0xb);
|
||||
|
||||
lutDec0[i] = w; w = (w << 8) | (w >> 24);
|
||||
lutDec1[i] = w; w = (w << 8) | (w >> 24);
|
||||
lutDec2[i] = w; w = (w << 8) | (w >> 24);
|
||||
lutDec3[i] = w;
|
||||
lutDec[0][i] = w; w = (w << 8) | (w >> 24);
|
||||
lutDec[1][i] = w; w = (w << 8) | (w >> 24);
|
||||
lutDec[2][i] = w; w = (w << 8) | (w >> 24);
|
||||
lutDec[3][i] = w;
|
||||
}
|
||||
|
||||
memset(lutEncIndex, -1, sizeof(lutEncIndex));
|
||||
|
||||
@@ -32,14 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <stdint.h>
|
||||
#include "crypto/randomx/intrin_portable.h"
|
||||
|
||||
extern uint32_t lutEnc0[256];
|
||||
extern uint32_t lutEnc1[256];
|
||||
extern uint32_t lutEnc2[256];
|
||||
extern uint32_t lutEnc3[256];
|
||||
extern uint32_t lutDec0[256];
|
||||
extern uint32_t lutDec1[256];
|
||||
extern uint32_t lutDec2[256];
|
||||
extern uint32_t lutDec3[256];
|
||||
extern uint32_t lutEnc[4][256];
|
||||
extern uint32_t lutDec[4][256];
|
||||
|
||||
extern uint8_t lutEncIndex[4][32];
|
||||
extern uint8_t lutDecIndex[4][32];
|
||||
@@ -52,25 +46,25 @@ FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
|
||||
uint32_t s0 = lutEnc0[s[ 0]];
|
||||
uint32_t s1 = lutEnc0[s[ 4]];
|
||||
uint32_t s2 = lutEnc0[s[ 8]];
|
||||
uint32_t s3 = lutEnc0[s[12]];
|
||||
uint32_t s0 = lutEnc[0][s[ 0]];
|
||||
uint32_t s1 = lutEnc[0][s[ 4]];
|
||||
uint32_t s2 = lutEnc[0][s[ 8]];
|
||||
uint32_t s3 = lutEnc[0][s[12]];
|
||||
|
||||
s0 ^= lutEnc1[s[ 5]];
|
||||
s1 ^= lutEnc1[s[ 9]];
|
||||
s2 ^= lutEnc1[s[13]];
|
||||
s3 ^= lutEnc1[s[ 1]];
|
||||
s0 ^= lutEnc[1][s[ 5]];
|
||||
s1 ^= lutEnc[1][s[ 9]];
|
||||
s2 ^= lutEnc[1][s[13]];
|
||||
s3 ^= lutEnc[1][s[ 1]];
|
||||
|
||||
s0 ^= lutEnc2[s[10]];
|
||||
s1 ^= lutEnc2[s[14]];
|
||||
s2 ^= lutEnc2[s[ 2]];
|
||||
s3 ^= lutEnc2[s[ 6]];
|
||||
s0 ^= lutEnc[2][s[10]];
|
||||
s1 ^= lutEnc[2][s[14]];
|
||||
s2 ^= lutEnc[2][s[ 2]];
|
||||
s3 ^= lutEnc[2][s[ 6]];
|
||||
|
||||
s0 ^= lutEnc3[s[15]];
|
||||
s1 ^= lutEnc3[s[ 3]];
|
||||
s2 ^= lutEnc3[s[ 7]];
|
||||
s3 ^= lutEnc3[s[11]];
|
||||
s0 ^= lutEnc[3][s[15]];
|
||||
s1 ^= lutEnc[3][s[ 3]];
|
||||
s2 ^= lutEnc[3][s[ 7]];
|
||||
s3 ^= lutEnc[3][s[11]];
|
||||
|
||||
return rx_xor_vec_i128(rx_set_int_vec_i128(s3, s2, s1, s0), key);
|
||||
}
|
||||
@@ -80,25 +74,25 @@ FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
|
||||
uint32_t s0 = lutDec0[s[ 0]];
|
||||
uint32_t s1 = lutDec0[s[ 4]];
|
||||
uint32_t s2 = lutDec0[s[ 8]];
|
||||
uint32_t s3 = lutDec0[s[12]];
|
||||
uint32_t s0 = lutDec[0][s[ 0]];
|
||||
uint32_t s1 = lutDec[0][s[ 4]];
|
||||
uint32_t s2 = lutDec[0][s[ 8]];
|
||||
uint32_t s3 = lutDec[0][s[12]];
|
||||
|
||||
s0 ^= lutDec1[s[13]];
|
||||
s1 ^= lutDec1[s[ 1]];
|
||||
s2 ^= lutDec1[s[ 5]];
|
||||
s3 ^= lutDec1[s[ 9]];
|
||||
s0 ^= lutDec[1][s[13]];
|
||||
s1 ^= lutDec[1][s[ 1]];
|
||||
s2 ^= lutDec[1][s[ 5]];
|
||||
s3 ^= lutDec[1][s[ 9]];
|
||||
|
||||
s0 ^= lutDec2[s[10]];
|
||||
s1 ^= lutDec2[s[14]];
|
||||
s2 ^= lutDec2[s[ 2]];
|
||||
s3 ^= lutDec2[s[ 6]];
|
||||
s0 ^= lutDec[2][s[10]];
|
||||
s1 ^= lutDec[2][s[14]];
|
||||
s2 ^= lutDec[2][s[ 2]];
|
||||
s3 ^= lutDec[2][s[ 6]];
|
||||
|
||||
s0 ^= lutDec3[s[ 7]];
|
||||
s1 ^= lutDec3[s[11]];
|
||||
s2 ^= lutDec3[s[15]];
|
||||
s3 ^= lutDec3[s[ 3]];
|
||||
s0 ^= lutDec[3][s[ 7]];
|
||||
s1 ^= lutDec[3][s[11]];
|
||||
s2 ^= lutDec[3][s[15]];
|
||||
s3 ^= lutDec[3][s[ 3]];
|
||||
|
||||
return rx_xor_vec_i128(rx_set_int_vec_i128(s3, s2, s1, s0), key);
|
||||
}
|
||||
@@ -113,10 +107,10 @@ FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
|
||||
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
|
||||
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
|
||||
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
|
||||
(lutEnc[0][s0 & 0xff] ^ lutEnc[1][(s3 >> 8) & 0xff] ^ lutEnc[2][(s2 >> 16) & 0xff] ^ lutEnc[3][s1 >> 24]),
|
||||
(lutEnc[0][s1 & 0xff] ^ lutEnc[1][(s0 >> 8) & 0xff] ^ lutEnc[2][(s3 >> 16) & 0xff] ^ lutEnc[3][s2 >> 24]),
|
||||
(lutEnc[0][s2 & 0xff] ^ lutEnc[1][(s1 >> 8) & 0xff] ^ lutEnc[2][(s0 >> 16) & 0xff] ^ lutEnc[3][s3 >> 24]),
|
||||
(lutEnc[0][s3 & 0xff] ^ lutEnc[1][(s2 >> 8) & 0xff] ^ lutEnc[2][(s1 >> 16) & 0xff] ^ lutEnc[3][s0 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
@@ -132,10 +126,10 @@ FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
|
||||
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
|
||||
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
|
||||
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
|
||||
(lutDec[0][s0 & 0xff] ^ lutDec[1][(s1 >> 8) & 0xff] ^ lutDec[2][(s2 >> 16) & 0xff] ^ lutDec[3][s3 >> 24]),
|
||||
(lutDec[0][s1 & 0xff] ^ lutDec[1][(s2 >> 8) & 0xff] ^ lutDec[2][(s3 >> 16) & 0xff] ^ lutDec[3][s0 >> 24]),
|
||||
(lutDec[0][s2 & 0xff] ^ lutDec[1][(s3 >> 8) & 0xff] ^ lutDec[2][(s0 >> 16) & 0xff] ^ lutDec[3][s1 >> 24]),
|
||||
(lutDec[0][s3 & 0xff] ^ lutDec[1][(s0 >> 8) & 0xff] ^ lutDec[2][(s1 >> 16) & 0xff] ^ lutDec[3][s2 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
|
||||
@@ -77,10 +77,13 @@ namespace randomx {
|
||||
|
||||
executeBytecode(bytecode, scratchpad, config);
|
||||
|
||||
mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3];
|
||||
mem.mx &= CacheLineAlignMask;
|
||||
datasetPrefetch(datasetOffset + mem.mx);
|
||||
datasetRead(datasetOffset + mem.ma, nreg.r);
|
||||
const uint64_t readPtr = datasetOffset + (mem.ma & CacheLineAlignMask);
|
||||
|
||||
auto& mp = RandomX_CurrentConfig.Tweak_V2_PREFETCH ? mem.ma : mem.mx;
|
||||
mp ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3];
|
||||
|
||||
datasetPrefetch(datasetOffset + (mp & CacheLineAlignMask));
|
||||
datasetRead(readPtr, nreg.r);
|
||||
std::swap(mem.mx, mem.ma);
|
||||
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
|
||||
@@ -32,6 +32,9 @@ xmrig::Algorithm::Id xmrig::RxAlgo::apply(Algorithm::Id algorithm)
|
||||
const RandomX_ConfigurationBase *xmrig::RxAlgo::base(Algorithm::Id algorithm)
|
||||
{
|
||||
switch (algorithm) {
|
||||
case Algorithm::RX_V2:
|
||||
return &RandomX_MoneroConfigV2;
|
||||
|
||||
case Algorithm::RX_WOW:
|
||||
return &RandomX_WowneroConfig;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user