1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-10 09:02:45 -05:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Tony Butler
e970d9e856 Merge a776ebf394 into 77e2f3a028 2024-01-13 21:51:11 -08:00
xmrig
77e2f3a028 Merge pull request #3399 from SChernykh/dev
Fixed Zephyr mining (OpenCL)
2024-01-14 09:01:44 +07:00
SChernykh
206295c6cb Fixed Zephyr mining (OpenCL) 2024-01-13 20:14:08 +01:00
Tony Butler
a776ebf394 Make AMD assembly completely optional through WITH_ASM_AMD (default ON) 2023-07-12 02:06:53 -06:00
16 changed files with 2868 additions and 2465 deletions

View File

@@ -14,7 +14,9 @@ option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON)
option(WITH_ASM_AMD "Enable ASM for AMD processors" ON)
option(WITH_MSR "Enable MSR mod" ON)
option(WITH_MSR_ZEN "Enable MSR mod for AMD Zen-based processors" ON)
option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" ON)

View File

@@ -44,9 +44,17 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
add_definitions(/DXMRIG_FEATURE_ASM)
if (WITH_ASM_AMD)
add_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=ON (+amd)")
else()
message("-- WITH_ASM=ON (-amd)")
endif()
else()
set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "")
remove_definitions(/DXMRIG_FEATURE_ASM)
remove_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=OFF")
endif()

View File

@@ -104,8 +104,13 @@ if (WITH_RANDOMX)
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
add_definitions(/DXMRIG_FEATURE_MSR)
add_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON")
if (WITH_MSR_ZEN)
add_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (+zen)")
else()
remove_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (-zen)")
endif()
if (XMRIG_OS_WIN)
list(APPEND SOURCES_CRYPTO

View File

@@ -138,6 +138,93 @@ __kernel void blake2b_initial_hash(__global void *out, __global const void* bloc
t[7] = hash[7];
}
void blake2b_512_process_double_block_variable(ulong *out, ulong* m, __global const ulong* in, uint in_len, uint out_len)
{
ulong v[16] =
{
iv0 ^ (0x01010000u | out_len), iv1, iv2, iv3, iv4 , iv5, iv6, iv7,
iv0 , iv1, iv2, iv3, iv4 ^ 128, iv5, iv6, iv7,
};
BLAKE2B_ROUNDS();
ulong h[8];
v[0] = h[0] = v[0] ^ v[8] ^ iv0 ^ (0x01010000u | out_len);
v[1] = h[1] = v[1] ^ v[9] ^ iv1;
v[2] = h[2] = v[2] ^ v[10] ^ iv2;
v[3] = h[3] = v[3] ^ v[11] ^ iv3;
v[4] = h[4] = v[4] ^ v[12] ^ iv4;
v[5] = h[5] = v[5] ^ v[13] ^ iv5;
v[6] = h[6] = v[6] ^ v[14] ^ iv6;
v[7] = h[7] = v[7] ^ v[15] ^ iv7;
v[8] = iv0;
v[9] = iv1;
v[10] = iv2;
v[11] = iv3;
v[12] = iv4 ^ in_len;
v[13] = iv5;
v[14] = ~iv6;
v[15] = iv7;
m[ 0] = (in_len > 128) ? in[16] : 0;
m[ 1] = (in_len > 136) ? in[17] : 0;
m[ 2] = (in_len > 144) ? in[18] : 0;
m[ 3] = (in_len > 152) ? in[19] : 0;
m[ 4] = (in_len > 160) ? in[20] : 0;
m[ 5] = (in_len > 168) ? in[21] : 0;
m[ 6] = (in_len > 176) ? in[22] : 0;
m[ 7] = (in_len > 184) ? in[23] : 0;
m[ 8] = (in_len > 192) ? in[24] : 0;
m[ 9] = (in_len > 200) ? in[25] : 0;
m[10] = (in_len > 208) ? in[26] : 0;
m[11] = (in_len > 216) ? in[27] : 0;
m[12] = (in_len > 224) ? in[28] : 0;
m[13] = (in_len > 232) ? in[29] : 0;
m[14] = (in_len > 240) ? in[30] : 0;
m[15] = (in_len > 248) ? in[31] : 0;
if (in_len % sizeof(ulong))
m[(in_len - 128) / sizeof(ulong)] &= (ulong)(-1) >> (64 - (in_len % sizeof(ulong)) * 8);
BLAKE2B_ROUNDS();
if (out_len > 0) out[0] = h[0] ^ v[0] ^ v[8];
if (out_len > 8) out[1] = h[1] ^ v[1] ^ v[9];
if (out_len > 16) out[2] = h[2] ^ v[2] ^ v[10];
if (out_len > 24) out[3] = h[3] ^ v[3] ^ v[11];
if (out_len > 32) out[4] = h[4] ^ v[4] ^ v[12];
if (out_len > 40) out[5] = h[5] ^ v[5] ^ v[13];
if (out_len > 48) out[6] = h[6] ^ v[6] ^ v[14];
if (out_len > 56) out[7] = h[7] ^ v[7] ^ v[15];
}
__attribute__((reqd_work_group_size(64, 1, 1)))
__kernel void blake2b_initial_hash_double(__global void *out, __global const void* blockTemplate, uint blockTemplateSize, uint start_nonce)
{
const uint global_index = get_global_id(0);
__global const ulong* p = (__global const ulong*) blockTemplate;
ulong m[16] = { p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15] };
const ulong nonce = start_nonce + global_index;
m[4] = (m[4] & ((ulong)(-1) >> 8)) | (nonce << 56);
m[5] = (m[5] & ((ulong)(-1) << 24)) | (nonce >> 8);
ulong hash[8];
blake2b_512_process_double_block_variable(hash, m, p, blockTemplateSize, 64);
__global ulong* t = ((__global ulong*) out) + global_index * 8;
t[0] = hash[0];
t[1] = hash[1];
t[2] = hash[2];
t[3] = hash[3];
t[4] = hash[4];
t[5] = hash[5];
t[6] = hash[6];
t[7] = hash[7];
}
#define in_len 256
#define out_len 32

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,58 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.h"
#include "backend/opencl/wrappers/OclLib.h"
void xmrig::Blake2bInitialHashDoubleKernel::enqueue(cl_command_queue queue, size_t threads)
{
const size_t gthreads = threads;
static const size_t lthreads = 64;
enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
}
// __kernel void blake2b_initial_hash_double(__global void *out, __global const void* blockTemplate, uint blockTemplateSize, uint start_nonce)
void xmrig::Blake2bInitialHashDoubleKernel::setArgs(cl_mem out, cl_mem blockTemplate)
{
setArg(0, sizeof(cl_mem), &out);
setArg(1, sizeof(cl_mem), &blockTemplate);
}
void xmrig::Blake2bInitialHashDoubleKernel::setBlobSize(size_t size)
{
const uint32_t s = size;
setArg(2, sizeof(uint32_t), &s);
}
void xmrig::Blake2bInitialHashDoubleKernel::setNonce(uint32_t nonce)
{
setArg(3, sizeof(uint32_t), &nonce);
}

View File

@@ -0,0 +1,50 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_BLAKE2BINITIALHASHDOUBLEKERNEL_H
#define XMRIG_BLAKE2BINITIALHASHDOUBLEKERNEL_H
#include "backend/opencl/wrappers/OclKernel.h"
namespace xmrig {
class Blake2bInitialHashDoubleKernel : public OclKernel
{
public:
inline Blake2bInitialHashDoubleKernel(cl_program program) : OclKernel(program, "blake2b_initial_hash_double") {}
void enqueue(cl_command_queue queue, size_t threads);
void setArgs(cl_mem out, cl_mem blockTemplate);
void setBlobSize(size_t size);
void setNonce(uint32_t nonce);
};
} // namespace xmrig
#endif /* XMRIG_BLAKE2BINITIALHASHDOUBLEKERNEL_H */

View File

@@ -80,6 +80,7 @@ if (WITH_OPENCL)
if (WITH_RANDOMX)
list(APPEND HEADERS_BACKEND_OPENCL
src/backend/opencl/kernels/rx/Blake2bHashRegistersKernel.h
src/backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.h
src/backend/opencl/kernels/rx/Blake2bInitialHashKernel.h
src/backend/opencl/kernels/rx/ExecuteVmKernel.h
src/backend/opencl/kernels/rx/FillAesKernel.h
@@ -96,6 +97,7 @@ if (WITH_OPENCL)
list(APPEND SOURCES_BACKEND_OPENCL
src/backend/opencl/generators/ocl_generic_rx_generator.cpp
src/backend/opencl/kernels/rx/Blake2bHashRegistersKernel.cpp
src/backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.cpp
src/backend/opencl/kernels/rx/Blake2bInitialHashKernel.cpp
src/backend/opencl/kernels/rx/ExecuteVmKernel.cpp
src/backend/opencl/kernels/rx/FillAesKernel.cpp

View File

@@ -25,6 +25,7 @@
#include "backend/opencl/runners/OclRxBaseRunner.h"
#include "backend/opencl/kernels/rx/Blake2bHashRegistersKernel.h"
#include "backend/opencl/kernels/rx/Blake2bInitialHashKernel.h"
#include "backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.h"
#include "backend/opencl/kernels/rx/FillAesKernel.h"
#include "backend/opencl/kernels/rx/FindSharesKernel.h"
#include "backend/opencl/kernels/rx/HashAesKernel.h"
@@ -71,6 +72,7 @@ xmrig::OclRxBaseRunner::~OclRxBaseRunner()
delete m_fillAes4Rx4_entropy;
delete m_hashAes1Rx4;
delete m_blake2b_initial_hash;
delete m_blake2b_initial_hash_double;
delete m_blake2b_hash_registers_32;
delete m_blake2b_hash_registers_64;
delete m_find_shares;
@@ -87,12 +89,28 @@ void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const uint32_t zero = 0;
m_blake2b_initial_hash->setNonce(nonce);
if (m_jobSize <= 128) {
m_blake2b_initial_hash->setNonce(nonce);
}
else if (m_jobSize <= 256) {
m_blake2b_initial_hash_double->setNonce(nonce);
}
else {
hashOutput[0xFF] = 0;
return;
}
m_find_shares->setNonce(nonce);
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero);
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
if (m_jobSize <= 128) {
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
}
else {
m_blake2b_initial_hash_double->enqueue(m_queue, m_intensity);
}
m_fillAes1Rx4_scratchpad->enqueue(m_queue, m_intensity);
const uint32_t programCount = RxAlgo::programCount(m_algorithm);
@@ -134,7 +152,11 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
m_jobSize = job.size();
m_blake2b_initial_hash->setBlobSize(job.size());
m_blake2b_initial_hash_double->setBlobSize(job.size());
m_find_shares->setTarget(job.target());
}
@@ -166,6 +188,9 @@ void xmrig::OclRxBaseRunner::build()
m_blake2b_initial_hash = new Blake2bInitialHashKernel(m_program);
m_blake2b_initial_hash->setArgs(m_hashes, m_input);
m_blake2b_initial_hash_double = new Blake2bInitialHashDoubleKernel(m_program);
m_blake2b_initial_hash_double->setArgs(m_hashes, m_input);
m_blake2b_hash_registers_32 = new Blake2bHashRegistersKernel(m_program, "blake2b_hash_registers_32");
m_blake2b_hash_registers_64 = new Blake2bHashRegistersKernel(m_program, "blake2b_hash_registers_64");

View File

@@ -35,6 +35,7 @@ namespace xmrig {
class Blake2bHashRegistersKernel;
class Blake2bInitialHashKernel;
class Blake2bInitialHashDoubleKernel;
class FillAesKernel;
class FindSharesKernel;
class HashAesKernel;
@@ -58,21 +59,24 @@ protected:
protected:
virtual void execute(uint32_t iteration) = 0;
Blake2bHashRegistersKernel *m_blake2b_hash_registers_32 = nullptr;
Blake2bHashRegistersKernel *m_blake2b_hash_registers_64 = nullptr;
Blake2bInitialHashKernel *m_blake2b_initial_hash = nullptr;
Blake2bHashRegistersKernel *m_blake2b_hash_registers_32 = nullptr;
Blake2bHashRegistersKernel *m_blake2b_hash_registers_64 = nullptr;
Blake2bInitialHashKernel *m_blake2b_initial_hash = nullptr;
Blake2bInitialHashDoubleKernel *m_blake2b_initial_hash_double = nullptr;
Buffer m_seed;
cl_mem m_dataset = nullptr;
cl_mem m_entropy = nullptr;
cl_mem m_hashes = nullptr;
cl_mem m_rounding = nullptr;
cl_mem m_scratchpads = nullptr;
FillAesKernel *m_fillAes1Rx4_scratchpad = nullptr;
FillAesKernel *m_fillAes4Rx4_entropy = nullptr;
FindSharesKernel *m_find_shares = nullptr;
HashAesKernel *m_hashAes1Rx4 = nullptr;
uint32_t m_gcn_version = 12;
uint32_t m_worksize = 8;
cl_mem m_dataset = nullptr;
cl_mem m_entropy = nullptr;
cl_mem m_hashes = nullptr;
cl_mem m_rounding = nullptr;
cl_mem m_scratchpads = nullptr;
FillAesKernel *m_fillAes1Rx4_scratchpad = nullptr;
FillAesKernel *m_fillAes4Rx4_entropy = nullptr;
FindSharesKernel *m_find_shares = nullptr;
HashAesKernel *m_hashAes1Rx4 = nullptr;
uint32_t m_gcn_version = 12;
uint32_t m_worksize = 8;
size_t m_jobSize = 0;
};

View File

@@ -94,7 +94,13 @@ static inline const std::string &usage()
# ifdef XMRIG_ALGO_RANDOMX
u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n";
# endif
# ifdef XMRIG_FEATURE_ASM
# ifdef XMRIG_FEATURE_ASM_AMD
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
# else
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel\n";
# endif
# endif
# if defined(__x86_64__) || defined(_M_AMD64)
u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";

View File

@@ -55,6 +55,7 @@ bool cn_vaes_enabled = false;
#ifdef XMRIG_FEATURE_ASM
#ifdef XMRIG_FEATURE_ASM_AMD
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \
@@ -63,34 +64,50 @@ bool cn_vaes_enabled = false;
m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \
m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \
} while (0)
#else
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_DOUBLE][Assembly::INTEL] = cryptonight_double_hash_asm<algo, Assembly::INTEL>; \
} while (0)
#endif
namespace xmrig {
cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
@@ -160,31 +177,41 @@ static void patchAsmVariants()
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
# endif
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000);
# ifdef XMRIG_ALGO_CN_PICO
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
# endif
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
# endif
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000);
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000);
# endif
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000);
cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000);
cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000);
# endif
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000);
# ifdef XMRIG_ALGO_CN_PICO
cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000);
cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000);
# endif
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
# endif
@@ -220,8 +247,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}
@@ -231,8 +260,10 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask();
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
}
@@ -241,8 +272,10 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();
patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
}
# endif
@@ -251,8 +284,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations();
patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}
@@ -260,8 +295,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations();
patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}

View File

@@ -852,12 +852,16 @@ extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
#endif
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
#endif
namespace xmrig {
@@ -867,28 +871,38 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_upx2_mainloop_asm;
@@ -964,46 +978,54 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) {
cnv2_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cnv2_mainloop_ryzen_asm(ctx);
}
else {
cnv2_mainloop_bulldozer_asm(ctx);
}
# endif
}
else if (ALGO == Algorithm::CN_HALF) {
if (ASM == Assembly::INTEL) {
cn_half_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_half_mainloop_ryzen_asm(ctx);
}
else {
cn_half_mainloop_bulldozer_asm(ctx);
}
# endif
}
# ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) {
if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx);
}
else {
cn_trtl_mainloop_bulldozer_asm(ctx);
}
# endif
}
else if (ALGO == Algorithm::CN_PICO_TLO) {
if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_tlo_mainloop_ryzen_asm(ctx);
}
else {
cn_tlo_mainloop_bulldozer_asm(ctx);
}
# endif
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
@@ -1013,23 +1035,27 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_zls_mainloop_ryzen_asm(ctx);
}
else {
cn_zls_mainloop_bulldozer_asm(ctx);
}
# endif
}
else if (ALGO == Algorithm::CN_DOUBLE) {
if (ASM == Assembly::INTEL) {
cn_double_mainloop_ivybridge_asm(ctx);
}
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) {
cn_double_mainloop_ryzen_asm(ctx);
}
else {
cn_double_mainloop_bulldozer_asm(ctx);
}
# endif
}
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
@@ -1094,12 +1120,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
# ifdef XMRIG_FEATURE_ASM_AMD
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx);
}
else {
cn_upx2_double_mainloop_asm(ctx);
}
# else
cn_upx2_double_mainloop_asm(ctx);
# endif
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {

View File

@@ -15,12 +15,16 @@
.global FN_PREFIX(cnv1_double_mainloop_asm)
.global FN_PREFIX(cnv1_quad_mainloop_asm)
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
#endif
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
.global FN_PREFIX(cnv2_rwz_mainloop_asm)
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
#endif
ALIGN(64)
FN_PREFIX(cnv1_single_mainloop_asm):
@@ -58,6 +62,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48
@@ -75,6 +80,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
add rsp, 48
ret 0
mov eax, 3735929054
#endif
ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
@@ -103,6 +109,7 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
sub rsp, 48
@@ -111,6 +118,7 @@ FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
add rsp, 48
ret 0
mov eax, 3735929054
#endif
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits

View File

@@ -5,12 +5,16 @@
.global cnv1_double_mainloop_asm
.global cnv1_quad_mainloop_asm
.global cnv2_mainloop_ivybridge_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm
#endif
.global cnv2_double_mainloop_sandybridge_asm
.global cnv2_rwz_mainloop_asm
.global cnv2_rwz_double_mainloop_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_upx_double_mainloop_zen3_asm
#endif
ALIGN(64)
cnv1_single_mainloop_asm:
@@ -36,6 +40,7 @@ cnv2_mainloop_ivybridge_asm:
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc"
@@ -47,6 +52,7 @@ cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0
mov eax, 3735929054
#endif
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm:
@@ -66,8 +72,10 @@ cnv2_rwz_double_mainloop_asm:
ret 0
mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64)
cnv2_upx_double_mainloop_zen3_asm:
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
ret 0
mov eax, 3735929054
#endif

View File

@@ -41,10 +41,12 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
flags |= RANDOMX_FLAG_JIT;
}
# ifdef XMRIG_FEATURE_ASM_AMD
const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
flags |= RANDOMX_FLAG_AMD;
}
# endif
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
}