mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-09 08:42:40 -05:00
Compare commits
4 Commits
a96a360161
...
b114278c88
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b114278c88 | ||
|
|
fee51b20fa | ||
|
|
5e66efabcf | ||
|
|
a776ebf394 |
@@ -14,7 +14,9 @@ option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
|
||||
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
||||
option(WITH_TLS "Enable OpenSSL support" ON)
|
||||
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
||||
option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON)
|
||||
option(WITH_ASM_AMD "Enable ASM for AMD processors" ON)
|
||||
option(WITH_MSR "Enable MSR mod" ON)
|
||||
option(WITH_MSR_ZEN "Enable MSR mod for AMD Zen-based processors" ON)
|
||||
option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
|
||||
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
|
||||
option(WITH_OPENCL "Enable OpenCL backend" ON)
|
||||
|
||||
@@ -44,9 +44,17 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||
|
||||
add_definitions(/DXMRIG_FEATURE_ASM)
|
||||
if (WITH_ASM_AMD)
|
||||
add_definitions(/DXMRIG_FEATURE_ASM_AMD)
|
||||
message("-- WITH_ASM=ON (+amd)")
|
||||
else()
|
||||
message("-- WITH_ASM=ON (-amd)")
|
||||
endif()
|
||||
else()
|
||||
set(XMRIG_ASM_SOURCES "")
|
||||
set(XMRIG_ASM_LIBRARY "")
|
||||
|
||||
remove_definitions(/DXMRIG_FEATURE_ASM)
|
||||
remove_definitions(/DXMRIG_FEATURE_ASM_AMD)
|
||||
message("-- WITH_ASM=OFF")
|
||||
endif()
|
||||
|
||||
@@ -104,8 +104,13 @@ if (WITH_RANDOMX)
|
||||
|
||||
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
|
||||
add_definitions(/DXMRIG_FEATURE_MSR)
|
||||
add_definitions(/DXMRIG_FIX_RYZEN)
|
||||
message("-- WITH_MSR=ON")
|
||||
if (WITH_MSR_ZEN)
|
||||
add_definitions(/DXMRIG_FIX_RYZEN)
|
||||
message("-- WITH_MSR=ON (+zen)")
|
||||
else()
|
||||
remove_definitions(/DXMRIG_FIX_RYZEN)
|
||||
message("-- WITH_MSR=ON (-zen)")
|
||||
endif()
|
||||
|
||||
if (XMRIG_OS_WIN)
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
|
||||
@@ -94,7 +94,13 @@ static inline const std::string &usage()
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n";
|
||||
# endif
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
|
||||
# else
|
||||
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel\n";
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# if defined(__x86_64__) || defined(_M_AMD64)
|
||||
u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";
|
||||
|
||||
@@ -55,6 +55,7 @@ bool cn_vaes_enabled = false;
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
# define ADD_FN_ASM(algo) do { \
|
||||
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
|
||||
m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \
|
||||
@@ -63,34 +64,50 @@ bool cn_vaes_enabled = false;
|
||||
m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \
|
||||
m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \
|
||||
} while (0)
|
||||
#else
|
||||
# define ADD_FN_ASM(algo) do { \
|
||||
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
|
||||
m_map[algo]->data[AV_DOUBLE][Assembly::INTEL] = cryptonight_double_hash_asm<algo, Assembly::INTEL>; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
||||
cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
||||
#endif
|
||||
cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
|
||||
cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
|
||||
#endif
|
||||
cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr;
|
||||
cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr;
|
||||
#endif
|
||||
cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
|
||||
cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
|
||||
#endif
|
||||
cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
|
||||
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
|
||||
#endif
|
||||
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
|
||||
@@ -160,31 +177,41 @@ static void patchAsmVariants()
|
||||
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
|
||||
|
||||
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
|
||||
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
|
||||
# endif
|
||||
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000);
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
|
||||
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
|
||||
# endif
|
||||
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
|
||||
# endif
|
||||
|
||||
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000);
|
||||
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000);
|
||||
# endif
|
||||
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000);
|
||||
|
||||
cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000);
|
||||
cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000);
|
||||
# endif
|
||||
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000);
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000);
|
||||
cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000);
|
||||
# endif
|
||||
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
|
||||
# endif
|
||||
|
||||
@@ -220,8 +247,10 @@ static void patchAsmVariants()
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
|
||||
|
||||
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
|
||||
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
|
||||
# endif
|
||||
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
|
||||
}
|
||||
|
||||
@@ -231,8 +260,10 @@ static void patchAsmVariants()
|
||||
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask();
|
||||
|
||||
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
|
||||
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
|
||||
# endif
|
||||
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
|
||||
}
|
||||
|
||||
@@ -241,8 +272,10 @@ static void patchAsmVariants()
|
||||
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();
|
||||
|
||||
patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
|
||||
patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
|
||||
# endif
|
||||
patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
|
||||
}
|
||||
# endif
|
||||
@@ -251,8 +284,10 @@ static void patchAsmVariants()
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations();
|
||||
|
||||
patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
|
||||
patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
|
||||
# endif
|
||||
patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
|
||||
}
|
||||
|
||||
@@ -260,8 +295,10 @@ static void patchAsmVariants()
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations();
|
||||
|
||||
patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
|
||||
patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
|
||||
# endif
|
||||
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
|
||||
}
|
||||
|
||||
|
||||
@@ -852,12 +852,16 @@ extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
|
||||
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
|
||||
#endif
|
||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
@@ -867,28 +871,38 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
||||
extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
|
||||
#endif
|
||||
extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
|
||||
extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
|
||||
#endif
|
||||
extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
|
||||
extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
|
||||
#endif
|
||||
extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
|
||||
extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
|
||||
#endif
|
||||
extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
|
||||
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
|
||||
#endif
|
||||
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_upx2_mainloop_asm;
|
||||
@@ -964,46 +978,54 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cnv2_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
else if (ASM == Assembly::RYZEN) {
|
||||
cnv2_mainloop_ryzen_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cnv2_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_HALF) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_half_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
else if (ASM == Assembly::RYZEN) {
|
||||
cn_half_mainloop_ryzen_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_half_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
else if (ALGO == Algorithm::CN_PICO_0) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_trtl_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
else if (ASM == Assembly::RYZEN) {
|
||||
cn_trtl_mainloop_ryzen_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_trtl_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_PICO_TLO) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_tlo_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
else if (ASM == Assembly::RYZEN) {
|
||||
cn_tlo_mainloop_ryzen_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_tlo_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
# endif
|
||||
else if (ALGO == Algorithm::CN_RWZ) {
|
||||
@@ -1013,23 +1035,27 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_zls_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
else if (ASM == Assembly::RYZEN) {
|
||||
cn_zls_mainloop_ryzen_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_zls_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
else if (ALGO == Algorithm::CN_DOUBLE) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
cn_double_mainloop_ivybridge_asm(ctx);
|
||||
}
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
else if (ASM == Assembly::RYZEN) {
|
||||
cn_double_mainloop_ryzen_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_double_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
else if (ALGO == Algorithm::CN_UPX2) {
|
||||
@@ -1094,12 +1120,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
else if (ALGO == Algorithm::CN_UPX2) {
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
|
||||
cnv2_upx_double_mainloop_zen3_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_upx2_double_mainloop_asm(ctx);
|
||||
}
|
||||
# else
|
||||
cn_upx2_double_mainloop_asm(ctx);
|
||||
# endif
|
||||
}
|
||||
# endif
|
||||
else if (ALGO == Algorithm::CN_RWZ) {
|
||||
|
||||
@@ -15,12 +15,16 @@
|
||||
.global FN_PREFIX(cnv1_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_quad_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||
#endif
|
||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_rwz_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
|
||||
#endif
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv1_single_mainloop_asm):
|
||||
@@ -58,6 +62,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||
sub rsp, 48
|
||||
@@ -75,6 +80,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
#endif
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||
@@ -103,6 +109,7 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
|
||||
sub rsp, 48
|
||||
@@ -111,6 +118,7 @@ FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
@@ -5,12 +5,16 @@
|
||||
.global cnv1_double_mainloop_asm
|
||||
.global cnv1_quad_mainloop_asm
|
||||
.global cnv2_mainloop_ivybridge_asm
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
.global cnv2_mainloop_ryzen_asm
|
||||
.global cnv2_mainloop_bulldozer_asm
|
||||
#endif
|
||||
.global cnv2_double_mainloop_sandybridge_asm
|
||||
.global cnv2_rwz_mainloop_asm
|
||||
.global cnv2_rwz_double_mainloop_asm
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
.global cnv2_upx_double_mainloop_zen3_asm
|
||||
#endif
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm:
|
||||
@@ -36,6 +40,7 @@ cnv2_mainloop_ivybridge_asm:
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ryzen_asm:
|
||||
#include "../cn2/cnv2_main_loop_ryzen.inc"
|
||||
@@ -47,6 +52,7 @@ cnv2_mainloop_bulldozer_asm:
|
||||
#include "../cn2/cnv2_main_loop_bulldozer.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
#endif
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_double_mainloop_sandybridge_asm:
|
||||
@@ -66,8 +72,10 @@ cnv2_rwz_double_mainloop_asm:
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM_AMD
|
||||
ALIGN(64)
|
||||
cnv2_upx_double_mainloop_zen3_asm:
|
||||
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
#endif
|
||||
|
||||
@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
// and w17, w18, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
// and w17, w20, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
}
|
||||
|
||||
// Update spMix2
|
||||
// eor w18, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
// eor w20, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
|
||||
// Jump back to the main loop
|
||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
|
||||
// and w18, w18, CacheLineAlignMask
|
||||
// and w20, w20, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
|
||||
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
|
||||
// and w10, w10, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
|
||||
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
// and w17, w18, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
// and w17, w20, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||
}
|
||||
|
||||
// Update spMix2
|
||||
// eor w18, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
// eor w20, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
|
||||
// Jump back to the main loop
|
||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
|
||||
}
|
||||
else
|
||||
{
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMovImmediate(tmp_reg, imm, code, k);
|
||||
|
||||
// add dst, src, tmp_reg
|
||||
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
|
||||
uint32_t k = codePos;
|
||||
|
||||
uint32_t imm = instr.getImm32();
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 19;
|
||||
|
||||
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// add dst, dst, tmp_reg
|
||||
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// sub dst, dst, tmp_reg
|
||||
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
if (src == dst)
|
||||
{
|
||||
src = 18;
|
||||
src = 20;
|
||||
emitMovImmediate(src, instr.getImm32(), code, k);
|
||||
}
|
||||
|
||||
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// mul dst, dst, tmp_reg
|
||||
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// umulh dst, dst, tmp_reg
|
||||
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// smulh dst, dst, tmp_reg
|
||||
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
uint32_t k = codePos;
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint64_t N = 1ULL << 63;
|
||||
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
|
||||
literalPos -= sizeof(uint64_t);
|
||||
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
|
||||
|
||||
if (literal_id < 13)
|
||||
if (literal_id < 12)
|
||||
{
|
||||
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
|
||||
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
|
||||
|
||||
// mul dst, dst, literal_reg
|
||||
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
|
||||
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
if (src == dst)
|
||||
{
|
||||
src = 18;
|
||||
src = 20;
|
||||
emitMovImmediate(src, instr.getImm32(), code, k);
|
||||
}
|
||||
|
||||
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
|
||||
|
||||
// eor dst, dst, tmp_reg
|
||||
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
if (src != dst)
|
||||
{
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
|
||||
// sub tmp_reg, xzr, src
|
||||
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
|
||||
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
uint32_t k = codePos;
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
|
||||
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
|
||||
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
|
||||
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
constexpr uint32_t fpcr_tmp_reg = 8;
|
||||
|
||||
// ror tmp_reg, src, imm
|
||||
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
const uint32_t src = IntRegMap[instr.src];
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
constexpr uint32_t tmp_reg = 18;
|
||||
constexpr uint32_t tmp_reg = 20;
|
||||
|
||||
uint32_t imm = instr.getImm32();
|
||||
|
||||
|
||||
@@ -72,9 +72,9 @@
|
||||
# x15 -> "r7"
|
||||
# x16 -> spAddr0
|
||||
# x17 -> spAddr1
|
||||
# x18 -> temporary
|
||||
# x18 -> unused (platform register, don't touch it)
|
||||
# x19 -> temporary
|
||||
# x20 -> literal for IMUL_RCP
|
||||
# x20 -> temporary
|
||||
# x21 -> literal for IMUL_RCP
|
||||
# x22 -> literal for IMUL_RCP
|
||||
# x23 -> literal for IMUL_RCP
|
||||
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
|
||||
# Save callee-saved registers
|
||||
sub sp, sp, 192
|
||||
stp x16, x17, [sp]
|
||||
stp x18, x19, [sp, 16]
|
||||
str x19, [sp, 16]
|
||||
stp x20, x21, [sp, 32]
|
||||
stp x22, x23, [sp, 48]
|
||||
stp x24, x25, [sp, 64]
|
||||
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
|
||||
# Read literals
|
||||
ldr x0, literal_x0
|
||||
ldr x11, literal_x11
|
||||
ldr x20, literal_x20
|
||||
ldr x21, literal_x21
|
||||
ldr x22, literal_x22
|
||||
ldr x23, literal_x23
|
||||
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
|
||||
DECL(randomx_program_aarch64_main_loop):
|
||||
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
||||
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
|
||||
lsr x18, x10, 32
|
||||
lsr x20, x10, 32
|
||||
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and w16, w10, 1
|
||||
and w17, w18, 1
|
||||
and w17, w20, 1
|
||||
|
||||
# x16 = scratchpad + spAddr0
|
||||
# x17 = scratchpad + spAddr1
|
||||
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
|
||||
add x17, x17, x2
|
||||
|
||||
# xor integer registers with scratchpad data (spAddr0)
|
||||
ldp x18, x19, [x16]
|
||||
eor x4, x4, x18
|
||||
ldp x20, x19, [x16]
|
||||
eor x4, x4, x20
|
||||
eor x5, x5, x19
|
||||
ldp x18, x19, [x16, 16]
|
||||
eor x6, x6, x18
|
||||
ldp x20, x19, [x16, 16]
|
||||
eor x6, x6, x20
|
||||
eor x7, x7, x19
|
||||
ldp x18, x19, [x16, 32]
|
||||
eor x12, x12, x18
|
||||
ldp x20, x19, [x16, 32]
|
||||
eor x12, x12, x20
|
||||
eor x13, x13, x19
|
||||
ldp x18, x19, [x16, 48]
|
||||
eor x14, x14, x18
|
||||
ldp x20, x19, [x16, 48]
|
||||
eor x14, x14, x20
|
||||
eor x15, x15, x19
|
||||
|
||||
# Load group F registers (spAddr1)
|
||||
ldpsw x18, x19, [x17]
|
||||
ins v16.d[0], x18
|
||||
ldpsw x20, x19, [x17]
|
||||
ins v16.d[0], x20
|
||||
ins v16.d[1], x19
|
||||
ldpsw x18, x19, [x17, 8]
|
||||
ins v17.d[0], x18
|
||||
ldpsw x20, x19, [x17, 8]
|
||||
ins v17.d[0], x20
|
||||
ins v17.d[1], x19
|
||||
ldpsw x18, x19, [x17, 16]
|
||||
ins v18.d[0], x18
|
||||
ldpsw x20, x19, [x17, 16]
|
||||
ins v18.d[0], x20
|
||||
ins v18.d[1], x19
|
||||
ldpsw x18, x19, [x17, 24]
|
||||
ins v19.d[0], x18
|
||||
ldpsw x20, x19, [x17, 24]
|
||||
ins v19.d[0], x20
|
||||
ins v19.d[1], x19
|
||||
scvtf v16.2d, v16.2d
|
||||
scvtf v17.2d, v17.2d
|
||||
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
|
||||
scvtf v19.2d, v19.2d
|
||||
|
||||
# Load group E registers (spAddr1)
|
||||
ldpsw x18, x19, [x17, 32]
|
||||
ins v20.d[0], x18
|
||||
ldpsw x20, x19, [x17, 32]
|
||||
ins v20.d[0], x20
|
||||
ins v20.d[1], x19
|
||||
ldpsw x18, x19, [x17, 40]
|
||||
ins v21.d[0], x18
|
||||
ldpsw x20, x19, [x17, 40]
|
||||
ins v21.d[0], x20
|
||||
ins v21.d[1], x19
|
||||
ldpsw x18, x19, [x17, 48]
|
||||
ins v22.d[0], x18
|
||||
ldpsw x20, x19, [x17, 48]
|
||||
ins v22.d[0], x20
|
||||
ins v22.d[1], x19
|
||||
ldpsw x18, x19, [x17, 56]
|
||||
ins v23.d[0], x18
|
||||
ldpsw x20, x19, [x17, 56]
|
||||
ins v23.d[0], x20
|
||||
ins v23.d[1], x19
|
||||
scvtf v20.2d, v20.2d
|
||||
scvtf v21.2d, v21.2d
|
||||
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
|
||||
|
||||
literal_x0: .fill 1,8,0
|
||||
literal_x11: .fill 1,8,0
|
||||
literal_x20: .fill 1,8,0
|
||||
literal_x21: .fill 1,8,0
|
||||
literal_x22: .fill 1,8,0
|
||||
literal_x23: .fill 1,8,0
|
||||
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
|
||||
lsr x10, x9, 32
|
||||
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x18
|
||||
eor x9, x9, x20
|
||||
|
||||
# Calculate dataset pointer for dataset prefetch
|
||||
mov w18, w9
|
||||
mov w20, w9
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x18, x18, 1
|
||||
add x18, x18, x1
|
||||
and x20, x20, 1
|
||||
add x20, x20, x1
|
||||
|
||||
# Prefetch dataset data
|
||||
prfm pldl2strm, [x18]
|
||||
prfm pldl2strm, [x20]
|
||||
|
||||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
|
||||
DECL(randomx_program_aarch64_xor_with_dataset_line):
|
||||
# xor integer registers with dataset data
|
||||
ldp x18, x19, [x10]
|
||||
eor x4, x4, x18
|
||||
ldp x20, x19, [x10]
|
||||
eor x4, x4, x20
|
||||
eor x5, x5, x19
|
||||
ldp x18, x19, [x10, 16]
|
||||
eor x6, x6, x18
|
||||
ldp x20, x19, [x10, 16]
|
||||
eor x6, x6, x20
|
||||
eor x7, x7, x19
|
||||
ldp x18, x19, [x10, 32]
|
||||
eor x12, x12, x18
|
||||
ldp x20, x19, [x10, 32]
|
||||
eor x12, x12, x20
|
||||
eor x13, x13, x19
|
||||
ldp x18, x19, [x10, 48]
|
||||
eor x14, x14, x18
|
||||
ldp x20, x19, [x10, 48]
|
||||
eor x14, x14, x20
|
||||
eor x15, x15, x19
|
||||
|
||||
DECL(randomx_program_aarch64_update_spMix1):
|
||||
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
|
||||
|
||||
# Restore callee-saved registers
|
||||
ldp x16, x17, [sp]
|
||||
ldp x18, x19, [sp, 16]
|
||||
ldr x19, [sp, 16]
|
||||
ldp x20, x21, [sp, 32]
|
||||
ldp x22, x23, [sp, 48]
|
||||
ldp x24, x25, [sp, 64]
|
||||
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
|
||||
stp x2, x30, [sp, 80]
|
||||
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x18
|
||||
eor x9, x9, x20
|
||||
|
||||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
|
||||
# x3 -> end item
|
||||
|
||||
DECL(randomx_init_dataset_aarch64):
|
||||
# Save x30 (return address)
|
||||
str x30, [sp, -16]!
|
||||
# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
|
||||
stp x20, x30, [sp, -16]!
|
||||
|
||||
# Load pointer to cache memory
|
||||
ldr x0, [x0]
|
||||
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
|
||||
cmp x2, x3
|
||||
bne DECL(randomx_init_dataset_aarch64_main_loop)
|
||||
|
||||
# Restore x30 (return address)
|
||||
ldr x30, [sp], 16
|
||||
# Restore x20 and x30
|
||||
ldp x20, x30, [sp], 16
|
||||
|
||||
ret
|
||||
|
||||
|
||||
@@ -41,10 +41,12 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
|
||||
flags |= RANDOMX_FLAG_JIT;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM_AMD
|
||||
const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
|
||||
if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
|
||||
flags |= RANDOMX_FLAG_AMD;
|
||||
}
|
||||
# endif
|
||||
|
||||
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user