1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-01-01 07:52:38 -05:00

Compare commits

..

11 Commits

Author SHA1 Message Date
XMRig
8084ff37a5 v6.21.0 2023-11-23 20:40:58 +07:00
XMRig
7cf3db7750 Merge branch 'dev' 2023-11-23 20:40:34 +07:00
XMRig
4bda6e054d v6.21.0-dev 2023-11-23 19:51:41 +07:00
xmrig
64a0ed413b Merge pull request #3358 from SChernykh/dev
Zephyr solo mining: handle multiple outputs
2023-11-15 22:36:35 +07:00
SChernykh
0b59b7eb43 Zephyr solo mining: handle multiple outputs 2023-11-15 16:18:05 +01:00
xmrig
ae6b10b5a4 Merge pull request #3356 from SChernykh/dev
Updated pricing record size for Zephyr solo mining
2023-11-15 08:27:02 +07:00
SChernykh
705a7eac0c Updated pricing record size for Zephyr solo mining 2023-11-14 13:06:10 +01:00
xmrig
10bfffe033 Merge pull request #3348 from SChernykh/dev
Update to latest sse2neon.h
2023-10-31 11:52:38 +07:00
SChernykh
4131aa4754 Update sse2neon.h 2023-10-30 20:07:03 +01:00
xmrig
fee51b20fa Merge pull request #3346 from SChernykh/dev
ARM64 JIT: don't use `x18` register
2023-10-20 07:36:12 +07:00
SChernykh
5e66efabcf ARM64 JIT: don't use x18 register
From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms
> The platforms reserve register x18. Don’t use this register.

This PR fixes invalid hashes when running on Apple silicon with the latest macOS SDK.
2023-10-19 17:45:15 +02:00
15 changed files with 880 additions and 827 deletions

View File

@@ -1,3 +1,13 @@
# v6.21.0
- [#3302](https://github.com/xmrig/xmrig/pull/3302) [#3312](https://github.com/xmrig/xmrig/pull/3312) Enabled keepalive for Windows (>= Vista).
- [#3320](https://github.com/xmrig/xmrig/pull/3320) Added "built for OS/architecture/bits" to "ABOUT".
- [#3339](https://github.com/xmrig/xmrig/pull/3339) Added SNI option for TLS connections.
- [#3342](https://github.com/xmrig/xmrig/pull/3342) Update `cn_main_loop.asm`.
- [#3346](https://github.com/xmrig/xmrig/pull/3346) ARM64 JIT: don't use `x18` register.
- [#3348](https://github.com/xmrig/xmrig/pull/3348) Update to latest `sse2neon.h`.
- [#3356](https://github.com/xmrig/xmrig/pull/3356) Updated pricing record size for **Zephyr** solo mining.
- [#3358](https://github.com/xmrig/xmrig/pull/3358) **Zephyr** solo mining: handle multiple outputs.
# v6.20.0 # v6.20.0
- Added new ARM CPU names. - Added new ARM CPU names.
- [#2394](https://github.com/xmrig/xmrig/pull/2394) Added new CMake options `ARM_V8` and `ARM_V7`. - [#2394](https://github.com/xmrig/xmrig/pull/2394) Added new CMake options `ARM_V8` and `ARM_V7`.

View File

@@ -14,9 +14,7 @@ option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON) option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_ASM_AMD "Enable ASM for AMD processors" ON) option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON)
option(WITH_MSR "Enable MSR mod" ON)
option(WITH_MSR_ZEN "Enable MSR mod for AMD Zen-based processors" ON)
option(WITH_ENV_VARS "Enable environment variables support in config file" ON) option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" ON) option(WITH_OPENCL "Enable OpenCL backend" ON)

View File

@@ -44,17 +44,9 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
add_definitions(/DXMRIG_FEATURE_ASM) add_definitions(/DXMRIG_FEATURE_ASM)
if (WITH_ASM_AMD)
add_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=ON (+amd)")
else()
message("-- WITH_ASM=ON (-amd)")
endif()
else() else()
set(XMRIG_ASM_SOURCES "") set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "") set(XMRIG_ASM_LIBRARY "")
remove_definitions(/DXMRIG_FEATURE_ASM) remove_definitions(/DXMRIG_FEATURE_ASM)
remove_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=OFF")
endif() endif()

View File

@@ -104,13 +104,8 @@ if (WITH_RANDOMX)
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX)) if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
add_definitions(/DXMRIG_FEATURE_MSR) add_definitions(/DXMRIG_FEATURE_MSR)
if (WITH_MSR_ZEN) add_definitions(/DXMRIG_FIX_RYZEN)
add_definitions(/DXMRIG_FIX_RYZEN) message("-- WITH_MSR=ON")
message("-- WITH_MSR=ON (+zen)")
else()
remove_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (-zen)")
endif()
if (XMRIG_OS_WIN) if (XMRIG_OS_WIN)
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO

View File

@@ -198,7 +198,7 @@ bool xmrig::BlockTemplate::parse(bool hashes)
} }
if (m_coin == Coin::ZEPHYR) { if (m_coin == Coin::ZEPHYR) {
uint8_t pricing_record[24]; uint8_t pricing_record[120];
ar(pricing_record); ar(pricing_record);
} }
@@ -225,8 +225,12 @@ bool xmrig::BlockTemplate::parse(bool hashes)
ar(m_height); ar(m_height);
ar(m_numOutputs); ar(m_numOutputs);
const uint64_t expected_outputs = (m_coin == Coin::ZEPHYR) ? 2 : 1; if (m_coin == Coin::ZEPHYR) {
if (m_numOutputs != expected_outputs) { if (m_numOutputs < 2) {
return false;
}
}
else if (m_numOutputs != 1) {
return false; return false;
} }
@@ -252,23 +256,25 @@ bool xmrig::BlockTemplate::parse(bool hashes)
ar.skip(asset_type_len); ar.skip(asset_type_len);
ar(m_viewTag); ar(m_viewTag);
uint64_t amount2; for (uint64_t k = 1; k < m_numOutputs; ++k) {
ar(amount2); uint64_t amount2;
ar(amount2);
uint8_t output_type2; uint8_t output_type2;
ar(output_type2); ar(output_type2);
if (output_type2 != 2) { if (output_type2 != 2) {
return false; return false;
}
Span key2;
ar(key2, kKeySize);
ar(asset_type_len);
ar.skip(asset_type_len);
uint8_t view_tag2;
ar(view_tag2);
} }
Span key2;
ar(key2, kKeySize);
ar(asset_type_len);
ar.skip(asset_type_len);
uint8_t view_tag2;
ar(view_tag2);
} }
else if (m_outputType == 3) { else if (m_outputType == 3) {
ar(m_viewTag); ar(m_viewTag);

View File

@@ -94,13 +94,7 @@ static inline const std::string &usage()
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n"; u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n";
# endif # endif
# ifdef XMRIG_FEATURE_ASM
# ifdef XMRIG_FEATURE_ASM_AMD
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n"; u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
# else
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel\n";
# endif
# endif
# if defined(__x86_64__) || defined(_M_AMD64) # if defined(__x86_64__) || defined(_M_AMD64)
u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n"; u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";

View File

@@ -55,7 +55,6 @@ bool cn_vaes_enabled = false;
#ifdef XMRIG_FEATURE_ASM #ifdef XMRIG_FEATURE_ASM
#ifdef XMRIG_FEATURE_ASM_AMD
# define ADD_FN_ASM(algo) do { \ # define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \ m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \ m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \
@@ -64,50 +63,34 @@ bool cn_vaes_enabled = false;
m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \ m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \
m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \ m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \
} while (0) } while (0)
#else
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_DOUBLE][Assembly::INTEL] = cryptonight_double_hash_asm<algo, Assembly::INTEL>; \
} while (0)
#endif
namespace xmrig { namespace xmrig {
cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr; cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
@@ -177,41 +160,31 @@ static void patchAsmVariants()
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false)); auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000); cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000); cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000); cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
# endif
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000); cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000);
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000); cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000); cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000); cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
# endif
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000); cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
# endif # endif
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000); cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000); cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000);
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000); cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000);
# endif
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000); cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000);
cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000); cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000); cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000);
cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000); cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000);
# endif
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000); cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000);
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000); cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000); cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000);
cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000); cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000);
# endif
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000); cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
# endif # endif
@@ -247,10 +220,8 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations(); constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER); patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER); patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
} }
@@ -260,10 +231,8 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask(); constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask();
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK); patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK); patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK); patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK); patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
} }
@@ -272,10 +241,8 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask(); constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();
patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK); patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK); patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK); patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK); patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
} }
# endif # endif
@@ -284,10 +251,8 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations(); constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations();
patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER); patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER); patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER); patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
} }
@@ -295,10 +260,8 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations(); constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations();
patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER); patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER); patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER); patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
} }

View File

@@ -852,16 +852,12 @@ extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
#endif
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx); extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
#endif
namespace xmrig { namespace xmrig {
@@ -871,38 +867,28 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_half_mainloop_ryzen_asm; extern cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm; extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm; extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_double_mainloop_ryzen_asm; extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_upx2_mainloop_asm; extern cn_mainloop_fun cn_upx2_mainloop_asm;
@@ -978,54 +964,46 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cnv2_mainloop_ivybridge_asm(ctx); cnv2_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cnv2_mainloop_ryzen_asm(ctx); cnv2_mainloop_ryzen_asm(ctx);
} }
else { else {
cnv2_mainloop_bulldozer_asm(ctx); cnv2_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
else if (ALGO == Algorithm::CN_HALF) { else if (ALGO == Algorithm::CN_HALF) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_half_mainloop_ivybridge_asm(ctx); cn_half_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_half_mainloop_ryzen_asm(ctx); cn_half_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_half_mainloop_bulldozer_asm(ctx); cn_half_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) { else if (ALGO == Algorithm::CN_PICO_0) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx); cn_trtl_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx); cn_trtl_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_trtl_mainloop_bulldozer_asm(ctx); cn_trtl_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
else if (ALGO == Algorithm::CN_PICO_TLO) { else if (ALGO == Algorithm::CN_PICO_TLO) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx); cn_tlo_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_tlo_mainloop_ryzen_asm(ctx); cn_tlo_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_tlo_mainloop_bulldozer_asm(ctx); cn_tlo_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
# endif # endif
else if (ALGO == Algorithm::CN_RWZ) { else if (ALGO == Algorithm::CN_RWZ) {
@@ -1035,27 +1013,23 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx); cn_zls_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_zls_mainloop_ryzen_asm(ctx); cn_zls_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_zls_mainloop_bulldozer_asm(ctx); cn_zls_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
else if (ALGO == Algorithm::CN_DOUBLE) { else if (ALGO == Algorithm::CN_DOUBLE) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_double_mainloop_ivybridge_asm(ctx); cn_double_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_double_mainloop_ryzen_asm(ctx); cn_double_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_double_mainloop_bulldozer_asm(ctx); cn_double_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
# ifdef XMRIG_ALGO_CN_FEMTO # ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) { else if (ALGO == Algorithm::CN_UPX2) {
@@ -1120,16 +1094,12 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
# endif # endif
# ifdef XMRIG_ALGO_CN_FEMTO # ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) { else if (ALGO == Algorithm::CN_UPX2) {
# ifdef XMRIG_FEATURE_ASM_AMD
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) { if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx); cnv2_upx_double_mainloop_zen3_asm(ctx);
} }
else { else {
cn_upx2_double_mainloop_asm(ctx); cn_upx2_double_mainloop_asm(ctx);
} }
# else
cn_upx2_double_mainloop_asm(ctx);
# endif
} }
# endif # endif
else if (ALGO == Algorithm::CN_RWZ) { else if (ALGO == Algorithm::CN_RWZ) {

View File

@@ -15,16 +15,12 @@
.global FN_PREFIX(cnv1_double_mainloop_asm) .global FN_PREFIX(cnv1_double_mainloop_asm)
.global FN_PREFIX(cnv1_quad_mainloop_asm) .global FN_PREFIX(cnv1_quad_mainloop_asm)
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) .global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_mainloop_ryzen_asm) .global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
#endif
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
.global FN_PREFIX(cnv2_rwz_mainloop_asm) .global FN_PREFIX(cnv2_rwz_mainloop_asm)
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm) .global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm) .global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
#endif
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv1_single_mainloop_asm): FN_PREFIX(cnv1_single_mainloop_asm):
@@ -62,7 +58,6 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm): FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48 sub rsp, 48
@@ -80,7 +75,6 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
@@ -109,7 +103,6 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm): FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
sub rsp, 48 sub rsp, 48
@@ -118,7 +111,6 @@ FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif
#if defined(__linux__) && defined(__ELF__) #if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits .section .note.GNU-stack,"",%progbits

View File

@@ -5,16 +5,12 @@
.global cnv1_double_mainloop_asm .global cnv1_double_mainloop_asm
.global cnv1_quad_mainloop_asm .global cnv1_quad_mainloop_asm
.global cnv2_mainloop_ivybridge_asm .global cnv2_mainloop_ivybridge_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_mainloop_ryzen_asm .global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm .global cnv2_mainloop_bulldozer_asm
#endif
.global cnv2_double_mainloop_sandybridge_asm .global cnv2_double_mainloop_sandybridge_asm
.global cnv2_rwz_mainloop_asm .global cnv2_rwz_mainloop_asm
.global cnv2_rwz_double_mainloop_asm .global cnv2_rwz_double_mainloop_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_upx_double_mainloop_zen3_asm .global cnv2_upx_double_mainloop_zen3_asm
#endif
ALIGN(64) ALIGN(64)
cnv1_single_mainloop_asm: cnv1_single_mainloop_asm:
@@ -40,7 +36,6 @@ cnv2_mainloop_ivybridge_asm:
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
cnv2_mainloop_ryzen_asm: cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc" #include "../cn2/cnv2_main_loop_ryzen.inc"
@@ -52,7 +47,6 @@ cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc" #include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif
ALIGN(64) ALIGN(64)
cnv2_double_mainloop_sandybridge_asm: cnv2_double_mainloop_sandybridge_asm:
@@ -72,10 +66,8 @@ cnv2_rwz_double_mainloop_asm:
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
cnv2_upx_double_mainloop_zen3_asm: cnv2_upx_double_mainloop_zen3_asm:
#include "cn2/cnv2_upx_double_mainloop_zen3.inc" #include "cn2/cnv2_upx_double_mainloop_zen3.inc"
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
} }
// Update spMix2 // Update spMix2
// eor w18, config.readReg2, config.readReg3 // eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop // Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos); emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w18, w18, CacheLineAlignMask // and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask // and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
} }
// Update spMix2 // Update spMix2
// eor w18, config.readReg2, config.readReg3 // eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop // Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
} }
else else
{ {
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k); emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg // add dst, src, tmp_reg
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos; uint32_t k = codePos;
uint32_t imm = instr.getImm32(); uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k); emitAddImmediate(tmp_reg, src, imm, code, k);
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg // add dst, dst, tmp_reg
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg // sub dst, dst, tmp_reg
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst) if (src == dst)
{ {
src = 18; src = 20;
emitMovImmediate(src, instr.getImm32(), code, k); emitMovImmediate(src, instr.getImm32(), code, k);
} }
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg // sub dst, dst, tmp_reg
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg // umulh dst, dst, tmp_reg
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg // smulh dst, dst, tmp_reg
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos; uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63; constexpr uint64_t N = 1ULL << 63;
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
literalPos -= sizeof(uint64_t); literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor); *(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 13) if (literal_id < 12)
{ {
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 }; static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg // mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k); emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst) if (src == dst)
{ {
src = 18; src = 20;
emitMovImmediate(src, instr.getImm32(), code, k); emitMovImmediate(src, instr.getImm32(), code, k);
} }
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg // eor dst, dst, tmp_reg
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst) if (src != dst)
{ {
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
// sub tmp_reg, xzr, src // sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k); emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos; uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8; constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm // ror tmp_reg, src, imm
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
uint32_t imm = instr.getImm32(); uint32_t imm = instr.getImm32();

View File

@@ -72,9 +72,9 @@
# x15 -> "r7" # x15 -> "r7"
# x16 -> spAddr0 # x16 -> spAddr0
# x17 -> spAddr1 # x17 -> spAddr1
# x18 -> temporary # x18 -> unused (platform register, don't touch it)
# x19 -> temporary # x19 -> temporary
# x20 -> literal for IMUL_RCP # x20 -> temporary
# x21 -> literal for IMUL_RCP # x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP # x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP # x23 -> literal for IMUL_RCP
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers # Save callee-saved registers
sub sp, sp, 192 sub sp, sp, 192
stp x16, x17, [sp] stp x16, x17, [sp]
stp x18, x19, [sp, 16] str x19, [sp, 16]
stp x20, x21, [sp, 32] stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48] stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64] stp x24, x25, [sp, 64]
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
# Read literals # Read literals
ldr x0, literal_x0 ldr x0, literal_x0
ldr x11, literal_x11 ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21 ldr x21, literal_x21
ldr x22, literal_x22 ldr x22, literal_x22
ldr x23, literal_x23 ldr x23, literal_x23
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop): DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32 lsr x20, x10, 32
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and w16, w10, 1 and w16, w10, 1
and w17, w18, 1 and w17, w20, 1
# x16 = scratchpad + spAddr0 # x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1 # x17 = scratchpad + spAddr1
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2 add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0) # xor integer registers with scratchpad data (spAddr0)
ldp x18, x19, [x16] ldp x20, x19, [x16]
eor x4, x4, x18 eor x4, x4, x20
eor x5, x5, x19 eor x5, x5, x19
ldp x18, x19, [x16, 16] ldp x20, x19, [x16, 16]
eor x6, x6, x18 eor x6, x6, x20
eor x7, x7, x19 eor x7, x7, x19
ldp x18, x19, [x16, 32] ldp x20, x19, [x16, 32]
eor x12, x12, x18 eor x12, x12, x20
eor x13, x13, x19 eor x13, x13, x19
ldp x18, x19, [x16, 48] ldp x20, x19, [x16, 48]
eor x14, x14, x18 eor x14, x14, x20
eor x15, x15, x19 eor x15, x15, x19
# Load group F registers (spAddr1) # Load group F registers (spAddr1)
ldpsw x18, x19, [x17] ldpsw x20, x19, [x17]
ins v16.d[0], x18 ins v16.d[0], x20
ins v16.d[1], x19 ins v16.d[1], x19
ldpsw x18, x19, [x17, 8] ldpsw x20, x19, [x17, 8]
ins v17.d[0], x18 ins v17.d[0], x20
ins v17.d[1], x19 ins v17.d[1], x19
ldpsw x18, x19, [x17, 16] ldpsw x20, x19, [x17, 16]
ins v18.d[0], x18 ins v18.d[0], x20
ins v18.d[1], x19 ins v18.d[1], x19
ldpsw x18, x19, [x17, 24] ldpsw x20, x19, [x17, 24]
ins v19.d[0], x18 ins v19.d[0], x20
ins v19.d[1], x19 ins v19.d[1], x19
scvtf v16.2d, v16.2d scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d scvtf v17.2d, v17.2d
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d scvtf v19.2d, v19.2d
# Load group E registers (spAddr1) # Load group E registers (spAddr1)
ldpsw x18, x19, [x17, 32] ldpsw x20, x19, [x17, 32]
ins v20.d[0], x18 ins v20.d[0], x20
ins v20.d[1], x19 ins v20.d[1], x19
ldpsw x18, x19, [x17, 40] ldpsw x20, x19, [x17, 40]
ins v21.d[0], x18 ins v21.d[0], x20
ins v21.d[1], x19 ins v21.d[1], x19
ldpsw x18, x19, [x17, 48] ldpsw x20, x19, [x17, 48]
ins v22.d[0], x18 ins v22.d[0], x20
ins v22.d[1], x19 ins v22.d[1], x19
ldpsw x18, x19, [x17, 56] ldpsw x20, x19, [x17, 56]
ins v23.d[0], x18 ins v23.d[0], x20
ins v23.d[1], x19 ins v23.d[1], x19
scvtf v20.2d, v20.2d scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d scvtf v21.2d, v21.2d
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0 literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0 literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0 literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0 literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0 literal_x23: .fill 1,8,0
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32 lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3]; # mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18 eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch # Calculate dataset pointer for dataset prefetch
mov w18, w9 mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1): DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and x18, x18, 1 and x20, x20, 1
add x18, x18, x1 add x20, x20, x1
# Prefetch dataset data # Prefetch dataset data
prfm pldl2strm, [x18] prfm pldl2strm, [x20]
# mx <-> ma # mx <-> ma
ror x9, x9, 32 ror x9, x9, 32
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line): DECL(randomx_program_aarch64_xor_with_dataset_line):
# xor integer registers with dataset data # xor integer registers with dataset data
ldp x18, x19, [x10] ldp x20, x19, [x10]
eor x4, x4, x18 eor x4, x4, x20
eor x5, x5, x19 eor x5, x5, x19
ldp x18, x19, [x10, 16] ldp x20, x19, [x10, 16]
eor x6, x6, x18 eor x6, x6, x20
eor x7, x7, x19 eor x7, x7, x19
ldp x18, x19, [x10, 32] ldp x20, x19, [x10, 32]
eor x12, x12, x18 eor x12, x12, x20
eor x13, x13, x19 eor x13, x13, x19
ldp x18, x19, [x10, 48] ldp x20, x19, [x10, 48]
eor x14, x14, x18 eor x14, x14, x20
eor x15, x15, x19 eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1): DECL(randomx_program_aarch64_update_spMix1):
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers # Restore callee-saved registers
ldp x16, x17, [sp] ldp x16, x17, [sp]
ldp x18, x19, [sp, 16] ldr x19, [sp, 16]
ldp x20, x21, [sp, 32] ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48] ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64] ldp x24, x25, [sp, 64]
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80] stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3]; # mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18 eor x9, x9, x20
# mx <-> ma # mx <-> ma
ror x9, x9, 32 ror x9, x9, 32
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item # x3 -> end item
DECL(randomx_init_dataset_aarch64): DECL(randomx_init_dataset_aarch64):
# Save x30 (return address) # Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
str x30, [sp, -16]! stp x20, x30, [sp, -16]!
# Load pointer to cache memory # Load pointer to cache memory
ldr x0, [x0] ldr x0, [x0]
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3 cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop) bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x30 (return address) # Restore x20 and x30
ldr x30, [sp], 16 ldp x20, x30, [sp], 16
ret ret

View File

@@ -41,12 +41,10 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
flags |= RANDOMX_FLAG_JIT; flags |= RANDOMX_FLAG_JIT;
} }
# ifdef XMRIG_FEATURE_ASM_AMD
const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id(); const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) { if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
flags |= RANDOMX_FLAG_AMD; flags |= RANDOMX_FLAG_AMD;
} }
# endif
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node); return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
} }

View File

@@ -22,15 +22,15 @@
#define APP_ID "xmrig" #define APP_ID "xmrig"
#define APP_NAME "XMRig" #define APP_NAME "XMRig"
#define APP_DESC "XMRig miner" #define APP_DESC "XMRig miner"
#define APP_VERSION "6.20.1-dev" #define APP_VERSION "6.21.0"
#define APP_DOMAIN "xmrig.com" #define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com" #define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2023 xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2023 xmrig.com"
#define APP_KIND "miner" #define APP_KIND "miner"
#define APP_VER_MAJOR 6 #define APP_VER_MAJOR 6
#define APP_VER_MINOR 20 #define APP_VER_MINOR 21
#define APP_VER_PATCH 1 #define APP_VER_PATCH 0
#ifdef _MSC_VER #ifdef _MSC_VER
# if (_MSC_VER >= 1930) # if (_MSC_VER >= 1930)