1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-04-19 21:52:40 -04:00

Compare commits

...

5 Commits

Author SHA1 Message Date
Jean-Pierre De Jesus DIAZ
ff87190a43 Merge f0f6d1666c into fee51b20fa 2023-10-21 13:30:07 -07:00
xmrig
fee51b20fa Merge pull request #3346 from SChernykh/dev
ARM64 JIT: don't use `x18` register
2023-10-20 07:36:12 +07:00
SChernykh
5e66efabcf ARM64 JIT: don't use x18 register
From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms
> The platforms reserve register x18. Don’t use this register.

This PR fixes invalid hashes when running on Apple silicon with the latest macOS SDK.
2023-10-19 17:45:15 +02:00
Jean-Pierre De Jesus DIAZ
f0f6d1666c fixup! Allow using system libfmt 2023-05-29 20:15:44 +02:00
Jean-Pierre De Jesus DIAZ
0399b6b6bb Allow using system libfmt
Signed-off-by: Jean-Pierre De Jesus DIAZ <me@jeandudey.tech>
2023-05-29 20:15:44 +02:00
48 changed files with 107 additions and 92 deletions

View File

@@ -32,6 +32,7 @@ option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON) option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF) option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
option(WITH_DMI "Enable DMI/SMBIOS reader" ON) option(WITH_DMI "Enable DMI/SMBIOS reader" ON)
option(WITH_BUNDLED_FMT "Force use of bundled fmt library" ON)
option(BUILD_STATIC "Build static binary" OFF) option(BUILD_STATIC "Build static binary" OFF)
option(ARM_V8 "Force ARMv8 (64 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF) option(ARM_V8 "Force ARMv8 (64 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
@@ -198,6 +199,7 @@ add_definitions(-D__STDC_FORMAT_MACROS -DUNICODE -D_FILE_OFFSET_BITS=64)
find_package(UV REQUIRED) find_package(UV REQUIRED)
include(cmake/flags.cmake) include(cmake/flags.cmake)
include(cmake/fmt.cmake)
include(cmake/randomx.cmake) include(cmake/randomx.cmake)
include(cmake/argon2.cmake) include(cmake/argon2.cmake)
include(cmake/kawpow.cmake) include(cmake/kawpow.cmake)
@@ -229,7 +231,7 @@ include(src/hw/api/api.cmake)
include(src/hw/dmi/dmi.cmake) include(src/hw/dmi/dmi.cmake)
include_directories(src) include_directories(src)
include_directories(src/3rdparty) include_directories(src/3rdparty/CL)
include_directories(${UV_INCLUDE_DIR}) include_directories(${UV_INCLUDE_DIR})
if (WITH_DEBUG_LOG) if (WITH_DEBUG_LOG)
@@ -237,7 +239,7 @@ if (WITH_DEBUG_LOG)
endif() endif()
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES}) add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY}) target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY} ${FMT_LIBRARY})
if (WIN32) if (WIN32)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>) add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)

13
cmake/fmt.cmake Normal file
View File

@@ -0,0 +1,13 @@
# SPDX-FileCopyrightText: © 2023 Jean-Pierre De Jesus DIAZ <me@jeandudey.tech>
# SPDX-License-Identifier: GPL-3.0-or-later
if(WITH_BUNDLED_FMT)
add_library(fmt INTERFACE)
target_sources(fmt INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/src/3rdparty/fmt/format.cc)
target_include_directories(fmt INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/src/3rdparty/fmt)
else()
set(FMT_LIBRARY fmt)
find_package(fmt REQUIRED)
set(FMT_LIBRARY fmt::fmt)
endif()

2
src/3rdparty/cl.h vendored
View File

@@ -29,7 +29,7 @@
#if defined(__APPLE__) #if defined(__APPLE__)
# include <OpenCL/cl.h> # include <OpenCL/cl.h>
#else #else
# include "3rdparty/CL/cl.h" # include <CL/cl.h>
#endif #endif

View File

@@ -5,7 +5,7 @@
// //
// For the license information refer to format.h. // For the license information refer to format.h.
#include "3rdparty/fmt/format-inl.h" #include "fmt/format-inl.h"
FMT_BEGIN_NAMESPACE FMT_BEGIN_NAMESPACE
namespace detail { namespace detail {

View File

@@ -18,11 +18,11 @@
*/ */
#include "base/tools/String.h" #include "base/tools/String.h"
#include "3rdparty/fmt/core.h"
#include <cstdio> #include <cstdio>
#include <cctype> #include <cctype>
#include <fmt/core.h>
namespace xmrig { namespace xmrig {

View File

@@ -20,7 +20,6 @@
#include "backend/opencl/wrappers/AdlLib.h" #include "backend/opencl/wrappers/AdlLib.h"
#include "3rdparty/fmt/core.h"
#include "backend/opencl/wrappers/OclDevice.h" #include "backend/opencl/wrappers/OclDevice.h"
@@ -31,6 +30,7 @@
#include <string> #include <string>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <fmt/core.h>
namespace xmrig { namespace xmrig {

View File

@@ -88,7 +88,6 @@ set(HEADERS_BASE
) )
set(SOURCES_BASE set(SOURCES_BASE
src/3rdparty/fmt/format.cc
src/base/crypto/Algorithm.cpp src/base/crypto/Algorithm.cpp
src/base/crypto/Coin.cpp src/base/crypto/Coin.cpp
src/base/crypto/keccak.cpp src/base/crypto/keccak.cpp

View File

@@ -22,10 +22,10 @@
#include "base/kernel/Process.h" #include "base/kernel/Process.h"
#include "3rdparty/fmt/core.h"
#include "base/tools/Chrono.h" #include "base/tools/Chrono.h"
#include "version.h" #include "version.h"
#include <fmt/core.h>
#ifdef XMRIG_OS_WIN #ifdef XMRIG_OS_WIN
# ifdef _MSC_VER # ifdef _MSC_VER

View File

@@ -17,7 +17,6 @@
*/ */
#include "base/net/stratum/BaseClient.h" #include "base/net/stratum/BaseClient.h"
#include "3rdparty/fmt/core.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "base/io/Env.h" #include "base/io/Env.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
@@ -25,6 +24,8 @@
#include "base/kernel/interfaces/IClientListener.h" #include "base/kernel/interfaces/IClientListener.h"
#include "base/net/stratum/SubmitResult.h" #include "base/net/stratum/SubmitResult.h"
#include <fmt/core.h>
namespace xmrig { namespace xmrig {

View File

@@ -17,7 +17,6 @@
*/ */
#include "base/net/stratum/benchmark/BenchClient.h" #include "base/net/stratum/benchmark/BenchClient.h"
#include "3rdparty/fmt/core.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "backend/common/benchmark/BenchState.h" #include "backend/common/benchmark/BenchState.h"
#include "backend/common/interfaces/IBackend.h" #include "backend/common/interfaces/IBackend.h"
@@ -39,6 +38,8 @@
# include "hw/dmi/DmiReader.h" # include "hw/dmi/DmiReader.h"
#endif #endif
#include <fmt/core.h>
xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, IClientListener* listener) : xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, IClientListener* listener) :
m_listener(listener), m_listener(listener),

View File

@@ -17,12 +17,12 @@
*/ */
#include "base/net/stratum/benchmark/BenchConfig.h" #include "base/net/stratum/benchmark/BenchConfig.h"
#include "3rdparty/fmt/core.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "base/io/json/Json.h" #include "base/io/json/Json.h"
#include <string> #include <string>
#include <fmt/core.h>
#ifdef _MSC_VER #ifdef _MSC_VER

View File

@@ -17,12 +17,12 @@
*/ */
#include "crypto/common/LinuxMemory.h" #include "crypto/common/LinuxMemory.h"
#include "3rdparty/fmt/core.h"
#include "crypto/common/VirtualMemory.h" #include "crypto/common/VirtualMemory.h"
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <fmt/core.h>
#include <mutex> #include <mutex>
#include <string> #include <string>

View File

@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
} }
// Update spMix2 // Update spMix2
// eor w18, config.readReg2, config.readReg3 // eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop // Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos); emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w18, w18, CacheLineAlignMask // and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask // and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
} }
// Update spMix2 // Update spMix2
// eor w18, config.readReg2, config.readReg3 // eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop // Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
} }
else else
{ {
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k); emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg // add dst, src, tmp_reg
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos; uint32_t k = codePos;
uint32_t imm = instr.getImm32(); uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k); emitAddImmediate(tmp_reg, src, imm, code, k);
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg // add dst, dst, tmp_reg
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg // sub dst, dst, tmp_reg
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst) if (src == dst)
{ {
src = 18; src = 20;
emitMovImmediate(src, instr.getImm32(), code, k); emitMovImmediate(src, instr.getImm32(), code, k);
} }
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg // sub dst, dst, tmp_reg
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg // umulh dst, dst, tmp_reg
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg // smulh dst, dst, tmp_reg
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos; uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63; constexpr uint64_t N = 1ULL << 63;
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
literalPos -= sizeof(uint64_t); literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor); *(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 13) if (literal_id < 12)
{ {
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 }; static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg // mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k); emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst) if (src == dst)
{ {
src = 18; src = 20;
emitMovImmediate(src, instr.getImm32(), code, k); emitMovImmediate(src, instr.getImm32(), code, k);
} }
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg // eor dst, dst, tmp_reg
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst) if (src != dst)
{ {
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
// sub tmp_reg, xzr, src // sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k); emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos; uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8; constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm // ror tmp_reg, src, imm
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
uint32_t imm = instr.getImm32(); uint32_t imm = instr.getImm32();

View File

@@ -72,9 +72,9 @@
# x15 -> "r7" # x15 -> "r7"
# x16 -> spAddr0 # x16 -> spAddr0
# x17 -> spAddr1 # x17 -> spAddr1
# x18 -> temporary # x18 -> unused (platform register, don't touch it)
# x19 -> temporary # x19 -> temporary
# x20 -> literal for IMUL_RCP # x20 -> temporary
# x21 -> literal for IMUL_RCP # x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP # x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP # x23 -> literal for IMUL_RCP
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers # Save callee-saved registers
sub sp, sp, 192 sub sp, sp, 192
stp x16, x17, [sp] stp x16, x17, [sp]
stp x18, x19, [sp, 16] str x19, [sp, 16]
stp x20, x21, [sp, 32] stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48] stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64] stp x24, x25, [sp, 64]
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
# Read literals # Read literals
ldr x0, literal_x0 ldr x0, literal_x0
ldr x11, literal_x11 ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21 ldr x21, literal_x21
ldr x22, literal_x22 ldr x22, literal_x22
ldr x23, literal_x23 ldr x23, literal_x23
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop): DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32 lsr x20, x10, 32
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and w16, w10, 1 and w16, w10, 1
and w17, w18, 1 and w17, w20, 1
# x16 = scratchpad + spAddr0 # x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1 # x17 = scratchpad + spAddr1
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2 add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0) # xor integer registers with scratchpad data (spAddr0)
ldp x18, x19, [x16] ldp x20, x19, [x16]
eor x4, x4, x18 eor x4, x4, x20
eor x5, x5, x19 eor x5, x5, x19
ldp x18, x19, [x16, 16] ldp x20, x19, [x16, 16]
eor x6, x6, x18 eor x6, x6, x20
eor x7, x7, x19 eor x7, x7, x19
ldp x18, x19, [x16, 32] ldp x20, x19, [x16, 32]
eor x12, x12, x18 eor x12, x12, x20
eor x13, x13, x19 eor x13, x13, x19
ldp x18, x19, [x16, 48] ldp x20, x19, [x16, 48]
eor x14, x14, x18 eor x14, x14, x20
eor x15, x15, x19 eor x15, x15, x19
# Load group F registers (spAddr1) # Load group F registers (spAddr1)
ldpsw x18, x19, [x17] ldpsw x20, x19, [x17]
ins v16.d[0], x18 ins v16.d[0], x20
ins v16.d[1], x19 ins v16.d[1], x19
ldpsw x18, x19, [x17, 8] ldpsw x20, x19, [x17, 8]
ins v17.d[0], x18 ins v17.d[0], x20
ins v17.d[1], x19 ins v17.d[1], x19
ldpsw x18, x19, [x17, 16] ldpsw x20, x19, [x17, 16]
ins v18.d[0], x18 ins v18.d[0], x20
ins v18.d[1], x19 ins v18.d[1], x19
ldpsw x18, x19, [x17, 24] ldpsw x20, x19, [x17, 24]
ins v19.d[0], x18 ins v19.d[0], x20
ins v19.d[1], x19 ins v19.d[1], x19
scvtf v16.2d, v16.2d scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d scvtf v17.2d, v17.2d
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d scvtf v19.2d, v19.2d
# Load group E registers (spAddr1) # Load group E registers (spAddr1)
ldpsw x18, x19, [x17, 32] ldpsw x20, x19, [x17, 32]
ins v20.d[0], x18 ins v20.d[0], x20
ins v20.d[1], x19 ins v20.d[1], x19
ldpsw x18, x19, [x17, 40] ldpsw x20, x19, [x17, 40]
ins v21.d[0], x18 ins v21.d[0], x20
ins v21.d[1], x19 ins v21.d[1], x19
ldpsw x18, x19, [x17, 48] ldpsw x20, x19, [x17, 48]
ins v22.d[0], x18 ins v22.d[0], x20
ins v22.d[1], x19 ins v22.d[1], x19
ldpsw x18, x19, [x17, 56] ldpsw x20, x19, [x17, 56]
ins v23.d[0], x18 ins v23.d[0], x20
ins v23.d[1], x19 ins v23.d[1], x19
scvtf v20.2d, v20.2d scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d scvtf v21.2d, v21.2d
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0 literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0 literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0 literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0 literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0 literal_x23: .fill 1,8,0
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32 lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3]; # mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18 eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch # Calculate dataset pointer for dataset prefetch
mov w18, w9 mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1): DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and x18, x18, 1 and x20, x20, 1
add x18, x18, x1 add x20, x20, x1
# Prefetch dataset data # Prefetch dataset data
prfm pldl2strm, [x18] prfm pldl2strm, [x20]
# mx <-> ma # mx <-> ma
ror x9, x9, 32 ror x9, x9, 32
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line): DECL(randomx_program_aarch64_xor_with_dataset_line):
# xor integer registers with dataset data # xor integer registers with dataset data
ldp x18, x19, [x10] ldp x20, x19, [x10]
eor x4, x4, x18 eor x4, x4, x20
eor x5, x5, x19 eor x5, x5, x19
ldp x18, x19, [x10, 16] ldp x20, x19, [x10, 16]
eor x6, x6, x18 eor x6, x6, x20
eor x7, x7, x19 eor x7, x7, x19
ldp x18, x19, [x10, 32] ldp x20, x19, [x10, 32]
eor x12, x12, x18 eor x12, x12, x20
eor x13, x13, x19 eor x13, x13, x19
ldp x18, x19, [x10, 48] ldp x20, x19, [x10, 48]
eor x14, x14, x18 eor x14, x14, x20
eor x15, x15, x19 eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1): DECL(randomx_program_aarch64_update_spMix1):
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers # Restore callee-saved registers
ldp x16, x17, [sp] ldp x16, x17, [sp]
ldp x18, x19, [sp, 16] ldr x19, [sp, 16]
ldp x20, x21, [sp, 32] ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48] ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64] ldp x24, x25, [sp, 64]
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80] stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3]; # mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18 eor x9, x9, x20
# mx <-> ma # mx <-> ma
ror x9, x9, 32 ror x9, x9, 32
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item # x3 -> end item
DECL(randomx_init_dataset_aarch64): DECL(randomx_init_dataset_aarch64):
# Save x30 (return address) # Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
str x30, [sp, -16]! stp x20, x30, [sp, -16]!
# Load pointer to cache memory # Load pointer to cache memory
ldr x0, [x0] ldr x0, [x0]
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3 cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop) bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x30 (return address) # Restore x20 and x30
ldr x30, [sp], 16 ldp x20, x30, [sp], 16
ret ret

View File

@@ -20,7 +20,6 @@
#include "hw/dmi/DmiMemory.h" #include "hw/dmi/DmiMemory.h"
#include "3rdparty/fmt/format.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "hw/dmi/DmiTools.h" #include "hw/dmi/DmiTools.h"
@@ -28,6 +27,7 @@
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <regex> #include <regex>
#include <fmt/format.h>
namespace xmrig { namespace xmrig {

View File

@@ -19,10 +19,11 @@
*/ */
#include "hw/dmi/DmiReader.h" #include "hw/dmi/DmiReader.h"
#include "3rdparty/fmt/core.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "hw/dmi/DmiTools.h" #include "hw/dmi/DmiTools.h"
#include <fmt/core.h>
namespace xmrig { namespace xmrig {

View File

@@ -18,7 +18,6 @@
#include "hw/msr/Msr.h" #include "hw/msr/Msr.h"
#include "3rdparty/fmt/core.h"
#include "backend/cpu/Cpu.h" #include "backend/cpu/Cpu.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
@@ -29,6 +28,7 @@
#include <cstdio> #include <cstdio>
#include <dirent.h> #include <dirent.h>
#include <fcntl.h> #include <fcntl.h>
#include <fmt/core.h>
#include <fstream> #include <fstream>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>