mirror of
https://github.com/xmrig/xmrig.git
synced 2026-04-17 04:59:28 -04:00
Compare commits
83 Commits
v6.23.0
...
a189d84fcd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a189d84fcd | ||
|
|
cb6001945e | ||
|
|
f16a06eb67 | ||
|
|
9d71358f46 | ||
|
|
5a80c65d31 | ||
|
|
67cc6cfd1c | ||
|
|
db24bf5154 | ||
|
|
0d9a372e49 | ||
|
|
c1e3d386fe | ||
|
|
5ca4828255 | ||
|
|
1a04bf2904 | ||
|
|
5feb764b27 | ||
|
|
cb7511507f | ||
|
|
6e6eab1763 | ||
|
|
f35f9d7241 | ||
|
|
45d0a15c98 | ||
|
|
f4845cbd68 | ||
|
|
ed80a8a828 | ||
|
|
9e5492eecc | ||
|
|
e41b28ef78 | ||
|
|
1bd59129c4 | ||
|
|
8ccf7de304 | ||
|
|
30ffb9cb27 | ||
|
|
d3a84c4b52 | ||
|
|
eb49237aaa | ||
|
|
e1efd3dc7f | ||
|
|
e3d0135708 | ||
|
|
f661e1eb30 | ||
|
|
99488751f1 | ||
|
|
5fb0321c84 | ||
|
|
753859caea | ||
|
|
712a5a5e66 | ||
|
|
290a0de6e5 | ||
|
|
e0564b5fdd | ||
|
|
482a1f0b40 | ||
|
|
856813c1ae | ||
|
|
23da1a90f5 | ||
|
|
7981e4a76a | ||
|
|
7ef5142a52 | ||
|
|
db5c6d9190 | ||
|
|
e88009d575 | ||
|
|
5115597e7f | ||
|
|
4cdc35f966 | ||
|
|
b02519b9f5 | ||
|
|
a44b21cef3 | ||
|
|
ea832899f2 | ||
|
|
3ecacf0ac2 | ||
|
|
27c8e60919 | ||
|
|
985fe06e8d | ||
|
|
75b63ddde9 | ||
|
|
643b65f2c0 | ||
|
|
116ba1828f | ||
|
|
da5a5674b4 | ||
|
|
6cc4819cec | ||
|
|
a659397c41 | ||
|
|
20acfd0d79 | ||
|
|
da683d8c3e | ||
|
|
255565b533 | ||
|
|
878e83bf59 | ||
|
|
7abf17cb59 | ||
|
|
eeec5ecd10 | ||
|
|
93f5067999 | ||
|
|
dd6671bc59 | ||
|
|
a1ee2fd9d2 | ||
|
|
2619131176 | ||
|
|
1161f230c5 | ||
|
|
d2363ba28b | ||
|
|
1676da1fe9 | ||
|
|
6e4a5a6d94 | ||
|
|
273133aa63 | ||
|
|
c69e30c9a0 | ||
|
|
6a690ba1e9 | ||
|
|
545aef0937 | ||
|
|
9fa66d3242 | ||
|
|
ec286c7fef | ||
|
|
e28d663d80 | ||
|
|
aba1ad8cfc | ||
|
|
bf44ed52e9 | ||
|
|
762c435fa8 | ||
|
|
48faf0a11b | ||
|
|
d125d22d27 | ||
|
|
9f3591ae0d | ||
|
|
6bbbcc71f1 |
3
.codespellrc
Normal file
3
.codespellrc
Normal file
@@ -0,0 +1,3 @@
|
||||
[codespell]
|
||||
skip = ./src/3rdparty,./src/crypto/ghostrider,./src/crypto/randomx/blake2,./src/crypto/cn/sse2neon.h,./src/backend/opencl/cl/cn/groestl256.cl,./src/backend/opencl/cl/cn/jh.cl
|
||||
ignore-words-list = Carmel,vor
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,3 +4,5 @@ scripts/deps
|
||||
/CMakeLists.txt.user
|
||||
/.idea
|
||||
/src/backend/opencl/cl/cn/cryptonight_gen.cl
|
||||
.vscode
|
||||
/.qtcreator
|
||||
|
||||
32
CHANGELOG.md
32
CHANGELOG.md
@@ -1,3 +1,23 @@
|
||||
# v6.25.0
|
||||
- [#3680](https://github.com/xmrig/xmrig/pull/3680) Added `armv8l` to the list of 32-bit ARM targets.
|
||||
- [#3708](https://github.com/xmrig/xmrig/pull/3708) Minor Aarch64 JIT changes (better instruction selection, don't emit instructions that add 0, etc).
|
||||
- [#3718](https://github.com/xmrig/xmrig/pull/3718) Solo mining: added support for FCMP++ hardfork.
|
||||
- [#3722](https://github.com/xmrig/xmrig/pull/3722) Added Zen4 (Hawk Point) CPUs detection.
|
||||
- [#3725](https://github.com/xmrig/xmrig/pull/3725) Added **RISC-V** support with JIT compiler.
|
||||
- [#3731](https://github.com/xmrig/xmrig/pull/3731) Added initial Haiku OS support.
|
||||
- [#3733](https://github.com/xmrig/xmrig/pull/3733) Added detection for MSVC/2026.
|
||||
- [#3736](https://github.com/xmrig/xmrig/pull/3736) RISC-V: added vectorized dataset init.
|
||||
- [#3740](https://github.com/xmrig/xmrig/pull/3740) RISC-V: added vectorized soft AES.
|
||||
- [#3743](https://github.com/xmrig/xmrig/pull/3743) Linux: added support for transparent huge pages.
|
||||
- Improved LibreSSL support.
|
||||
- Improved compatibility for automatically enabling huge pages on Linux systems without NUMA support.
|
||||
|
||||
# v6.24.0
|
||||
- [#3671](https://github.com/xmrig/xmrig/pull/3671) Fixed detection of L2 cache size for some complex NUMA topologies.
|
||||
- [#3674](https://github.com/xmrig/xmrig/pull/3674) Fixed ARMv7 build.
|
||||
- [#3677](https://github.com/xmrig/xmrig/pull/3677) Fixed auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
|
||||
- [#3678](https://github.com/xmrig/xmrig/pull/3678) Improved IPv6 support: the new default settings use IPv6 equally with IPv4.
|
||||
|
||||
# v6.23.0
|
||||
- [#3668](https://github.com/xmrig/xmrig/issues/3668) Added support for Windows ARM64.
|
||||
- [#3665](https://github.com/xmrig/xmrig/pull/3665) Tweaked auto-config for AMD CPUs with < 2 MB L3 cache per thread.
|
||||
@@ -140,7 +160,7 @@
|
||||
# v6.16.2
|
||||
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
|
||||
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
|
||||
- Fixed several performance bottlenecks introduced in v6.16.1.
|
||||
- Fixed overall GCC-compiled build performance, it's the same speed as MSVC build now.
|
||||
@@ -448,7 +468,7 @@
|
||||
- Compiler for Windows gcc builds updated to v10.1.
|
||||
|
||||
# v5.11.1
|
||||
- [#1652](https://github.com/xmrig/xmrig/pull/1652) Up to 1% RandomX perfomance improvement on recent AMD CPUs.
|
||||
- [#1652](https://github.com/xmrig/xmrig/pull/1652) Up to 1% RandomX performance improvement on recent AMD CPUs.
|
||||
- [#1306](https://github.com/xmrig/xmrig/issues/1306) Fixed possible double connection to a pool.
|
||||
- [#1654](https://github.com/xmrig/xmrig/issues/1654) Fixed build with LibreSSL.
|
||||
|
||||
@@ -554,9 +574,9 @@
|
||||
- Added automatic huge pages configuration on Linux if the miner is run with root privileges.
|
||||
- **Added [automatic Intel prefetchers configuration](https://xmrig.com/docs/miner/randomx-optimization-guide#intel-specific-optimizations) on Linux.**
|
||||
- Added new option `wrmsr` in `randomx` object with command line equivalent `--randomx-wrmsr=6`.
|
||||
- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs.
|
||||
- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads.
|
||||
- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.
|
||||
- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs.
|
||||
- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads.
|
||||
- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.
|
||||
- For official builds all dependencies (libuv, hwloc, openssl) updated to recent versions.
|
||||
- Windows `msvc` builds now use Visual Studio 2019 instead of 2017.
|
||||
|
||||
@@ -602,7 +622,7 @@ This release based on 4.x.x series and include all features from v4.6.2-beta, ch
|
||||
- Removed command line option `--http-enabled`, HTTP API enabled automatically if any other `--http-*` option provided.
|
||||
- [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.**
|
||||
- [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh).
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
|
||||
# Previous versions
|
||||
[doc/CHANGELOG_OLD.md](doc/CHANGELOG_OLD.md)
|
||||
|
||||
@@ -95,7 +95,7 @@ set(HEADERS_CRYPTO
|
||||
src/crypto/common/VirtualMemory.h
|
||||
)
|
||||
|
||||
if (XMRIG_ARM)
|
||||
if (XMRIG_ARM OR XMRIG_RISCV)
|
||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_arm.h)
|
||||
else()
|
||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86.h)
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
||||
|
||||
## Mining backends
|
||||
- **CPU** (x86/x64/ARMv7/ARMv8)
|
||||
- **CPU** (x86/x64/ARMv7/ARMv8/RISC-V)
|
||||
- **OpenCL** for AMD GPUs.
|
||||
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
if (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
|
||||
129
cmake/cpu.cmake
129
cmake/cpu.cmake
@@ -21,6 +21,19 @@ if (NOT VAES_SUPPORTED)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
# Detect RISC-V architecture early (before it's used below)
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv64|riscv|rv64)$")
|
||||
set(RISCV_TARGET 64)
|
||||
set(XMRIG_RISCV ON)
|
||||
add_definitions(-DXMRIG_RISCV)
|
||||
message(STATUS "Detected RISC-V 64-bit architecture (${CMAKE_SYSTEM_PROCESSOR})")
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv32|rv32)$")
|
||||
set(RISCV_TARGET 32)
|
||||
set(XMRIG_RISCV ON)
|
||||
add_definitions(-DXMRIG_RISCV)
|
||||
message(STATUS "Detected RISC-V 32-bit architecture (${CMAKE_SYSTEM_PROCESSOR})")
|
||||
endif()
|
||||
|
||||
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||
add_definitions(-DRAPIDJSON_SSE2)
|
||||
else()
|
||||
@@ -29,6 +42,120 @@ else()
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
# Disable x86-specific features for RISC-V
|
||||
if (XMRIG_RISCV)
|
||||
set(WITH_SSE4_1 OFF)
|
||||
set(WITH_AVX2 OFF)
|
||||
set(WITH_VAES OFF)
|
||||
|
||||
# default build uses the RV64GC baseline
|
||||
set(RVARCH "rv64gc")
|
||||
|
||||
enable_language(ASM)
|
||||
|
||||
try_run(RANDOMX_VECTOR_RUN_FAIL
|
||||
RANDOMX_VECTOR_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_vector.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gcv")
|
||||
|
||||
if (RANDOMX_VECTOR_COMPILE_OK AND NOT RANDOMX_VECTOR_RUN_FAIL)
|
||||
set(RVARCH_V ON)
|
||||
message(STATUS "RISC-V vector extension detected")
|
||||
else()
|
||||
set(RVARCH_V OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZICBOP_RUN_FAIL
|
||||
RANDOMX_ZICBOP_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zicbop.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gc_zicbop")
|
||||
|
||||
if (RANDOMX_ZICBOP_COMPILE_OK AND NOT RANDOMX_ZICBOP_RUN_FAIL)
|
||||
set(RVARCH_ZICBOP ON)
|
||||
message(STATUS "RISC-V zicbop extension detected")
|
||||
else()
|
||||
set(RVARCH_ZICBOP OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZBA_RUN_FAIL
|
||||
RANDOMX_ZBA_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zba.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gc_zba")
|
||||
|
||||
if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
|
||||
set(RVARCH_ZBA ON)
|
||||
message(STATUS "RISC-V zba extension detected")
|
||||
else()
|
||||
set(RVARCH_ZBA OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZBB_RUN_FAIL
|
||||
RANDOMX_ZBB_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zbb.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gc_zbb")
|
||||
|
||||
if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
|
||||
set(RVARCH_ZBB ON)
|
||||
message(STATUS "RISC-V zbb extension detected")
|
||||
else()
|
||||
set(RVARCH_ZBB OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZVKB_RUN_FAIL
|
||||
RANDOMX_ZVKB_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkb.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gcv_zvkb")
|
||||
|
||||
if (RANDOMX_ZVKB_COMPILE_OK AND NOT RANDOMX_ZVKB_RUN_FAIL)
|
||||
set(RVARCH_ZVKB ON)
|
||||
message(STATUS "RISC-V zvkb extension detected")
|
||||
else()
|
||||
set(RVARCH_ZVKB OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZVKNED_RUN_FAIL
|
||||
RANDOMX_ZVKNED_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkned.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gcv_zvkned")
|
||||
|
||||
if (RANDOMX_ZVKNED_COMPILE_OK AND NOT RANDOMX_ZVKNED_RUN_FAIL)
|
||||
set(RVARCH_ZVKNED ON)
|
||||
message(STATUS "RISC-V zvkned extension detected")
|
||||
else()
|
||||
set(RVARCH_ZVKNED OFF)
|
||||
endif()
|
||||
|
||||
# for native builds, enable every extension detected above (V, Zicbop, Zba, Zbb, Zvkb, Zvkned)
|
||||
if (ARCH STREQUAL "native")
|
||||
if (RVARCH_V)
|
||||
set(RVARCH "${RVARCH}v")
|
||||
endif()
|
||||
if (RVARCH_ZICBOP)
|
||||
set(RVARCH "${RVARCH}_zicbop")
|
||||
endif()
|
||||
if (RVARCH_ZBA)
|
||||
set(RVARCH "${RVARCH}_zba")
|
||||
endif()
|
||||
if (RVARCH_ZBB)
|
||||
set(RVARCH "${RVARCH}_zbb")
|
||||
endif()
|
||||
if (RVARCH_ZVKB)
|
||||
set(RVARCH "${RVARCH}_zvkb")
|
||||
endif()
|
||||
if (RVARCH_ZVKNED)
|
||||
set(RVARCH "${RVARCH}_zvkned")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message(STATUS "Using -march=${RVARCH}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag
|
||||
|
||||
if (ARM_V8)
|
||||
@@ -40,7 +167,7 @@ endif()
|
||||
if (NOT ARM_TARGET)
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$")
|
||||
set(ARM_TARGET 8)
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$")
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve|armv8l)$")
|
||||
set(ARM_TARGET 7)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -26,8 +26,13 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
|
||||
elseif (ARM_TARGET EQUAL 7)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
|
||||
elseif (XMRIG_RISCV)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}")
|
||||
|
||||
add_definitions(-DHAVE_ROTR)
|
||||
else()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||
@@ -41,6 +46,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||
else()
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware")
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc")
|
||||
else()
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
||||
endif()
|
||||
@@ -74,6 +81,11 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||
elseif (ARM_TARGET EQUAL 7)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||
elseif (XMRIG_RISCV)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}")
|
||||
|
||||
add_definitions(-DHAVE_ROTR)
|
||||
else()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||
|
||||
@@ -17,6 +17,10 @@ else()
|
||||
set(XMRIG_OS_LINUX ON)
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly)
|
||||
set(XMRIG_OS_FREEBSD ON)
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD)
|
||||
set(XMRIG_OS_OPENBSD ON)
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
|
||||
set(XMRIG_OS_HAIKU ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -43,6 +47,10 @@ elseif(XMRIG_OS_UNIX)
|
||||
add_definitions(-DXMRIG_OS_LINUX)
|
||||
elseif (XMRIG_OS_FREEBSD)
|
||||
add_definitions(-DXMRIG_OS_FREEBSD)
|
||||
elseif (XMRIG_OS_OPENBSD)
|
||||
add_definitions(-DXMRIG_OS_OPENBSD)
|
||||
elseif (XMRIG_OS_HAIKU)
|
||||
add_definitions(-DXMRIG_OS_HAIKU)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ if (WITH_RANDOMX)
|
||||
src/crypto/randomx/jit_compiler_x86_static.asm
|
||||
src/crypto/randomx/jit_compiler_x86.cpp
|
||||
)
|
||||
elseif (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
elseif (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_x86_static.S
|
||||
src/crypto/randomx/jit_compiler_x86.cpp
|
||||
@@ -80,6 +80,39 @@ if (WITH_RANDOMX)
|
||||
else()
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_rv64_static.S
|
||||
src/crypto/randomx/jit_compiler_rv64_vector_static.S
|
||||
src/crypto/randomx/jit_compiler_rv64.cpp
|
||||
src/crypto/randomx/jit_compiler_rv64_vector.cpp
|
||||
src/crypto/randomx/aes_hash_rv64_vector.cpp
|
||||
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
|
||||
)
|
||||
# cheat because cmake and ccache hate each other
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)
|
||||
|
||||
set(RV64_VECTOR_FILE_ARCH "rv64gcv")
|
||||
|
||||
if (ARCH STREQUAL "native")
|
||||
if (RVARCH_ZICBOP)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zicbop")
|
||||
endif()
|
||||
if (RVARCH_ZBA)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zba")
|
||||
endif()
|
||||
if (RVARCH_ZBB)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zbb")
|
||||
endif()
|
||||
if (RVARCH_ZVKB)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zvkb")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_source_files_properties(src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTIES COMPILE_FLAGS "-march=${RV64_VECTOR_FILE_ARCH}")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_vector.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zvkned.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}_zvkned")
|
||||
else()
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_fallback.cpp
|
||||
@@ -116,7 +149,7 @@ if (WITH_RANDOMX)
|
||||
)
|
||||
endif()
|
||||
|
||||
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
|
||||
if (WITH_MSR AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
|
||||
add_definitions(/DXMRIG_FEATURE_MSR)
|
||||
add_definitions(/DXMRIG_FIX_RYZEN)
|
||||
message("-- WITH_MSR=ON")
|
||||
@@ -157,6 +190,15 @@ if (WITH_RANDOMX)
|
||||
list(APPEND HEADERS_CRYPTO src/crypto/rx/Profiler.h)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/rx/Profiler.cpp)
|
||||
endif()
|
||||
|
||||
if (WITH_VAES)
|
||||
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/randomx/aes_hash_vaes512.cpp)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512")
|
||||
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mvaes")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
remove_definitions(/DXMRIG_ALGO_RANDOMX)
|
||||
endif()
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# HTTP API
|
||||
|
||||
If you want use HTTP API you need enable it (`"enabled": true,`) then choice `port` and optionaly `host`. API not available if miner built without HTTP support (`-DWITH_HTTP=OFF`).
|
||||
If you want to use the HTTP API you need to enable it (`"enabled": true,`), then choose a `port` and optionally a `host`. The API is not available if the miner is built without HTTP support (`-DWITH_HTTP=OFF`).
|
||||
|
||||
Offical HTTP client for API: http://workers.xmrig.info/
|
||||
Official HTTP client for API: http://workers.xmrig.info/
|
||||
|
||||
Example configuration:
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ Double check that you see `Huge pages 100%` both for dataset and for all threads
|
||||
|
||||
### Benchmark with custom config
|
||||
|
||||
You can run benchmark with any configuration you want. Just start without command line parameteres, use regular config.json and add `"benchmark":"1M",` on the next line after pool url.
|
||||
You can run benchmark with any configuration you want. Just start without command line parameters, use regular config.json and add `"benchmark":"1M",` on the next line after pool url.
|
||||
|
||||
# Stress test
|
||||
|
||||
@@ -26,4 +26,4 @@ You can also run continuous stress-test that is as close to the real RandomX min
|
||||
xmrig --stress
|
||||
xmrig --stress -a rx/wow
|
||||
```
|
||||
This will require Internet connection and will run indefinitely.
|
||||
This will require Internet connection and will run indefinitely.
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
# v4.0.0-beta
|
||||
- [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.**
|
||||
- [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh).
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
|
||||
# v3.2.0
|
||||
- Added per pool option `coin` with single possible value `monero` for pools without algorithm negotiation, for upcoming Monero fork.
|
||||
@@ -103,7 +103,7 @@
|
||||
- [#1105](https://github.com/xmrig/xmrig/issues/1105) Improved auto configuration for `cn-pico` algorithm.
|
||||
- Added commands `pause` and `resume` via JSON RPC 2.0 API (`POST /json_rpc`).
|
||||
- Added command line option `--export-topology` for export hwloc topology to a XML file.
|
||||
- Breaked backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Broken backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Options `av`, `safe` and `max-cpu-usage` removed.
|
||||
- Algorithm `cn/msr` renamed to `cn/fast`.
|
||||
- Algorithm `cn/xtl` removed.
|
||||
@@ -122,7 +122,7 @@
|
||||
- [#1092](https://github.com/xmrig/xmrig/issues/1092) Fixed crash if wrong CPU affinity used.
|
||||
- [#1103](https://github.com/xmrig/xmrig/issues/1103) Improved auto configuration for RandomX for CPUs where L2 cache is limiting factor.
|
||||
- [#1105](https://github.com/xmrig/xmrig/issues/1105) Improved auto configuration for `cn-pico` algorithm.
|
||||
- [#1106](https://github.com/xmrig/xmrig/issues/1106) Fixed `hugepages` field in summary API.
|
||||
- [#1106](https://github.com/xmrig/xmrig/issues/1106) Fixed `hugepages` field in summary API.
|
||||
- Added alternative short format for CPU threads.
|
||||
- Changed format for CPU threads with intensity above 1.
|
||||
- Name for reference RandomX configuration changed to `rx/test` to avoid potential conflicts in future.
|
||||
@@ -150,7 +150,7 @@
|
||||
- [#1050](https://github.com/xmrig/xmrig/pull/1050) Added RandomXL algorithm for [Loki](https://loki.network/), algorithm name used by miner is `randomx/loki` or `rx/loki`.
|
||||
- Added [flexible](https://github.com/xmrig/xmrig/blob/evo/doc/CPU.md) multi algorithm configuration.
|
||||
- Added unlimited switching between incompatible algorithms, all mining options can be changed in runtime.
|
||||
- Breaked backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Broken backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Options `av`, `safe` and `max-cpu-usage` removed.
|
||||
- Algorithm `cn/msr` renamed to `cn/fast`.
|
||||
- Algorithm `cn/xtl` removed.
|
||||
@@ -183,7 +183,7 @@
|
||||
- [#314](https://github.com/xmrig/xmrig-proxy/issues/314) Added donate over proxy feature.
|
||||
- Added new option `donate-over-proxy`.
|
||||
- Added real graceful exit.
|
||||
|
||||
|
||||
# v2.14.4
|
||||
- [#992](https://github.com/xmrig/xmrig/pull/992) Fixed compilation with Clang 3.5.
|
||||
- [#1012](https://github.com/xmrig/xmrig/pull/1012) Fixed compilation with Clang 9.0.
|
||||
@@ -250,7 +250,7 @@
|
||||
# v2.8.1
|
||||
- [#768](https://github.com/xmrig/xmrig/issues/768) Fixed build with Visual Studio 2015.
|
||||
- [#769](https://github.com/xmrig/xmrig/issues/769) Fixed regression, some ANSI escape sequences were in the log with colors disabled.
|
||||
- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues.
|
||||
- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues.
|
||||
- Simplified checks for ASM auto detection, only AES support necessary.
|
||||
- Added missing options to `--help` output.
|
||||
|
||||
@@ -259,7 +259,7 @@
|
||||
- Added global and per thread option `"asm"` and command line equivalent.
|
||||
- **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.**
|
||||
- Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents.
|
||||
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.
|
||||
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.
|
||||
- [#245](https://github.com/xmrig/xmrig-proxy/issues/245) Fixed API ID collision when run multiple miners on same machine.
|
||||
- [#757](https://github.com/xmrig/xmrig/issues/757) Fixed send buffer overflow.
|
||||
|
||||
@@ -346,7 +346,7 @@
|
||||
|
||||
# v2.4.4
|
||||
- Added libmicrohttpd version to --version output.
|
||||
- Fixed bug in singal handler, in some cases miner wasn't shutdown properly.
|
||||
- Fixed bug in signal handler, in some cases miner wasn't shutdown properly.
|
||||
- Fixed recent MSVC 2017 version detection.
|
||||
- [#279](https://github.com/xmrig/xmrig/pull/279) Fixed build on some macOS versions.
|
||||
|
||||
@@ -359,7 +359,7 @@
|
||||
# v2.4.2
|
||||
- [#60](https://github.com/xmrig/xmrig/issues/60) Added FreeBSD support, thanks [vcambur](https://github.com/vcambur).
|
||||
- [#153](https://github.com/xmrig/xmrig/issues/153) Fixed issues with dwarfpool.com.
|
||||
|
||||
|
||||
# v2.4.1
|
||||
- [#147](https://github.com/xmrig/xmrig/issues/147) Fixed compatibility with monero-stratum.
|
||||
|
||||
@@ -371,7 +371,7 @@
|
||||
- [#101](https://github.com/xmrig/xmrig/issues/101) Fixed MSVC 2017 (15.3) compile time version detection.
|
||||
- [#108](https://github.com/xmrig/xmrig/issues/108) Silently ignore invalid values for `donate-level` option.
|
||||
- [#111](https://github.com/xmrig/xmrig/issues/111) Fixed build without AEON support.
|
||||
|
||||
|
||||
# v2.3.1
|
||||
- [#68](https://github.com/xmrig/xmrig/issues/68) Fixed compatibility with Docker containers, nothing was printed to the console.
|
||||
|
||||
@@ -398,7 +398,7 @@
|
||||
# v2.1.0
|
||||
- [#40](https://github.com/xmrig/xmrig/issues/40)
|
||||
Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed, login request was contain malformed JSON if username or password has some special characters for example `\`.
|
||||
- Fixed, login request was contain malformed JSON if username or password has some special characters for example `\`.
|
||||
- [#220](https://github.com/fireice-uk/xmr-stak-cpu/pull/220) Better support for Round Robin DNS, IP address now always chosen randomly instead of stuck on first one.
|
||||
- Changed donation address, new [xmrig-proxy](https://github.com/xmrig/xmrig-proxy) is coming soon.
|
||||
|
||||
@@ -418,16 +418,16 @@ Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed Windows XP support.
|
||||
- Fixed regression, option `--no-color` was not fully disable colored output.
|
||||
- Show resolved pool IP address in miner output.
|
||||
|
||||
|
||||
# v1.0.1
|
||||
- Fixed broken software AES implementation: the app crashed if the CPU did not support AES-NI; only version 1.0.0 was affected.
|
||||
|
||||
# v1.0.0
|
||||
- Miner completely rewritten in C++ with libuv.
|
||||
- This version should be fully compatible (except config file) with previos versions, many new nice features will come in next versions.
|
||||
- This is still beta. If you found regression, stability or perfomance issues or have an idea for new feature please fell free to open new [issue](https://github.com/xmrig/xmrig/issues/new).
|
||||
- This version should be fully compatible (except config file) with previous versions, many new nice features will come in next versions.
|
||||
- This is still beta. If you find a regression, stability or performance issues, or have an idea for a new feature, please feel free to open a new [issue](https://github.com/xmrig/xmrig/issues/new).
|
||||
- Added new option `--print-time=N`, print hashrate report every N seconds.
|
||||
- New hashrate reports, by default every 60 secons.
|
||||
- New hashrate reports, by default every 60 seconds.
|
||||
- Added Microsoft Visual C++ 2015 and 2017 support.
|
||||
- Removed dependency on libcurl.
|
||||
- To compile this version from source please switch to [dev](https://github.com/xmrig/xmrig/tree/dev) branch.
|
||||
@@ -440,7 +440,7 @@ Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed gcc 7.1 support.
|
||||
|
||||
# v0.8.1
|
||||
- Added nicehash support, detects automaticaly by pool URL, for example `cryptonight.eu.nicehash.com:3355` or manually via option `--nicehash`.
|
||||
- Added nicehash support, detects automatically by pool URL, for example `cryptonight.eu.nicehash.com:3355` or manually via option `--nicehash`.
|
||||
|
||||
# v0.8.0
|
||||
- Added double hash mode, also known as lower power mode. `--av=2` and `--av=4`.
|
||||
|
||||
@@ -124,7 +124,7 @@ Force enable (`true`) or disable (`false`) hardware AES support. Default value `
|
||||
Mining threads priority, value from `1` (lowest priority) to `5` (highest possible priority). Default value `null` means miner don't change threads priority at all. Setting priority higher than 2 can make your PC unresponsive.
|
||||
|
||||
#### `memory-pool` (since v4.3.0)
|
||||
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm switching. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages. It helps to avoid loosing huge pages for scratchpads when RandomX dataset is updated and mining threads restart after a 2-3 days of mining.
|
||||
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm switching. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages. It helps to avoid losing huge pages for scratchpads when RandomX dataset is updated and mining threads restart after a 2-3 days of mining.
|
||||
|
||||
#### `yield` (since v5.1.1)
|
||||
Prefer system better system response/stability `true` (default value) or maximum hashrate `false`.
|
||||
@@ -133,7 +133,7 @@ Prefer system better system response/stability `true` (default value) or maximum
|
||||
Enable/configure or disable ASM optimizations. Possible values: `true`, `false`, `"intel"`, `"ryzen"`, `"bulldozer"`.
|
||||
|
||||
#### `argon2-impl` (since v3.1.0)
|
||||
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. This is used in RandomX dataset initialization and also in some other mining algorithms. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards - if your CPU doesn't support required instuctions, miner will crash.
|
||||
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. This is used in RandomX dataset initialization and also in some other mining algorithms. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards - if your CPU doesn't support required instructions, miner will crash.
|
||||
|
||||
#### `astrobwt-max-size`
|
||||
AstroBWT algorithm: skip hashes with large stage 2 size, default: `550`, min: `400`, max: `1200`. Optimal value depends on your CPU/GPU
|
||||
|
||||
365
doc/RISCV_PERF_TUNING.md
Normal file
365
doc/RISCV_PERF_TUNING.md
Normal file
@@ -0,0 +1,365 @@
|
||||
# RISC-V Performance Optimization Guide
|
||||
|
||||
This guide provides comprehensive instructions for optimizing XMRig on RISC-V architectures.
|
||||
|
||||
## Build Optimizations
|
||||
|
||||
### Compiler Flags Applied Automatically
|
||||
|
||||
The CMake build now applies aggressive RISC-V-specific optimizations:
|
||||
|
||||
```cmake
|
||||
# RISC-V ISA with extensions
|
||||
-march=rv64gcv_zba_zbb_zbc_zbs
|
||||
|
||||
# Aggressive compiler optimizations
|
||||
-funroll-loops # Unroll loops for ILP (instruction-level parallelism)
|
||||
-fomit-frame-pointer # Free up the frame pointer register (s0/fp) for general use
|
||||
-fno-common # Better code generation for global variables
|
||||
-finline-functions # Inline more functions for better cache locality
|
||||
-ffast-math # Relaxed FP semantics (safe for mining)
|
||||
-flto # Link-time optimization for cross-module inlining
|
||||
|
||||
# Release build additions
|
||||
-minline-atomics # Inline atomic operations for faster synchronization
|
||||
```
|
||||
|
||||
### Optimal Build Command
|
||||
|
||||
```bash
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
**Expected build time**: 5-15 minutes depending on CPU
|
||||
|
||||
## Runtime Optimizations
|
||||
|
||||
### 1. Memory Configuration (Most Important)
|
||||
|
||||
Enable huge pages to reduce TLB misses and fragmentation:
|
||||
|
||||
#### Enable 2MB Huge Pages
|
||||
```bash
|
||||
# Calculate required huge pages (1 page = 2MB)
|
||||
# For 2 GB dataset: 1024 pages
|
||||
# For cache + dataset: 1536 pages minimum
|
||||
sudo sysctl -w vm.nr_hugepages=2048
|
||||
```
|
||||
|
||||
Verify:
|
||||
```bash
|
||||
grep HugePages /proc/meminfo
|
||||
# Expected: HugePages_Free should be close to nr_hugepages
|
||||
```
|
||||
|
||||
#### Enable 1GB Huge Pages (Optional but Recommended)
|
||||
|
||||
```bash
|
||||
# Run provided helper script
|
||||
sudo ./scripts/enable_1gb_pages.sh
|
||||
|
||||
# Verify 1GB pages are available
|
||||
cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
|
||||
# Should be: >= 1 (one 1GB page)
|
||||
```
|
||||
|
||||
Update config.json:
|
||||
```json
|
||||
{
|
||||
"cpu": {
|
||||
"huge-pages": true
|
||||
},
|
||||
"randomx": {
|
||||
"1gb-pages": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. RandomX Mode Selection
|
||||
|
||||
| Mode | Memory | Init Time | Throughput | Recommendation |
|
||||
|------|--------|-----------|-----------|-----------------|
|
||||
| **light** | 256 MB | 10 sec | Low | Testing, resource-constrained |
|
||||
| **fast** | 2 GB | 2-5 min* | High | Production (with huge pages) |
|
||||
| **auto** | 2 GB | Varies | High | Default (uses fast if possible) |
|
||||
|
||||
*With optimizations; can be 30+ minutes without huge pages
|
||||
|
||||
**For RISC-V, use fast mode with huge pages enabled.**
|
||||
|
||||
### 3. Dataset Initialization Threads
|
||||
|
||||
Optimal thread count = 60-75% of CPU cores (leaves headroom for OS/other tasks)
|
||||
|
||||
```json
|
||||
{
|
||||
"randomx": {
|
||||
"init": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Or auto-detect (rewritten for RISC-V):
|
||||
```json
|
||||
{
|
||||
"randomx": {
|
||||
"init": -1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. CPU Affinity (Optional)
|
||||
|
||||
Pin threads to specific cores for better cache locality:
|
||||
|
||||
```json
|
||||
{
|
||||
"cpu": {
|
||||
"rx/0": [
|
||||
{ "threads": 1, "affinity": 0 },
|
||||
{ "threads": 1, "affinity": 1 },
|
||||
{ "threads": 1, "affinity": 2 },
|
||||
{ "threads": 1, "affinity": 3 }
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. CPU Governor (Linux)
|
||||
|
||||
Set to performance mode for maximum throughput:
|
||||
|
||||
```bash
|
||||
# Check current governor
|
||||
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
|
||||
|
||||
# Set to performance (requires root)
|
||||
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
|
||||
|
||||
# Verify
|
||||
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
|
||||
# Should output: performance
|
||||
```
|
||||
|
||||
## Configuration Examples
|
||||
|
||||
### Minimum (Testing)
|
||||
```json
|
||||
{
|
||||
"randomx": {
|
||||
"mode": "light"
|
||||
},
|
||||
"cpu": {
|
||||
"huge-pages": false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Recommended (Balanced)
|
||||
```json
|
||||
{
|
||||
"randomx": {
|
||||
"mode": "auto",
|
||||
"init": 4,
|
||||
"1gb-pages": true
|
||||
},
|
||||
"cpu": {
|
||||
"huge-pages": true,
|
||||
"priority": 2
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Maximum Performance (Production)
|
||||
```json
|
||||
{
|
||||
"randomx": {
|
||||
"mode": "fast",
|
||||
"init": -1,
|
||||
"1gb-pages": true,
|
||||
"scratchpad_prefetch_mode": 1
|
||||
},
|
||||
"cpu": {
|
||||
"huge-pages": true,
|
||||
"priority": 3,
|
||||
"yield": false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## CLI Equivalents
|
||||
|
||||
```bash
|
||||
# Light mode
|
||||
./xmrig --randomx-mode=light
|
||||
|
||||
# Fast mode with 4 init threads
|
||||
./xmrig --randomx-mode=fast --randomx-init=4
|
||||
|
||||
# Benchmark
|
||||
./xmrig --bench=1M --algo=rx/0
|
||||
|
||||
# Benchmark Wownero variant (1 MB scratchpad)
|
||||
./xmrig --bench=1M --algo=rx/wow
|
||||
|
||||
# Mine to pool
|
||||
./xmrig -o pool.example.com:3333 -u YOUR_WALLET -p x
|
||||
```
|
||||
|
||||
## Performance Diagnostics
|
||||
|
||||
### Check if Vector Extensions are Detected
|
||||
|
||||
Look for `FEATURES:` line in output:
|
||||
```
|
||||
* CPU: ky,x60 (uarch ky,x1)
|
||||
* FEATURES: rv64imafdcv zba zbb zbc zbs
|
||||
```
|
||||
|
||||
- `v`: Vector extension (RVV) ✓
|
||||
- `zba`, `zbb`, `zbc`, `zbs`: Bit manipulation ✓
|
||||
- If missing, make sure build used `-march=rv64gcv_zba_zbb_zbc_zbs`
|
||||
|
||||
### Verify Huge Pages at Runtime
|
||||
|
||||
```bash
|
||||
# Run xmrig with --bench=1M and check output
|
||||
./xmrig --bench=1M
|
||||
|
||||
# Look for line like:
|
||||
# HUGE PAGES 100% 1 / 1 (1024 MB)
|
||||
```
|
||||
|
||||
- Should show 100% for dataset AND threads
|
||||
- If less, increase `vm.nr_hugepages` (add it to `/etc/sysctl.conf` or a file under `/etc/sysctl.d/` so the setting survives) and reboot
|
||||
|
||||
### Monitor Performance
|
||||
|
||||
```bash
|
||||
# Run benchmark multiple times to find stable hashrate
|
||||
./xmrig --bench=1M --algo=rx/0
|
||||
./xmrig --bench=10M --algo=rx/0
|
||||
./xmrig --bench=100M --algo=rx/0
|
||||
|
||||
# Check system load and memory during mining
|
||||
while true; do free -h; grep HugePages /proc/meminfo; sleep 2; done
|
||||
```
|
||||
|
||||
## Expected Performance
|
||||
|
||||
### Hardware: Orange Pi RV2 (Ky X1, 8 cores @ ~1.5 GHz)
|
||||
|
||||
| Config | Mode | Hashrate | Init Time |
|
||||
|--------|------|----------|-----------|
|
||||
| Scalar (baseline) | fast | 30 H/s | 10 min |
|
||||
| Scalar + huge pages | fast | 33 H/s | 2 min |
|
||||
| RVV (if enabled) | fast | 70-100 H/s | 3 min |
|
||||
|
||||
*Actual results depend on CPU frequency, memory speed, and load*
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Long Initialization Times (30+ minutes)
|
||||
|
||||
**Cause**: Huge pages not enabled, system using swap
|
||||
**Solution**:
|
||||
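1. Enable huge pages: `sudo sysctl -w vm.nr_hugepages=2048` (this setting is lost on reboot; to make it permanent, also add `vm.nr_hugepages=2048` to `/etc/sysctl.conf` or a file under `/etc/sysctl.d/`)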
|
||||
2. Reboot: `sudo reboot`
|
||||
3. Reduce mining threads to free memory
|
||||
4. Check available memory: `free -h`
|
||||
|
||||
### Low Hashrate (50% of expected)
|
||||
|
||||
**Cause**: CPU governor set to `powersave`, huge pages not enabled, or high contention for CPU/memory
|
||||
**Solution**:
|
||||
1. Set governor to performance: `echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor`
|
||||
2. Enable huge pages
|
||||
3. Reduce number of mining threads
|
||||
4. Check system load: `top` or `htop`
|
||||
|
||||
### Dataset Init Crashes or Hangs
|
||||
|
||||
**Cause**: Insufficient memory, corrupted huge pages
|
||||
**Solution**:
|
||||
1. Disable huge pages temporarily: set `huge-pages: false` in config
|
||||
2. Reduce mining threads
|
||||
3. Reboot and re-enable huge pages
|
||||
4. Try light mode: `--randomx-mode=light`
|
||||
|
||||
### Out of Memory During Benchmark
|
||||
|
||||
**Cause**: Not enough RAM for dataset + cache + threads
|
||||
**Solution**:
|
||||
1. Use light mode: `--randomx-mode=light`
|
||||
2. Reduce mining threads: `--threads=1`
|
||||
3. Increase available memory (kill other processes)
|
||||
4. Check: `free -h` before mining
|
||||
|
||||
## Advanced Tuning
|
||||
|
||||
### Vector Length (VLEN) Detection
|
||||
|
||||
The vector register length (VLEN) implemented by the RISC-V vector extension affects performance:
|
||||
|
||||
```bash
|
||||
# Check VLEN on your CPU
|
||||
cat /proc/cpuinfo | grep vlen
|
||||
|
||||
# Expected values:
|
||||
# - 128 bits (16 bytes) = minimum
|
||||
# - 256 bits (32 bytes) = common
|
||||
# - 512 bits (64 bytes) = high performance
|
||||
```
|
||||
|
||||
Larger VLEN generally means better performance for vectorized operations.
|
||||
|
||||
### Prefetch Optimization
|
||||
|
||||
The code automatically optimizes memory prefetching for RISC-V:
|
||||
|
||||
```
|
||||
scratchpad_prefetch_mode: 0 = disabled (slowest)
|
||||
scratchpad_prefetch_mode: 1 = prefetch.r (default, recommended)
|
||||
scratchpad_prefetch_mode: 2 = prefetch.w (experimental)
|
||||
```
|
||||
|
||||
### Memory Bandwidth Saturation
|
||||
|
||||
If experiencing memory bandwidth saturation (high latency):
|
||||
|
||||
1. Reduce mining threads
|
||||
2. Increase the L2/L3 cache available to each thread by running fewer mining threads
|
||||
3. Enable cache QoS (AMD Ryzen): `cache_qos: true`
|
||||
|
||||
## Building with Custom Flags
|
||||
|
||||
To build with custom RISC-V flags:
|
||||
|
||||
```bash
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_C_FLAGS="-march=rv64gcv_zba_zbb_zbc_zbs -O3 -funroll-loops -fomit-frame-pointer" \
|
||||
..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
## Future Optimizations
|
||||
|
||||
- [ ] Zbk* (crypto) support detection and usage
|
||||
- [ ] Optimal VLEN-aware algorithm selection
|
||||
- [ ] Per-core memory affinity (NUMA support)
|
||||
- [ ] Dynamic thread count adjustment based on thermals
|
||||
- [ ] Cross-compile optimizations for various RISC-V cores
|
||||
|
||||
## References
|
||||
|
||||
- [RISC-V Vector Extension Spec](https://github.com/riscv/riscv-v-spec)
|
||||
- [RISC-V Bit Manipulation Spec](https://github.com/riscv/riscv-bitmanip)
|
||||
- [RISC-V Crypto Spec](https://github.com/riscv/riscv-crypto)
|
||||
- [XMRig Documentation](https://xmrig.com/docs)
|
||||
|
||||
---
|
||||
|
||||
For further optimization, enable RVV intrinsics by replacing `sse2rvv.h` with `sse2rvv_optimized.h` in the build.
|
||||
@@ -12,7 +12,7 @@ if grep -E 'AMD Ryzen|AMD EPYC|AuthenticAMD' /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null;
|
||||
if grep "model[[:space:]]\{1,\}:[[:space:]]\(97\|117\)" /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
echo "Detected Zen4 CPU"
|
||||
wrmsr -a 0xc0011020 0x4400000000000
|
||||
|
||||
2
src/3rdparty/argon2/CMakeLists.txt
vendored
2
src/3rdparty/argon2/CMakeLists.txt
vendored
@@ -35,7 +35,7 @@ if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
add_feature_impl(xop "" HAVE_XOP)
|
||||
add_feature_impl(avx2 "/arch:AVX2" HAVE_AVX2)
|
||||
add_feature_impl(avx512f "/arch:AVX512F" HAVE_AVX512F)
|
||||
elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
elseif (NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
function(add_feature_impl FEATURE GCC_FLAG DEF)
|
||||
add_library(argon2-${FEATURE} STATIC arch/x86_64/lib/argon2-${FEATURE}.c)
|
||||
target_include_directories(argon2-${FEATURE} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)
|
||||
|
||||
2
src/3rdparty/libethash/endian.h
vendored
2
src/3rdparty/libethash/endian.h
vendored
@@ -31,7 +31,7 @@
|
||||
#include <libkern/OSByteOrder.h>
|
||||
#define ethash_swap_u32(input_) OSSwapInt32(input_)
|
||||
#define ethash_swap_u64(input_) OSSwapInt64(input_)
|
||||
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
|
||||
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) || defined(__HAIKU__)
|
||||
#define ethash_swap_u32(input_) bswap32(input_)
|
||||
#define ethash_swap_u64(input_) bswap64(input_)
|
||||
#elif defined(__OpenBSD__)
|
||||
|
||||
@@ -89,11 +89,16 @@ static void print_cpu(const Config *)
|
||||
{
|
||||
const auto info = Cpu::info();
|
||||
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %s %sAES%s",
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %s %s%sAES%s",
|
||||
"CPU",
|
||||
info->brand(),
|
||||
info->packages(),
|
||||
ICpuInfo::is64bit() ? GREEN_BOLD("64-bit") : RED_BOLD("32-bit"),
|
||||
#ifdef XMRIG_RISCV
|
||||
info->hasRISCV_Vector() ? GREEN_BOLD_S "RVV " : RED_BOLD_S "-RVV ",
|
||||
#else
|
||||
"",
|
||||
#endif
|
||||
info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-",
|
||||
info->isVM() ? RED_BOLD_S " VM" : ""
|
||||
);
|
||||
|
||||
@@ -48,6 +48,24 @@ static const std::map<int, std::map<uint32_t, uint64_t> > hashCheck = {
|
||||
{ 9000000U, 0x323935102AB6B45CULL },
|
||||
{ 10000000U, 0xB5231262E2792B26ULL }
|
||||
}},
|
||||
{ Algorithm::RX_V2, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000U, 0x57d2051d099613a4ULL },
|
||||
{ 20000U, 0x0bae0155cc797f01ULL },
|
||||
# endif
|
||||
{ 250000U, 0x18cf741a71484072ULL },
|
||||
{ 500000U, 0xcd8c3e6ec31b2faeULL },
|
||||
{ 1000000U, 0x88d6b8fb70cd479dULL },
|
||||
{ 2000000U, 0x0e16828d236a1a63ULL },
|
||||
{ 3000000U, 0x2739bdd0f25b83a6ULL },
|
||||
{ 4000000U, 0x32f42d9006d2d34bULL },
|
||||
{ 5000000U, 0x16d9c6286cb82251ULL },
|
||||
{ 6000000U, 0x1f916ae19d6bcf07ULL },
|
||||
{ 7000000U, 0x1f474f99a873948fULL },
|
||||
{ 8000000U, 0x8d67e0ddf05476bbULL },
|
||||
{ 9000000U, 0x3ebf37dcd5c4a215ULL },
|
||||
{ 10000000U, 0x7efbddff3f30fb74ULL }
|
||||
}},
|
||||
{ Algorithm::RX_WOW, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000U, 0x6B0918757100B338ULL },
|
||||
@@ -88,6 +106,24 @@ static const std::map<int, std::map<uint32_t, uint64_t> > hashCheck1T = {
|
||||
{ 9000000U, 0xC6D39EF59213A07CULL },
|
||||
{ 10000000U, 0x95E6BAE68DD779CDULL }
|
||||
}},
|
||||
{ Algorithm::RX_V2, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000, 0x90eb7c07cd9e0d90ULL },
|
||||
{ 20000, 0x6523a3658d7d9930ULL },
|
||||
# endif
|
||||
{ 250000, 0xf83b6d9d355ee5b1ULL },
|
||||
{ 500000, 0xbea3c1bf1465e9abULL },
|
||||
{ 1000000, 0x9e16f7cb56b366e1ULL },
|
||||
{ 2000000, 0x3b5e671f47e15e55ULL },
|
||||
{ 3000000, 0xec5819c180df03e2ULL },
|
||||
{ 4000000, 0x19d31b498f86aad4ULL },
|
||||
{ 5000000, 0x2487626c75cd12ccULL },
|
||||
{ 6000000, 0xa323a25a5286c39aULL },
|
||||
{ 7000000, 0xa123b100f3104dfcULL },
|
||||
{ 8000000, 0x602db9d83bfa0ddcULL },
|
||||
{ 9000000, 0x98da909e579765ddULL },
|
||||
{ 10000000, 0x3a45b7247cec9895ULL }
|
||||
}},
|
||||
{ Algorithm::RX_WOW, {
|
||||
# ifndef NDEBUG
|
||||
{ 10000U, 0x9EC1B9B8C8C7F082ULL },
|
||||
|
||||
@@ -87,14 +87,14 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
||||
if (!cn_heavyZen3Memory) {
|
||||
// Round up number of threads to the multiple of 8
|
||||
const size_t num_threads = ((m_threads + 7) / 8) * 8;
|
||||
cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * num_threads, data.hugePages, false, false, node());
|
||||
cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * num_threads, data.hugePages, false, false, node(), VirtualMemory::kDefaultHugePageSize);
|
||||
}
|
||||
m_memory = cn_heavyZen3Memory;
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
|
||||
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node(), VirtualMemory::kDefaultHugePageSize);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
@@ -256,7 +256,10 @@ void xmrig::CpuWorker<N>::start()
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
bool first = true;
|
||||
alignas(16) uint64_t tempHash[8] = {};
|
||||
alignas(64) uint64_t tempHash[8] = {};
|
||||
|
||||
size_t prev_job_size = 0;
|
||||
alignas(64) uint8_t prev_job[Job::kMaxBlobSize] = {};
|
||||
# endif
|
||||
|
||||
while (!Nonce::isOutdated(Nonce::CPU, m_job.sequence())) {
|
||||
@@ -297,6 +300,11 @@ void xmrig::CpuWorker<N>::start()
|
||||
job.generateMinerSignature(m_job.blob(), job.size(), miner_signature_ptr);
|
||||
}
|
||||
randomx_calculate_hash_first(m_vm, tempHash, m_job.blob(), job.size());
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_COMMITMENT) {
|
||||
prev_job_size = job.size();
|
||||
memcpy(prev_job, m_job.blob(), prev_job_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nextRound()) {
|
||||
@@ -307,7 +315,15 @@ void xmrig::CpuWorker<N>::start()
|
||||
memcpy(miner_signature_saved, miner_signature_ptr, sizeof(miner_signature_saved));
|
||||
job.generateMinerSignature(m_job.blob(), job.size(), miner_signature_ptr);
|
||||
}
|
||||
|
||||
randomx_calculate_hash_next(m_vm, tempHash, m_job.blob(), job.size(), m_hash);
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_COMMITMENT) {
|
||||
memcpy(m_commitment, m_hash, RANDOMX_HASH_SIZE);
|
||||
randomx_calculate_commitment(prev_job, prev_job_size, m_hash, m_hash);
|
||||
prev_job_size = job.size();
|
||||
memcpy(prev_job, m_job.blob(), prev_job_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
# endif
|
||||
|
||||
@@ -83,6 +83,7 @@ private:
|
||||
void allocateCnCtx();
|
||||
void consumeJob();
|
||||
|
||||
alignas(8) uint8_t m_commitment[N * 32]{ 0 };
|
||||
alignas(8) uint8_t m_hash[N * 32]{ 0 };
|
||||
const Algorithm m_algorithm;
|
||||
const Assembly m_assembly;
|
||||
|
||||
@@ -46,7 +46,12 @@ else()
|
||||
set(CPUID_LIB "")
|
||||
endif()
|
||||
|
||||
if (XMRIG_ARM)
|
||||
if (XMRIG_RISCV)
|
||||
list(APPEND SOURCES_BACKEND_CPU
|
||||
src/backend/cpu/platform/lscpu_riscv.cpp
|
||||
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
|
||||
)
|
||||
elseif (XMRIG_ARM)
|
||||
list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp)
|
||||
|
||||
if (XMRIG_OS_WIN)
|
||||
|
||||
@@ -85,13 +85,14 @@ public:
|
||||
FLAG_POPCNT,
|
||||
FLAG_CAT_L3,
|
||||
FLAG_VM,
|
||||
FLAG_RISCV_VECTOR,
|
||||
FLAG_MAX
|
||||
};
|
||||
|
||||
ICpuInfo() = default;
|
||||
virtual ~ICpuInfo() = default;
|
||||
|
||||
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
|
||||
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) || defined(__riscv) && (__riscv_xlen == 64)
|
||||
inline constexpr static bool is64bit() { return true; }
|
||||
# else
|
||||
inline constexpr static bool is64bit() { return false; }
|
||||
@@ -109,6 +110,7 @@ public:
|
||||
virtual bool hasOneGbPages() const = 0;
|
||||
virtual bool hasXOP() const = 0;
|
||||
virtual bool isVM() const = 0;
|
||||
virtual bool hasRISCV_Vector() const = 0;
|
||||
virtual bool jccErratum() const = 0;
|
||||
virtual const char *backend() const = 0;
|
||||
virtual const char *brand() const = 0;
|
||||
|
||||
@@ -58,8 +58,8 @@
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
constexpr size_t kCpuFlagsSize = 15;
|
||||
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
|
||||
constexpr size_t kCpuFlagsSize = 16;
|
||||
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm", "rvv" };
|
||||
static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX mismatch");
|
||||
|
||||
|
||||
@@ -250,7 +250,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
break;
|
||||
|
||||
case 0x19:
|
||||
if (m_model == 0x61) {
|
||||
if ((m_model == 0x61) || (m_model == 0x75)) {
|
||||
m_arch = ARCH_ZEN4;
|
||||
m_msrMod = MSR_MOD_RYZEN_19H_ZEN4;
|
||||
}
|
||||
|
||||
@@ -52,6 +52,7 @@ protected:
|
||||
inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); }
|
||||
inline bool hasXOP() const override { return has(FLAG_XOP); }
|
||||
inline bool isVM() const override { return has(FLAG_VM); }
|
||||
inline bool hasRISCV_Vector() const override { return has(FLAG_RISCV_VECTOR); }
|
||||
inline bool jccErratum() const override { return m_jccErratum; }
|
||||
inline const char *brand() const override { return m_brand; }
|
||||
inline const std::vector<int32_t> &units() const override { return m_units; }
|
||||
@@ -65,7 +66,7 @@ protected:
|
||||
inline Vendor vendor() const override { return m_vendor; }
|
||||
inline uint32_t model() const override
|
||||
{
|
||||
# ifndef XMRIG_ARM
|
||||
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
return m_model;
|
||||
# else
|
||||
return 0;
|
||||
@@ -80,7 +81,7 @@ protected:
|
||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||
|
||||
private:
|
||||
# ifndef XMRIG_ARM
|
||||
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
uint32_t m_procInfo = 0;
|
||||
uint32_t m_family = 0;
|
||||
uint32_t m_model = 0;
|
||||
|
||||
119
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
Normal file
119
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
Normal file
@@ -0,0 +1,119 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
|
||||
|
||||
#include "backend/cpu/platform/BasicCpuInfo.h"
|
||||
#include "base/tools/String.h"
|
||||
#include "3rdparty/rapidjson/document.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
extern String cpu_name_riscv();
|
||||
extern bool has_riscv_vector();
|
||||
extern bool has_riscv_aes();
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
/**
 * RISC-V flavour of the basic CPU info provider.
 *
 * Takes the thread count from std::thread::hardware_concurrency(), numbers
 * the units sequentially, fills in the brand string and sets the feature
 * flags reported by the RISC-V helpers declared above.
 */
xmrig::BasicCpuInfo::BasicCpuInfo() :
    m_threads(std::thread::hardware_concurrency())
{
    // One unit per hardware thread, numbered 0 .. m_threads - 1.
    m_units.resize(m_threads);
    int32_t unitId = 0;
    for (auto &unit : m_units) {
        unit = unitId++;
    }

    // Generic brand first; replaced below when a CPU name is available.
    memcpy(m_brand, "RISC-V", 6);

    const auto cpuName = cpu_name_riscv();
    if (!cpuName.isNull()) {
        // Copy at most sizeof(m_brand) - 1 bytes so the last byte of the buffer is never written.
        strncpy(m_brand, cpuName.data(), sizeof(m_brand) - 1);
    }

    // Vector extension (RVV) availability.
    m_flags.set(FLAG_RISCV_VECTOR, has_riscv_vector());

    // AES extension (Zknd/Zkne) availability.
    m_flags.set(FLAG_AES, has_riscv_aes());

    // 1GB huge pages are reported as supported when the kernel exposes the matching sysfs entry.
    const bool oneGbPages = std::ifstream("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages").good();
    m_flags.set(FLAG_PDPE1GB, oneGbPages);
}
|
||||
|
||||
|
||||
const char *xmrig::BasicCpuInfo::backend() const
|
||||
{
|
||||
return "basic/1";
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds the default thread list for the given algorithm.
 *
 * The GhostRider family (when compiled in) gets CpuThreads(threads(), 8);
 * every other algorithm gets CpuThreads(threads()). The second (unnamed)
 * parameter is not used.
 */
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
{
#   ifdef XMRIG_ALGO_GHOSTRIDER
    const bool isGhostRider = (algorithm.family() == Algorithm::GHOSTRIDER);

    return isGhostRider ? CpuThreads(threads(), 8) : CpuThreads(threads());
#   else
    return CpuThreads(threads());
#   endif
}
|
||||
|
||||
|
||||
/**
 * Serializes the CPU description into a JSON object.
 *
 * @param doc  document whose allocator is used for all members.
 * @return     object holding the brand, feature booleans, cache sizes,
 *             topology counters, backend name and a "flags" array.
 */
rapidjson::Value xmrig::BasicCpuInfo::toJSON(rapidjson::Document &doc) const
{
    using namespace rapidjson;

    auto &alloc = doc.GetAllocator();
    Value info(kObjectType);

    info.AddMember("brand",    StringRef(brand()), alloc);
    info.AddMember("aes",      hasAES(), alloc);
    info.AddMember("avx2",     false, alloc);
    info.AddMember("x64",      is64bit(), alloc); // DEPRECATED will be removed in the next major release.
    info.AddMember("64_bit",   is64bit(), alloc);
    info.AddMember("l2",       static_cast<uint64_t>(L2()), alloc);
    info.AddMember("l3",       static_cast<uint64_t>(L3()), alloc);
    info.AddMember("cores",    static_cast<uint64_t>(cores()), alloc);
    info.AddMember("threads",  static_cast<uint64_t>(threads()), alloc);
    info.AddMember("packages", static_cast<uint64_t>(packages()), alloc);
    info.AddMember("nodes",    static_cast<uint64_t>(nodes()), alloc);
    info.AddMember("backend",  StringRef(backend()), alloc);
    info.AddMember("msr",      "none", alloc);
    info.AddMember("assembly", "none", alloc);
    info.AddMember("arch",     "riscv64", alloc);

    // Only the AES flag is listed in the "flags" array.
    Value featureList(kArrayType);
    if (hasAES()) {
        featureList.PushBack("aes", alloc);
    }

    info.AddMember("flags", featureList, alloc);

    return info;
}
|
||||
@@ -87,7 +87,7 @@ static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t typ
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_ARM
|
||||
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
static inline std::vector<hwloc_obj_t> findByType(hwloc_obj_t obj, hwloc_obj_type_t type)
|
||||
{
|
||||
std::vector<hwloc_obj_t> out;
|
||||
@@ -207,7 +207,7 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset)
|
||||
|
||||
xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
|
||||
{
|
||||
# ifndef XMRIG_ARM
|
||||
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
if (L2() == 0 && L3() == 0) {
|
||||
return BasicCpuInfo::threads(algorithm, limit);
|
||||
}
|
||||
@@ -277,7 +277,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui
|
||||
|
||||
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
|
||||
{
|
||||
# ifndef XMRIG_ARM
|
||||
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
constexpr size_t oneMiB = 1024U * 1024U;
|
||||
|
||||
size_t PUs = countByType(cache, HWLOC_OBJ_PU);
|
||||
@@ -311,17 +311,17 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||
uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1;
|
||||
|
||||
if (cache->attr->cache.depth == 3) {
|
||||
for (size_t i = 0; i < cache->arity; ++i) {
|
||||
hwloc_obj_t l2 = cache->children[i];
|
||||
auto process_L2 = [&L2, &L2_associativity, L3_exclusive, this, &extra, scratchpad](hwloc_obj_t l2) {
|
||||
if (!hwloc_obj_type_is_cache(l2->type) || l2->attr == nullptr) {
|
||||
continue;
|
||||
return;
|
||||
}
|
||||
|
||||
L2 += l2->attr->cache.size;
|
||||
L2_associativity = l2->attr->cache.associativity;
|
||||
|
||||
if (L3_exclusive) {
|
||||
if (vendor() == VENDOR_AMD) {
|
||||
if ((vendor() == VENDOR_AMD) && ((arch() == ARCH_ZEN4) || (arch() == ARCH_ZEN5))) {
|
||||
// Use extra L2 only on newer CPUs because older CPUs (Zen 3 and older) don't benefit from it.
|
||||
// For some reason, AMD CPUs can use only half of the exclusive L2/L3 cache combo efficiently
|
||||
extra += std::min<size_t>(l2->attr->cache.size / 2, scratchpad);
|
||||
}
|
||||
@@ -329,6 +329,18 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||
extra += scratchpad;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < cache->arity; ++i) {
|
||||
hwloc_obj_t ch = cache->children[i];
|
||||
if (ch->type == HWLOC_OBJ_GROUP) {
|
||||
for (size_t j = 0; j < ch->arity; ++j) {
|
||||
process_L2(ch->children[j]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
process_L2(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
150
src/backend/cpu/platform/lscpu_riscv.cpp
Normal file
150
src/backend/cpu/platform/lscpu_riscv.cpp
Normal file
@@ -0,0 +1,150 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "base/tools/String.h"
|
||||
#include "3rdparty/fmt/core.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
struct riscv_cpu_desc
|
||||
{
|
||||
String model;
|
||||
String isa;
|
||||
String uarch;
|
||||
bool has_vector = false;
|
||||
bool has_aes = false;
|
||||
|
||||
inline bool isReady() const { return !isa.isNull(); }
|
||||
};
|
||||
|
||||
static bool lookup_riscv(char *line, const char *pattern, String &value)
|
||||
{
|
||||
char *p = strstr(line, pattern);
|
||||
if (!p) {
|
||||
return false;
|
||||
}
|
||||
|
||||
p += strlen(pattern);
|
||||
while (isspace(*p)) {
|
||||
++p;
|
||||
}
|
||||
|
||||
if (*p == ':') {
|
||||
++p;
|
||||
}
|
||||
|
||||
while (isspace(*p)) {
|
||||
++p;
|
||||
}
|
||||
|
||||
// Remove trailing newline
|
||||
size_t len = strlen(p);
|
||||
if (len > 0 && p[len - 1] == '\n') {
|
||||
p[len - 1] = '\0';
|
||||
}
|
||||
|
||||
// Ensure we call the const char* assignment (which performs a copy)
|
||||
// instead of the char* overload (which would take ownership of the pointer)
|
||||
value = (const char*)p;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Fill `desc` from /proc/cpuinfo.
 *
 * Each line is checked for the "model name", "isa" and "uarch" keys. When an
 * ISA string is found it is lower-cased and split on '_' to derive the
 * has_vector / has_aes flags. Reading stops after the first line that leaves
 * the description ready.
 *
 * NOTE(review): because the loop stops on the line that sets `isa`, a "uarch"
 * (or "model name") line that appears after the "isa" line is never read --
 * confirm this is intended.
 *
 * @param desc  description to populate.
 * @return false if the file cannot be opened, otherwise desc->isReady().
 */
static bool read_riscv_cpuinfo(riscv_cpu_desc *desc)
{
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
    if (cpuinfo == nullptr) {
        return false;
    }

    // Generously sized: ISA strings can be long.
    char line[2048];

    while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
        lookup_riscv(line, "model name", desc->model);

        if (lookup_riscv(line, "isa", desc->isa)) {
            desc->isa.toLower();

            for (const String &ext : desc->isa.split('_')) {
                const size_t len  = ext.size();
                const char *text  = ext.data();

                // Base ISA token ("rv64..."): look for the single-letter 'v' extension.
                if (len > 4 && memcmp(text, "rv64", 4) == 0) {
                    if (memchr(text + 4, 'v', len - 4) != nullptr) {
                        desc->has_vector = true;
                    }

                    continue;
                }

                if (ext == "zve64d") {
                    desc->has_vector = true;
                    continue;
                }

                if (ext == "zvkn" || ext == "zvknc" || ext == "zvkned" || ext == "zvkng") {
                    desc->has_aes = true;
                }
            }
        }

        lookup_riscv(line, "uarch", desc->uarch);

        if (desc->isReady()) {
            break;
        }
    }

    fclose(cpuinfo);

    return desc->isReady();
}
|
||||
|
||||
/**
 * Build a display name for the CPU from /proc/cpuinfo.
 *
 * @return "RISC-V" when the description could not be read; otherwise the
 *         model string, suffixed with " (<uarch>)" when a uarch value is set.
 */
String cpu_name_riscv()
{
    riscv_cpu_desc info;

    if (!read_riscv_cpuinfo(&info)) {
        return "RISC-V";
    }

    if (info.uarch.isNull()) {
        return info.model;
    }

    return fmt::format("{} ({})", info.model, info.uarch).c_str();
}
|
||||
|
||||
bool has_riscv_vector()
|
||||
{
|
||||
riscv_cpu_desc desc;
|
||||
if (read_riscv_cpuinfo(&desc)) {
|
||||
return desc.has_vector;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool has_riscv_aes()
|
||||
{
|
||||
riscv_cpu_desc desc;
|
||||
if (read_riscv_cpuinfo(&desc)) {
|
||||
return desc.has_aes;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace xmrig
|
||||
@@ -19,6 +19,7 @@
|
||||
#define ALGO_CN_PICO_TLO 0x63120274
|
||||
#define ALGO_CN_UPX2 0x63110200
|
||||
#define ALGO_RX_0 0x72151200
|
||||
#define ALGO_RX_V2 0x72151202
|
||||
#define ALGO_RX_WOW 0x72141177
|
||||
#define ALGO_RX_ARQMA 0x72121061
|
||||
#define ALGO_RX_SFX 0x72151273
|
||||
|
||||
@@ -706,7 +706,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
|
||||
}
|
||||
|
||||
# if (ALGO_FAMILY == FAMILY_CN_HEAVY)
|
||||
/* Also left over threads performe this loop.
|
||||
/* Also left over threads perform this loop.
|
||||
* The left over thread results will be ignored
|
||||
*/
|
||||
#pragma unroll 16
|
||||
@@ -1005,7 +1005,7 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
|
||||
ulong State[8] = { 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0x0001000000000000UL };
|
||||
ulong H[8], M[8];
|
||||
|
||||
// BUG: AMD driver 19.7.X crashs if this is written as loop
|
||||
// BUG: AMD driver 19.7.X crashes if this is written as loop
|
||||
// Thx AMD for so bad software
|
||||
{
|
||||
((ulong8 *)M)[0] = vload8(0, states);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -10,7 +10,7 @@
|
||||
#else
|
||||
# define STATIC
|
||||
/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt
|
||||
* Build-in Function
|
||||
* Built-in Function
|
||||
* uintn amd_bitalign (uintn src0, uintn src1, uintn src2)
|
||||
* Description
|
||||
* dst.s0 = (uint) (((((long)src0.s0) << 32) | (long)src1.s0) >> (src2.s0 & 31))
|
||||
|
||||
@@ -77,7 +77,7 @@ void keccak_f800_round(uint32_t st[25], const int r)
|
||||
void keccak_f800(uint32_t* st)
|
||||
{
|
||||
// Complete all 22 rounds as a separate impl to
|
||||
// evaluate only first 8 words is wasteful of regsters
|
||||
// evaluate only first 8 words is wasteful of registers
|
||||
for (int r = 0; r < 22; r++) {
|
||||
keccak_f800_round(st, r);
|
||||
}
|
||||
@@ -181,7 +181,7 @@ __kernel void progpow_search(__global dag_t const* g_dag, __global uint* job_blo
|
||||
for (int i = 10; i < 25; i++)
|
||||
state[i] = ravencoin_rndc[i-10];
|
||||
|
||||
// Run intial keccak round
|
||||
// Run initial keccak round
|
||||
keccak_f800(state);
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -77,6 +77,7 @@ const char *Algorithm::kCN_UPX2 = "cn/upx2";
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
const char *Algorithm::kRX = "rx";
|
||||
const char *Algorithm::kRX_0 = "rx/0";
|
||||
const char *Algorithm::kRX_V2 = "rx/2";
|
||||
const char *Algorithm::kRX_WOW = "rx/wow";
|
||||
const char *Algorithm::kRX_ARQ = "rx/arq";
|
||||
const char *Algorithm::kRX_GRAFT = "rx/graft";
|
||||
@@ -143,6 +144,7 @@ static const std::map<uint32_t, const char *> kAlgorithmNames = {
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
ALGO_NAME(RX_0),
|
||||
ALGO_NAME(RX_V2),
|
||||
ALGO_NAME(RX_WOW),
|
||||
ALGO_NAME(RX_ARQ),
|
||||
ALGO_NAME(RX_GRAFT),
|
||||
@@ -253,6 +255,8 @@ static const std::map<const char *, Algorithm::Id, aliasCompare> kAlgorithmAlias
|
||||
ALGO_ALIAS(RX_0, "rx/test"),
|
||||
ALGO_ALIAS(RX_0, "randomx"),
|
||||
ALGO_ALIAS(RX_0, "rx"),
|
||||
ALGO_ALIAS_AUTO(RX_V2), ALGO_ALIAS(RX_V2, "randomx/v2"),
|
||||
ALGO_ALIAS(RX_V2, "rx/v2"),
|
||||
ALGO_ALIAS_AUTO(RX_WOW), ALGO_ALIAS(RX_WOW, "randomx/wow"),
|
||||
ALGO_ALIAS(RX_WOW, "randomwow"),
|
||||
ALGO_ALIAS_AUTO(RX_ARQ), ALGO_ALIAS(RX_ARQ, "randomx/arq"),
|
||||
@@ -350,7 +354,7 @@ std::vector<xmrig::Algorithm> xmrig::Algorithm::all(const std::function<bool(con
|
||||
CN_HEAVY_0, CN_HEAVY_TUBE, CN_HEAVY_XHV,
|
||||
CN_PICO_0, CN_PICO_TLO,
|
||||
CN_UPX2,
|
||||
RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_YADA,
|
||||
RX_0, RX_V2, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_YADA,
|
||||
AR2_CHUKWA, AR2_CHUKWA_V2, AR2_WRKZ,
|
||||
KAWPOW_RVN,
|
||||
GHOSTRIDER_RTM
|
||||
|
||||
@@ -73,6 +73,7 @@ public:
|
||||
CN_GR_5 = 0x63120105, // "cn/turtle-lite" GhostRider
|
||||
GHOSTRIDER_RTM = 0x6c150000, // "ghostrider" GhostRider
|
||||
RX_0 = 0x72151200, // "rx/0" RandomX (reference configuration).
|
||||
RX_V2 = 0x72151202, // "rx/2" RandomX (Monero v2).
|
||||
RX_WOW = 0x72141177, // "rx/wow" RandomWOW (Wownero).
|
||||
RX_ARQ = 0x72121061, // "rx/arq" RandomARQ (Arqma).
|
||||
RX_GRAFT = 0x72151267, // "rx/graft" RandomGRAFT (Graft).
|
||||
@@ -139,6 +140,7 @@ public:
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
static const char *kRX;
|
||||
static const char *kRX_0;
|
||||
static const char* kRX_V2;
|
||||
static const char *kRX_WOW;
|
||||
static const char *kRX_ARQ;
|
||||
static const char *kRX_GRAFT;
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
#define KECCAK_ROUNDS 24
|
||||
|
||||
|
||||
/* *************************** Public Inteface ************************ */
|
||||
/* *************************** Public Interface ************************ */
|
||||
|
||||
/* For Init or Reset call these: */
|
||||
sha3_return_t
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -71,11 +71,11 @@ char *xmrig::Platform::createUserAgent()
|
||||
|
||||
|
||||
#ifndef XMRIG_FEATURE_HWLOC
|
||||
#ifdef __DragonFly__
|
||||
#if defined(__DragonFly__) || defined(XMRIG_OS_OPENBSD) || defined(XMRIG_OS_HAIKU)
|
||||
|
||||
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
||||
{
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -18,14 +18,12 @@
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include "getopt/getopt.h"
|
||||
#else
|
||||
# include <getopt.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include "base/kernel/config/BaseTransform.h"
|
||||
#include "base/io/json/JsonChain.h"
|
||||
#include "base/io/log/Log.h"
|
||||
@@ -37,7 +35,6 @@
|
||||
#include "base/net/stratum/Pools.h"
|
||||
#include "core/config/Config_platform.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_TLS
|
||||
# include "base/net/tls/TlsConfig.h"
|
||||
#endif
|
||||
@@ -47,9 +44,9 @@ void xmrig::BaseTransform::load(JsonChain &chain, Process *process, IConfigTrans
|
||||
{
|
||||
using namespace rapidjson;
|
||||
|
||||
int key = 0;
|
||||
int argc = process->arguments().argc();
|
||||
char **argv = process->arguments().argv();
|
||||
int key = 0;
|
||||
const int argc = process->arguments().argc();
|
||||
char **argv = process->arguments().argv();
|
||||
|
||||
Document doc(kObjectType);
|
||||
|
||||
@@ -262,7 +259,8 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch
|
||||
case IConfig::DaemonKey: /* --daemon */
|
||||
case IConfig::SubmitToOriginKey: /* --submit-to-origin */
|
||||
case IConfig::VerboseKey: /* --verbose */
|
||||
case IConfig::DnsIPv6Key: /* --dns-ipv6 */
|
||||
case IConfig::DnsIPv4Key: /* --ipv4 */
|
||||
case IConfig::DnsIPv6Key: /* --ipv6 */
|
||||
return transformBoolean(doc, key, true);
|
||||
|
||||
case IConfig::ColorKey: /* --no-color */
|
||||
@@ -323,8 +321,11 @@ void xmrig::BaseTransform::transformBoolean(rapidjson::Document &doc, int key, b
|
||||
case IConfig::NoTitleKey: /* --no-title */
|
||||
return set(doc, BaseConfig::kTitle, enable);
|
||||
|
||||
case IConfig::DnsIPv6Key: /* --dns-ipv6 */
|
||||
return set(doc, DnsConfig::kField, DnsConfig::kIPv6, enable);
|
||||
case IConfig::DnsIPv4Key: /* --ipv4 */
|
||||
return set(doc, DnsConfig::kField, DnsConfig::kIPv, 4);
|
||||
|
||||
case IConfig::DnsIPv6Key: /* --ipv6 */
|
||||
return set(doc, DnsConfig::kField, DnsConfig::kIPv, 6);
|
||||
|
||||
default:
|
||||
break;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,9 +16,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_ICONFIG_H
|
||||
#define XMRIG_ICONFIG_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "3rdparty/rapidjson/fwd.h"
|
||||
|
||||
@@ -82,7 +80,8 @@ public:
|
||||
HugePageSizeKey = 1050,
|
||||
PauseOnActiveKey = 1051,
|
||||
SubmitToOriginKey = 1052,
|
||||
DnsIPv6Key = 1053,
|
||||
DnsIPv4Key = '4',
|
||||
DnsIPv6Key = '6',
|
||||
DnsTtlKey = 1054,
|
||||
SpendSecretKey = 1055,
|
||||
DaemonZMQPortKey = 1056,
|
||||
@@ -177,7 +176,4 @@ public:
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif // XMRIG_ICONFIG_H
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,21 +16,16 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_IDNSBACKEND_H
|
||||
#define XMRIG_IDNSBACKEND_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class DnsConfig;
|
||||
class DnsRecords;
|
||||
class DnsRequest;
|
||||
class IDnsListener;
|
||||
class String;
|
||||
|
||||
@@ -43,12 +38,8 @@ public:
|
||||
IDnsBackend() = default;
|
||||
virtual ~IDnsBackend() = default;
|
||||
|
||||
virtual const DnsRecords &records() const = 0;
|
||||
virtual std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl) = 0;
|
||||
virtual void resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config) = 0;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif // XMRIG_IDNSBACKEND_H
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
|
||||
#include "base/net/dns/Dns.h"
|
||||
#include "base/net/dns/DnsRequest.h"
|
||||
#include "base/net/dns/DnsUvBackend.h"
|
||||
|
||||
|
||||
@@ -25,17 +26,21 @@ namespace xmrig {
|
||||
|
||||
|
||||
DnsConfig Dns::m_config;
|
||||
std::map<String, std::shared_ptr<IDnsBackend> > Dns::m_backends;
|
||||
std::map<String, std::shared_ptr<IDnsBackend>> Dns::m_backends;
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
std::shared_ptr<xmrig::DnsRequest> xmrig::Dns::resolve(const String &host, IDnsListener *listener, uint64_t ttl)
|
||||
std::shared_ptr<xmrig::DnsRequest> xmrig::Dns::resolve(const String &host, IDnsListener *listener)
|
||||
{
|
||||
auto req = std::make_shared<DnsRequest>(listener);
|
||||
|
||||
if (m_backends.find(host) == m_backends.end()) {
|
||||
m_backends.insert({ host, std::make_shared<DnsUvBackend>() });
|
||||
}
|
||||
|
||||
return m_backends.at(host)->resolve(host, listener, ttl == 0 ? m_config.ttl() : ttl);
|
||||
m_backends.at(host)->resolve(host, req, m_config);
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -43,7 +43,7 @@ public:
|
||||
inline static const DnsConfig &config() { return m_config; }
|
||||
inline static void set(const DnsConfig &config) { m_config = config; }
|
||||
|
||||
static std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl = 0);
|
||||
static std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener);
|
||||
|
||||
private:
|
||||
static DnsConfig m_config;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -20,15 +20,15 @@
|
||||
#include "3rdparty/rapidjson/document.h"
|
||||
#include "base/io/json/Json.h"
|
||||
|
||||
|
||||
#include <algorithm>
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
const char *DnsConfig::kField = "dns";
|
||||
const char *DnsConfig::kIPv6 = "ipv6";
|
||||
const char *DnsConfig::kIPv = "ip_version";
|
||||
const char *DnsConfig::kTTL = "ttl";
|
||||
|
||||
|
||||
@@ -37,8 +37,26 @@ const char *DnsConfig::kTTL = "ttl";
|
||||
|
||||
xmrig::DnsConfig::DnsConfig(const rapidjson::Value &value)
|
||||
{
|
||||
m_ipv6 = Json::getBool(value, kIPv6, m_ipv6);
|
||||
m_ttl = std::max(Json::getUint(value, kTTL, m_ttl), 1U);
|
||||
const uint32_t ipv = Json::getUint(value, kIPv, m_ipv);
|
||||
if (ipv == 0 || ipv == 4 || ipv == 6) {
|
||||
m_ipv = ipv;
|
||||
}
|
||||
|
||||
m_ttl = std::max(Json::getUint(value, kTTL, m_ttl), 1U);
|
||||
}
|
||||
|
||||
|
||||
int xmrig::DnsConfig::ai_family() const
|
||||
{
|
||||
if (m_ipv == 4) {
|
||||
return AF_INET;
|
||||
}
|
||||
|
||||
if (m_ipv == 6) {
|
||||
return AF_INET6;
|
||||
}
|
||||
|
||||
return AF_UNSPEC;
|
||||
}
|
||||
|
||||
|
||||
@@ -49,8 +67,8 @@ rapidjson::Value xmrig::DnsConfig::toJSON(rapidjson::Document &doc) const
|
||||
auto &allocator = doc.GetAllocator();
|
||||
Value obj(kObjectType);
|
||||
|
||||
obj.AddMember(StringRef(kIPv6), m_ipv6, allocator);
|
||||
obj.AddMember(StringRef(kTTL), m_ttl, allocator);
|
||||
obj.AddMember(StringRef(kIPv), m_ipv, allocator);
|
||||
obj.AddMember(StringRef(kTTL), m_ttl, allocator);
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,9 +16,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_DNSCONFIG_H
|
||||
#define XMRIG_DNSCONFIG_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "3rdparty/rapidjson/fwd.h"
|
||||
|
||||
@@ -30,25 +28,22 @@ class DnsConfig
|
||||
{
|
||||
public:
|
||||
static const char *kField;
|
||||
static const char *kIPv6;
|
||||
static const char *kIPv;
|
||||
static const char *kTTL;
|
||||
|
||||
DnsConfig() = default;
|
||||
DnsConfig(const rapidjson::Value &value);
|
||||
|
||||
inline bool isIPv6() const { return m_ipv6; }
|
||||
inline uint32_t ipv() const { return m_ipv; }
|
||||
inline uint32_t ttl() const { return m_ttl * 1000U; }
|
||||
|
||||
int ai_family() const;
|
||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||
|
||||
|
||||
private:
|
||||
bool m_ipv6 = false;
|
||||
uint32_t m_ttl = 30U;
|
||||
uint32_t m_ttl = 30U;
|
||||
uint32_t m_ipv = 0U;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif /* XMRIG_DNSCONFIG_H */
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,19 +16,16 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
#include "base/net/dns/DnsRecord.h"
|
||||
|
||||
|
||||
xmrig::DnsRecord::DnsRecord(const addrinfo *addr) :
|
||||
m_type(addr->ai_family == AF_INET6 ? AAAA : (addr->ai_family == AF_INET ? A : Unknown))
|
||||
xmrig::DnsRecord::DnsRecord(const addrinfo *addr)
|
||||
{
|
||||
static_assert(sizeof(m_data) >= sizeof(sockaddr_in6), "Not enough storage for IPv6 address.");
|
||||
|
||||
memcpy(m_data, addr->ai_addr, m_type == AAAA ? sizeof(sockaddr_in6) : sizeof(sockaddr_in));
|
||||
memcpy(m_data, addr->ai_addr, addr->ai_family == AF_INET6 ? sizeof(sockaddr_in6) : sizeof(sockaddr_in));
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +41,7 @@ xmrig::String xmrig::DnsRecord::ip() const
|
||||
{
|
||||
char *buf = nullptr;
|
||||
|
||||
if (m_type == AAAA) {
|
||||
if (reinterpret_cast<const sockaddr &>(m_data).sa_family == AF_INET6) {
|
||||
buf = new char[45]();
|
||||
uv_ip6_name(reinterpret_cast<const sockaddr_in6*>(m_data), buf, 45);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,14 +16,11 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_DNSRECORD_H
|
||||
#define XMRIG_DNSRECORD_H
|
||||
|
||||
#pragma once
|
||||
|
||||
struct addrinfo;
|
||||
struct sockaddr;
|
||||
|
||||
|
||||
#include "base/tools/String.h"
|
||||
|
||||
|
||||
@@ -33,28 +30,15 @@ namespace xmrig {
|
||||
class DnsRecord
|
||||
{
|
||||
public:
|
||||
enum Type : uint32_t {
|
||||
Unknown,
|
||||
A,
|
||||
AAAA
|
||||
};
|
||||
|
||||
DnsRecord() {}
|
||||
DnsRecord(const addrinfo *addr);
|
||||
|
||||
const sockaddr *addr(uint16_t port = 0) const;
|
||||
String ip() const;
|
||||
|
||||
inline bool isValid() const { return m_type != Unknown; }
|
||||
inline Type type() const { return m_type; }
|
||||
|
||||
private:
|
||||
mutable uint8_t m_data[28]{};
|
||||
const Type m_type = Unknown;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif /* XMRIG_DNSRECORD_H */
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -18,90 +18,96 @@
|
||||
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
#include "base/net/dns/DnsRecords.h"
|
||||
#include "base/net/dns/Dns.h"
|
||||
|
||||
|
||||
const xmrig::DnsRecord &xmrig::DnsRecords::get(DnsRecord::Type prefered) const
|
||||
namespace {
|
||||
|
||||
|
||||
/**
 * Count the entries of an addrinfo list that match the requested family.
 *
 * @param res        head of the addrinfo list (may be nullptr).
 * @param ai_family  requested family; rewritten to AF_INET when AF_INET6 was
 *                   requested but the list holds no IPv6 entries.
 * @return number of AF_INET and/or AF_INET6 entries selected by the (possibly
 *         rewritten) family; 0 for any other family value.
 */
static size_t dns_records_count(const addrinfo *res, int &ai_family)
{
    size_t v4 = 0;
    size_t v6 = 0;

    for (const addrinfo *it = res; it != nullptr; it = it->ai_next) {
        switch (it->ai_family) {
        case AF_INET:
            ++v4;
            break;

        case AF_INET6:
            ++v6;
            break;

        default:
            break;
        }
    }

    // Fall back to IPv4 when IPv6 was requested but nothing IPv6 was resolved.
    if (ai_family == AF_INET6 && v6 == 0) {
        ai_family = AF_INET;
    }

    if (ai_family == AF_UNSPEC) {
        return v4 + v6;
    }

    if (ai_family == AF_INET) {
        return v4;
    }

    if (ai_family == AF_INET6) {
        return v6;
    }

    return 0;
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
/**
 * Build the record list from a resolver result.
 *
 * Only AF_INET / AF_INET6 entries are kept. With AF_UNSPEC both families are
 * stored; otherwise only entries of the family left in `ai_family` after
 * dns_records_count() (which may rewrite it) are stored. When more than one
 * record is stored, the starting index used by get() is picked with rand().
 */
xmrig::DnsRecords::DnsRecords(const addrinfo *res, int ai_family)
{
    const size_t expected = dns_records_count(res, ai_family);
    if (expected == 0) {
        return;
    }

    m_records.reserve(expected);

    const bool anyFamily = (ai_family == AF_UNSPEC);

    for (const addrinfo *it = res; it != nullptr; it = it->ai_next) {
        const int family  = it->ai_family;
        const bool wanted = anyFamily ? (family == AF_INET || family == AF_INET6)
                                      : (family == ai_family);

        if (wanted) {
            m_records.emplace_back(it);
        }
    }

    const size_t total = m_records.size();
    if (total > 1) {
        m_index = static_cast<size_t>(rand()) % total; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
    }
}
|
||||
|
||||
|
||||
const xmrig::DnsRecord &xmrig::DnsRecords::get() const
|
||||
{
|
||||
static const DnsRecord defaultRecord;
|
||||
|
||||
if (isEmpty()) {
|
||||
return defaultRecord;
|
||||
}
|
||||
|
||||
const size_t ipv4 = m_ipv4.size();
|
||||
const size_t ipv6 = m_ipv6.size();
|
||||
|
||||
if (ipv6 && (prefered == DnsRecord::AAAA || Dns::config().isIPv6() || !ipv4)) {
|
||||
return m_ipv6[ipv6 == 1 ? 0 : static_cast<size_t>(rand()) % ipv6]; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
|
||||
}
|
||||
|
||||
if (ipv4) {
|
||||
return m_ipv4[ipv4 == 1 ? 0 : static_cast<size_t>(rand()) % ipv4]; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
|
||||
const size_t size = m_records.size();
|
||||
if (size > 0) {
|
||||
return m_records[m_index++ % size];
|
||||
}
|
||||
|
||||
return defaultRecord;
|
||||
}
|
||||
|
||||
|
||||
size_t xmrig::DnsRecords::count(DnsRecord::Type type) const
|
||||
{
|
||||
if (type == DnsRecord::A) {
|
||||
return m_ipv4.size();
|
||||
}
|
||||
|
||||
if (type == DnsRecord::AAAA) {
|
||||
return m_ipv6.size();
|
||||
}
|
||||
|
||||
return m_ipv4.size() + m_ipv6.size();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::DnsRecords::clear()
|
||||
{
|
||||
m_ipv4.clear();
|
||||
m_ipv6.clear();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::DnsRecords::parse(addrinfo *res)
|
||||
{
|
||||
clear();
|
||||
|
||||
addrinfo *ptr = res;
|
||||
size_t ipv4 = 0;
|
||||
size_t ipv6 = 0;
|
||||
|
||||
while (ptr != nullptr) {
|
||||
if (ptr->ai_family == AF_INET) {
|
||||
++ipv4;
|
||||
}
|
||||
else if (ptr->ai_family == AF_INET6) {
|
||||
++ipv6;
|
||||
}
|
||||
|
||||
ptr = ptr->ai_next;
|
||||
}
|
||||
|
||||
if (ipv4 == 0 && ipv6 == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_ipv4.reserve(ipv4);
|
||||
m_ipv6.reserve(ipv6);
|
||||
|
||||
ptr = res;
|
||||
while (ptr != nullptr) {
|
||||
if (ptr->ai_family == AF_INET) {
|
||||
m_ipv4.emplace_back(ptr);
|
||||
}
|
||||
else if (ptr->ai_family == AF_INET6) {
|
||||
m_ipv6.emplace_back(ptr);
|
||||
}
|
||||
|
||||
ptr = ptr->ai_next;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,9 +16,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_DNSRECORDS_H
|
||||
#define XMRIG_DNSRECORDS_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "base/net/dns/DnsRecord.h"
|
||||
|
||||
@@ -29,20 +27,19 @@ namespace xmrig {
|
||||
class DnsRecords
|
||||
{
|
||||
public:
|
||||
inline bool isEmpty() const { return m_ipv4.empty() && m_ipv6.empty(); }
|
||||
DnsRecords() = default;
|
||||
DnsRecords(const addrinfo *res, int ai_family);
|
||||
|
||||
const DnsRecord &get(DnsRecord::Type prefered = DnsRecord::Unknown) const;
|
||||
size_t count(DnsRecord::Type type = DnsRecord::Unknown) const;
|
||||
void clear();
|
||||
void parse(addrinfo *res);
|
||||
inline bool isEmpty() const { return m_records.empty(); }
|
||||
inline const std::vector<DnsRecord> &records() const { return m_records; }
|
||||
inline size_t size() const { return m_records.size(); }
|
||||
|
||||
const DnsRecord &get() const;
|
||||
|
||||
private:
|
||||
std::vector<DnsRecord> m_ipv4;
|
||||
std::vector<DnsRecord> m_ipv6;
|
||||
mutable size_t m_index = 0;
|
||||
std::vector<DnsRecord> m_records;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif /* XMRIG_DNSRECORDS_H */
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,35 +16,30 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_DNSREQUEST_H
|
||||
#define XMRIG_DNSREQUEST_H
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
#include "base/kernel/interfaces/IDnsListener.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class IDnsListener;
|
||||
|
||||
|
||||
class DnsRequest
|
||||
class DnsRequest : public IDnsListener
|
||||
{
|
||||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(DnsRequest)
|
||||
|
||||
DnsRequest(IDnsListener *listener) : listener(listener) {}
|
||||
~DnsRequest() = default;
|
||||
inline DnsRequest(IDnsListener *listener) : m_listener(listener) {}
|
||||
~DnsRequest() override = default;
|
||||
|
||||
IDnsListener *listener;
|
||||
protected:
|
||||
inline void onResolved(const DnsRecords &records, int status, const char *error) override {
|
||||
m_listener->onResolved(records, status, error);
|
||||
}
|
||||
|
||||
private:
|
||||
IDnsListener *m_listener;
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif /* XMRIG_DNSREQUEST_H */
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,13 +16,11 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
#include "base/net/dns/DnsUvBackend.h"
|
||||
#include "base/kernel/interfaces/IDnsListener.h"
|
||||
#include "base/net/dns/DnsRequest.h"
|
||||
#include "base/net/dns/DnsConfig.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
|
||||
|
||||
@@ -73,21 +71,23 @@ xmrig::DnsUvBackend::~DnsUvBackend()
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<xmrig::DnsRequest> xmrig::DnsUvBackend::resolve(const String &host, IDnsListener *listener, uint64_t ttl)
|
||||
void xmrig::DnsUvBackend::resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config)
|
||||
{
|
||||
auto req = std::make_shared<DnsRequest>(listener);
|
||||
m_queue.emplace_back(listener);
|
||||
|
||||
if (Chrono::currentMSecsSinceEpoch() - m_ts <= ttl && !m_records.isEmpty()) {
|
||||
req->listener->onResolved(m_records, 0, nullptr);
|
||||
} else {
|
||||
m_queue.emplace(req);
|
||||
if (Chrono::currentMSecsSinceEpoch() - m_ts <= config.ttl()) {
|
||||
return notify();
|
||||
}
|
||||
|
||||
if (m_queue.size() == 1 && !resolve(host)) {
|
||||
done();
|
||||
if (m_req) {
|
||||
return;
|
||||
}
|
||||
|
||||
return req;
|
||||
m_ai_family = config.ai_family();
|
||||
|
||||
if (!resolve(host)) {
|
||||
notify();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -102,44 +102,46 @@ bool xmrig::DnsUvBackend::resolve(const String &host)
|
||||
}
|
||||
|
||||
|
||||
void xmrig::DnsUvBackend::done()
|
||||
void xmrig::DnsUvBackend::notify()
|
||||
{
|
||||
const char *error = m_status < 0 ? uv_strerror(m_status) : nullptr;
|
||||
|
||||
while (!m_queue.empty()) {
|
||||
auto req = std::move(m_queue.front()).lock();
|
||||
if (req) {
|
||||
req->listener->onResolved(m_records, m_status, error);
|
||||
for (const auto &l : m_queue) {
|
||||
auto listener = l.lock();
|
||||
if (listener) {
|
||||
listener->onResolved(m_records, m_status, error);
|
||||
}
|
||||
|
||||
m_queue.pop();
|
||||
}
|
||||
|
||||
m_queue.clear();
|
||||
m_req.reset();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::DnsUvBackend::onResolved(int status, addrinfo *res)
|
||||
{
|
||||
m_ts = Chrono::currentMSecsSinceEpoch();
|
||||
m_status = status;
|
||||
m_ts = Chrono::currentMSecsSinceEpoch();
|
||||
|
||||
if ((m_status = status) < 0) {
|
||||
return done();
|
||||
if (m_status < 0) {
|
||||
m_records = {};
|
||||
|
||||
return notify();
|
||||
}
|
||||
|
||||
m_records.parse(res);
|
||||
m_records = { res, m_ai_family };
|
||||
|
||||
if (m_records.isEmpty()) {
|
||||
m_status = UV_EAI_NONAME;
|
||||
}
|
||||
|
||||
done();
|
||||
notify();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::DnsUvBackend::onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res)
|
||||
{
|
||||
auto backend = getStorage().get(req->data);
|
||||
auto *backend = getStorage().get(req->data);
|
||||
if (backend) {
|
||||
backend->onResolved(status, res);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,16 +16,13 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_DNSUVBACKEND_H
|
||||
#define XMRIG_DNSUVBACKEND_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "base/kernel/interfaces/IDnsBackend.h"
|
||||
#include "base/net/dns/DnsRecords.h"
|
||||
#include "base/net/tools/Storage.h"
|
||||
|
||||
|
||||
#include <queue>
|
||||
#include <deque>
|
||||
|
||||
|
||||
using uv_getaddrinfo_t = struct uv_getaddrinfo_s;
|
||||
@@ -43,20 +40,19 @@ public:
|
||||
~DnsUvBackend() override;
|
||||
|
||||
protected:
|
||||
inline const DnsRecords &records() const override { return m_records; }
|
||||
|
||||
std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl) override;
|
||||
void resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config) override;
|
||||
|
||||
private:
|
||||
bool resolve(const String &host);
|
||||
void done();
|
||||
void notify();
|
||||
void onResolved(int status, addrinfo *res);
|
||||
|
||||
static void onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res);
|
||||
|
||||
DnsRecords m_records;
|
||||
int m_ai_family = 0;
|
||||
int m_status = 0;
|
||||
std::queue<std::weak_ptr<DnsRequest> > m_queue;
|
||||
std::deque<std::weak_ptr<IDnsListener>> m_queue;
|
||||
std::shared_ptr<uv_getaddrinfo_t> m_req;
|
||||
uint64_t m_ts = 0;
|
||||
uintptr_t m_key;
|
||||
@@ -66,7 +62,4 @@ private:
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif /* XMRIG_DNSUVBACKEND_H */
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -554,6 +554,7 @@ int64_t xmrig::Client::send(size_t size)
|
||||
}
|
||||
|
||||
m_expire = Chrono::steadyMSecs() + kResponseTimeout;
|
||||
startTimeout();
|
||||
return m_sequence++;
|
||||
}
|
||||
|
||||
@@ -661,8 +662,6 @@ void xmrig::Client::onClose()
|
||||
|
||||
void xmrig::Client::parse(char *line, size_t len)
|
||||
{
|
||||
startTimeout();
|
||||
|
||||
LOG_DEBUG("[%s] received (%d bytes): \"%.*s\"", url(), len, static_cast<int>(len), line);
|
||||
|
||||
if (len < 22 || line[0] != '{') {
|
||||
@@ -857,8 +856,6 @@ void xmrig::Client::parseResponse(int64_t id, const rapidjson::Value &result, co
|
||||
void xmrig::Client::ping()
|
||||
{
|
||||
send(snprintf(m_sendBuf.data(), m_sendBuf.size(), "{\"id\":%" PRId64 ",\"jsonrpc\":\"2.0\",\"method\":\"keepalived\",\"params\":{\"id\":\"%s\"}}\n", m_sequence, m_rpcId.data()));
|
||||
|
||||
m_keepAlive = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -45,7 +45,7 @@ namespace xmrig {
|
||||
|
||||
|
||||
// https://wiki.openssl.org/index.php/Diffie-Hellman_parameters
|
||||
#if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER)
|
||||
#if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3))
|
||||
static DH *get_dh2048()
|
||||
{
|
||||
static unsigned char dhp_2048[] = {
|
||||
@@ -152,7 +152,7 @@ bool xmrig::TlsContext::load(const TlsConfig &config)
|
||||
SSL_CTX_set_options(m_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
|
||||
SSL_CTX_set_options(m_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
|
||||
|
||||
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER)
|
||||
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3)
|
||||
SSL_CTX_set_max_early_data(m_ctx, 0);
|
||||
# endif
|
||||
|
||||
@@ -180,7 +180,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites)
|
||||
return true;
|
||||
}
|
||||
|
||||
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER)
|
||||
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3)
|
||||
if (SSL_CTX_set_ciphersuites(m_ctx, ciphersuites) == 1) {
|
||||
return true;
|
||||
}
|
||||
@@ -194,7 +194,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites)
|
||||
|
||||
bool xmrig::TlsContext::setDH(const char *dhparam)
|
||||
{
|
||||
# if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER)
|
||||
# if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3))
|
||||
DH *dh = nullptr;
|
||||
|
||||
if (dhparam != nullptr) {
|
||||
|
||||
@@ -241,8 +241,13 @@ bool xmrig::BlockTemplate::parse(bool hashes)
|
||||
ar(m_amount);
|
||||
ar(m_outputType);
|
||||
|
||||
// output type must be txout_to_key (2) or txout_to_tagged_key (3)
|
||||
if ((m_outputType != 2) && (m_outputType != 3)) {
|
||||
const bool is_fcmp_pp = (m_coin == Coin::MONERO) && (m_version.first >= 17);
|
||||
|
||||
// output type must be txout_to_key (2) or txout_to_tagged_key (3) for versions < 17, and txout_to_carrot_v1 (0) for version FCMP++
|
||||
if (is_fcmp_pp && (m_outputType == 0)) {
|
||||
// all good
|
||||
}
|
||||
else if ((m_outputType != 2) && (m_outputType != 3)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -250,6 +255,11 @@ bool xmrig::BlockTemplate::parse(bool hashes)
|
||||
|
||||
ar(m_ephPublicKey, kKeySize);
|
||||
|
||||
if (is_fcmp_pp) {
|
||||
ar(m_carrotViewTag);
|
||||
ar(m_janusAnchor);
|
||||
}
|
||||
|
||||
if (m_coin == Coin::ZEPHYR) {
|
||||
if (m_outputType != 2) {
|
||||
return false;
|
||||
|
||||
@@ -148,6 +148,8 @@ private:
|
||||
Buffer m_hashes;
|
||||
Buffer m_minerTxMerkleTreeBranch;
|
||||
uint8_t m_rootHash[kHashSize]{};
|
||||
uint8_t m_carrotViewTag[3]{};
|
||||
uint8_t m_janusAnchor[16]{};
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
"dhparam": null
|
||||
},
|
||||
"dns": {
|
||||
"ipv6": false,
|
||||
"ip_version": 0,
|
||||
"ttl": 30
|
||||
},
|
||||
"user-agent": null,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -16,9 +16,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CONFIG_PLATFORM_H
|
||||
#define XMRIG_CONFIG_PLATFORM_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include "getopt/getopt.h"
|
||||
@@ -28,13 +26,12 @@
|
||||
|
||||
|
||||
#include "base/kernel/interfaces/IConfig.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
static const char short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:Sx:";
|
||||
static const char short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:Sx:46";
|
||||
|
||||
|
||||
static const option options[] = {
|
||||
@@ -99,7 +96,8 @@ static const option options[] = {
|
||||
{ "no-title", 0, nullptr, IConfig::NoTitleKey },
|
||||
{ "pause-on-battery", 0, nullptr, IConfig::PauseOnBatteryKey },
|
||||
{ "pause-on-active", 1, nullptr, IConfig::PauseOnActiveKey },
|
||||
{ "dns-ipv6", 0, nullptr, IConfig::DnsIPv6Key },
|
||||
{ "ipv4", 0, nullptr, IConfig::DnsIPv4Key },
|
||||
{ "ipv6", 0, nullptr, IConfig::DnsIPv6Key },
|
||||
{ "dns-ttl", 1, nullptr, IConfig::DnsTtlKey },
|
||||
{ "spend-secret-key", 1, nullptr, IConfig::SpendSecretKey },
|
||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||
@@ -169,6 +167,3 @@ static const option options[] = {
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_CONFIG_PLATFORM_H */
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
* Copyright (c) 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright (c) 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright (c) 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -21,13 +21,10 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_USAGE_H
|
||||
#define XMRIG_USAGE_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
@@ -59,7 +56,8 @@ static inline const std::string &usage()
|
||||
u += " --tls-fingerprint=HEX pool TLS certificate fingerprint for strict certificate pinning\n";
|
||||
# endif
|
||||
|
||||
u += " --dns-ipv6 prefer IPv6 records from DNS responses\n";
|
||||
u += " -4, --ipv4 resolve names to IPv4 addresses\n";
|
||||
u += " -6, --ipv6 resolve names to IPv6 addresses\n";
|
||||
u += " --dns-ttl=N N seconds (default: 30) TTL for internal DNS cache\n";
|
||||
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
@@ -205,6 +203,4 @@ static inline const std::string &usage()
|
||||
}
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
#endif /* XMRIG_USAGE_H */
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
#if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
|
||||
# include "crypto/cn/CryptoNight_arm.h"
|
||||
#else
|
||||
# include "crypto/cn/CryptoNight_x86.h"
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined _MSC_VER || defined XMRIG_ARM
|
||||
#if defined _MSC_VER || defined XMRIG_ARM || defined XMRIG_RISCV
|
||||
# define ABI_ATTRIBUTE
|
||||
#else
|
||||
# define ABI_ATTRIBUTE __attribute__((ms_abi))
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
#ifndef XMRIG_CRYPTONIGHT_ARM_H
|
||||
#define XMRIG_CRYPTONIGHT_ARM_H
|
||||
|
||||
#ifdef XMRIG_RISCV
|
||||
# include "crypto/cn/sse2rvv.h"
|
||||
#endif
|
||||
|
||||
#include "base/crypto/keccak.h"
|
||||
#include "crypto/cn/CnAlgo.h"
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#include <math.h>
|
||||
|
||||
// VARIANT ALTERATIONS
|
||||
#ifndef XMRIG_ARM
|
||||
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
# define VARIANT1_INIT(part) \
|
||||
uint64_t tweak1_2_##part = 0; \
|
||||
if (BASE == Algorithm::CN_1) { \
|
||||
@@ -60,7 +60,7 @@
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_ARM
|
||||
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
# define VARIANT2_INIT(part) \
|
||||
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[12])); \
|
||||
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[13]));
|
||||
|
||||
@@ -235,7 +235,7 @@ static HashReturn Init(hashState *state, int hashbitlen)
|
||||
/*initialize the initial hash value of JH*/
|
||||
state->hashbitlen = hashbitlen;
|
||||
|
||||
/*load the intital hash value into state*/
|
||||
/*load the initial hash value into state*/
|
||||
switch (hashbitlen)
|
||||
{
|
||||
case 224: memcpy(state->x,JH224_H0,128); break;
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
multiple of size / 8)
|
||||
|
||||
ptr_cast(x,size) casts a pointer to a pointer to a
|
||||
varaiable of length 'size' bits
|
||||
variable of length 'size' bits
|
||||
*/
|
||||
|
||||
#define ui_type(size) uint##size##_t
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
# include "crypto/cn/sse2neon.h"
|
||||
#elif defined(XMRIG_RISCV)
|
||||
# include "crypto/cn/sse2rvv.h"
|
||||
#elif defined(__GNUC__)
|
||||
# include <x86intrin.h>
|
||||
#else
|
||||
|
||||
748
src/crypto/cn/sse2rvv.h
Normal file
748
src/crypto/cn/sse2rvv.h
Normal file
@@ -0,0 +1,748 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* SSE to RISC-V Vector (RVV) optimized compatibility header
|
||||
* Provides both scalar fallback and vectorized implementations using RVV intrinsics
|
||||
*
|
||||
* Based on sse2neon.h concepts, adapted for RISC-V architecture with RVV extensions
|
||||
* Original sse2neon.h: https://github.com/DLTcollab/sse2neon
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_SSE2RVV_OPTIMIZED_H
|
||||
#define XMRIG_SSE2RVV_OPTIMIZED_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Check if RVV is available */
|
||||
#if defined(__riscv_vector)
|
||||
#include <riscv_vector.h>
|
||||
#define USE_RVV_INTRINSICS 1
|
||||
#else
|
||||
#define USE_RVV_INTRINSICS 0
|
||||
#endif
|
||||
|
||||
/* 128-bit vector type */
|
||||
/*
 * Software stand-in for an SSE integer register: 16 bytes of storage that
 * can be read or written as unsigned or signed lanes of 8, 16, 32 or 64 bits.
 * Every member overlays the same bytes.
 */
typedef union {
    /* unsigned lane views */
    uint8_t  u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];

    /* signed lane views */
    int8_t   i8[16];
    int16_t  i16[8];
    int32_t  i32[4];
    int64_t  i64[2];
} __m128i_union, __m128i; /* __m128i is the name SSE-style code expects */
|
||||
|
||||
/* Set operations */
|
||||
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
|
||||
{
|
||||
__m128i result;
|
||||
result.i32[0] = e0;
|
||||
result.i32[1] = e1;
|
||||
result.i32[2] = e2;
|
||||
result.i32[3] = e3;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
|
||||
{
|
||||
__m128i result;
|
||||
result.i64[0] = e0;
|
||||
result.i64[1] = e1;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_setzero_si128(void)
|
||||
{
|
||||
__m128i result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Extract/insert operations */
|
||||
/* Extract the lowest 32-bit lane of 'a' as a signed integer. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    const int low_lane = a.i32[0];

    return low_lane;
}
|
||||
|
||||
/* Extract the lowest 64-bit lane of 'a' as a signed integer. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    const int64_t low_lane = a.i64[0];

    return low_lane;
}
|
||||
|
||||
static inline __m128i _mm_cvtsi32_si128(int a)
|
||||
{
|
||||
__m128i result = _mm_setzero_si128();
|
||||
result.i32[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_cvtsi64_si128(int64_t a)
|
||||
{
|
||||
__m128i result = _mm_setzero_si128();
|
||||
result.i64[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Shuffle operations */
|
||||
/*
 * Permute the four 32-bit lanes of 'a'.
 * Destination lane n takes the source lane selected by bits
 * [2n+1:2n] of imm8.
 */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i shuffled;

    for (int lane = 0; lane < 4; lane++) {
        const int selector = (imm8 >> (2 * lane)) & 0x3;
        shuffled.u32[lane] = a.u32[selector];
    }
    return shuffled;
}
|
||||
|
||||
/* Logical operations - optimized with RVV when available */
|
||||
/*
 * Bitwise XOR of two 128-bit values.
 * Uses RVV intrinsics on two 64-bit elements when available, otherwise
 * plain 64-bit scalar operations.
 */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vxor_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int i = 0; i < 2; i++) {
        out.u64[i] = a.u64[i] ^ b.u64[i];
    }
#endif

    return out;
}
|
||||
|
||||
/*
 * Bitwise OR of two 128-bit values.
 * Uses RVV intrinsics on two 64-bit elements when available, otherwise
 * plain 64-bit scalar operations.
 */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vor_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int i = 0; i < 2; i++) {
        out.u64[i] = a.u64[i] | b.u64[i];
    }
#endif

    return out;
}
|
||||
|
||||
/*
 * Bitwise AND of two 128-bit values.
 * Uses RVV intrinsics on two 64-bit elements when available, otherwise
 * plain 64-bit scalar operations.
 */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int i = 0; i < 2; i++) {
        out.u64[i] = a.u64[i] & b.u64[i];
    }
#endif

    return out;
}
|
||||
|
||||
/*
 * Compute (~a) & b over all 128 bits. Note that the FIRST operand is the
 * one that gets inverted.
 */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t inverted = __riscv_vnot_v_u64m1(__riscv_vle64_v_u64m1(a.u64, lanes), lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(inverted, rhs, lanes), lanes);
#else
    for (int i = 0; i < 2; i++) {
        out.u64[i] = (~a.u64[i]) & b.u64[i];
    }
#endif

    return out;
}
|
||||
|
||||
/* Shift operations */
|
||||
/*
 * Shift the whole 128-bit value left by (imm8 & 0xFF) BYTES, filling the
 * vacated low bytes with zero. A count above 15 yields an all-zero result.
 */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    __m128i shifted = _mm_setzero_si128();
    const int nbytes = imm8 & 0xFF;

    if (nbytes > 15) {
        return shifted;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t src = __riscv_vle8_v_u8m1(a.u8, vl);
    /* Slide into a zeroed destination so bytes below the offset stay zero. */
    vuint8m1_t zeros = __riscv_vmv_v_x_u8m1(0, vl);
    vuint8m1_t slid = __riscv_vslideup_vx_u8m1(zeros, src, nbytes, vl);
    __riscv_vse8_v_u8m1(shifted.u8, slid, vl);
#else
    for (int dst = nbytes; dst < 16; dst++) {
        shifted.u8[dst] = a.u8[dst - nbytes];
    }
#endif

    return shifted;
}
|
||||
|
||||
/*
 * Shift the whole 128-bit value right by (imm8 & 0xFF) BYTES, filling the
 * vacated high bytes with zero. A count above 15 yields an all-zero result.
 */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    __m128i result = _mm_setzero_si128();
    const int count = imm8 & 0xFF;

    if (count > 15) {
        return result;
    }

#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, (size_t)count, vl);
    /*
     * vslidedown pulls source elements up to VLMAX, not vl. When the
     * hardware vector is wider than 128 bits, elements 16.. of 'va' are
     * tail elements of the load and hold unspecified data, so the upper
     * 'count' bytes of 'vr' cannot be trusted. Store only the bytes that
     * really came from 'a'; the rest of 'result' is already zero.
     */
    __riscv_vse8_v_u8m1(result.u8, vr, vl - (size_t)count);
#else
    for (int i = count; i < 16; i++) {
        result.u8[i - count] = a.u8[i];
    }
#endif

    return result;
}
|
||||
|
||||
/*
 * Shift each 64-bit lane of 'a' left by a bit count, shifting in zeros.
 *
 * Only the low 8 bits of imm8 are used as the count (the same convention
 * the byte-shift helpers in this header follow). This keeps a negative
 * argument from reaching the C shift operator, where it would be
 * undefined behaviour. A count above 63 yields an all-zero result.
 */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
    __m128i result;
    const unsigned count = (unsigned)imm8 & 0xFFu;

    if (count > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }

#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, count, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] << count;
    result.u64[1] = a.u64[1] << count;
#endif

    return result;
}
|
||||
|
||||
/*
 * Logical right shift of each 64-bit lane of 'a' by a bit count,
 * shifting in zeros.
 *
 * Only the low 8 bits of imm8 are used as the count (the same convention
 * the byte-shift helpers in this header follow). This keeps a negative
 * argument from reaching the C shift operator, where it would be
 * undefined behaviour. A count above 63 yields an all-zero result.
 */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
    __m128i result;
    const unsigned count = (unsigned)imm8 & 0xFFu;

    if (count > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }

#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, count, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] >> count;
    result.u64[1] = a.u64[1] >> count;
#endif

    return result;
}
|
||||
|
||||
/* Load/store operations - optimized with RVV */
|
||||
/*
 * Load 128 bits from 'p'.
 * The RVV path reads the memory as two 64-bit elements; the fallback
 * copies the bytes with memcpy.
 */
static inline __m128i _mm_load_si128(const __m128i* p)
{
    __m128i loaded;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t data = __riscv_vle64_v_u64m1((const uint64_t*)p, lanes);
    __riscv_vse64_v_u64m1(loaded.u64, data, lanes);
#else
    memcpy(&loaded, p, sizeof(loaded));
#endif

    return loaded;
}
|
||||
|
||||
/* Load 128 bits from 'p' with a byte-wise copy, so no alignment is assumed. */
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
    __m128i loaded;

    memcpy(&loaded, p, sizeof(loaded));
    return loaded;
}
|
||||
|
||||
/*
 * Store the 128 bits of 'a' to 'p'.
 * The RVV path writes the memory as two 64-bit elements; the fallback
 * copies the bytes with memcpy.
 */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t data = __riscv_vle64_v_u64m1(a.u64, lanes);
    __riscv_vse64_v_u64m1((uint64_t*)p, data, lanes);
#else
    memcpy(p, &a, sizeof(a));
#endif
}
|
||||
|
||||
/* Store the 128 bits of 'a' to 'p' with a byte-wise copy, so no alignment is assumed. */
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    memcpy(p, &a, sizeof(a));
}
|
||||
|
||||
/* Arithmetic operations - optimized with RVV */
|
||||
/* Lane-wise wrapping addition of the two 64-bit lanes of 'a' and 'b'. */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
    __m128i sum;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(sum.u64, __riscv_vadd_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int i = 0; i < 2; i++) {
        sum.u64[i] = a.u64[i] + b.u64[i];
    }
#endif

    return sum;
}
|
||||
|
||||
/*
 * Lane-wise addition of the four 32-bit lanes of 'a' and 'b'.
 *
 * The sum wraps modulo 2^32. Both paths add the UNSIGNED lane view:
 * adding the signed view in C would make overflow undefined behaviour,
 * and the bit pattern of a wrapping sum is the same for either signedness.
 */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
    __m128i result;

#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl);
    vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];
    }
#endif

    return result;
}
|
||||
|
||||
/* Lane-wise wrapping subtraction (a - b) of the two 64-bit lanes. */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
    __m128i diff;

#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(diff.u64, __riscv_vsub_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int i = 0; i < 2; i++) {
        diff.u64[i] = a.u64[i] - b.u64[i];
    }
#endif

    return diff;
}
|
||||
|
||||
/*
 * Multiply the unsigned 32-bit values in lanes 0 and 2 of 'a' and 'b',
 * producing two full 64-bit products:
 *   result.u64[0] = a.u32[0] * b.u32[0]
 *   result.u64[1] = a.u32[2] * b.u32[2]
 * Lanes 1 and 3 of the inputs are ignored.
 */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i result;

#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    /*
     * Load the inputs as 64-bit lanes and keep only the low 32 bits of
     * each. Those are u32[0] and u32[2] (little-endian lane layout, the
     * same assumption the union-based scalar path relies on). Loading two
     * consecutive 32-bit elements instead would pick u32[0] and u32[1].
     */
    vuint64m1_t va = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(a.u64, vl), 0xFFFFFFFFULL, vl);
    vuint64m1_t vb = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(b.u64, vl), 0xFFFFFFFFULL, vl);
    vuint64m1_t vr = __riscv_vmul_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
#endif

    return result;
}
|
||||
|
||||
/* Unpack operations */
|
||||
/* Combine the LOW 64-bit lanes: result = { a.low, b.low }. */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    __m128i merged;

    merged.u64[1] = b.u64[0];
    merged.u64[0] = a.u64[0];
    return merged;
}
|
||||
|
||||
/* Combine the HIGH 64-bit lanes: result = { a.high, b.high }. */
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    __m128i merged;

    merged.u64[1] = b.u64[1];
    merged.u64[0] = a.u64[1];
    return merged;
}
|
||||
|
||||
/* Pause instruction for spin-wait loops */
|
||||
/*
 * Spin-wait hint.
 * Emits the "pause" instruction when the compiler advertises the
 * Zihintpause extension, and a plain "nop" otherwise.
 */
static inline void _mm_pause(void)
{
#ifdef __riscv_zihintpause
    __asm__ __volatile__("pause");
#else
    __asm__ __volatile__("nop");
#endif
}
|
||||
|
||||
/* Memory fence - optimized for RISC-V */
|
||||
/*
 * Full memory fence: issues "fence rw,rw". The "memory" clobber also
 * stops the compiler from moving memory accesses across this point.
 */
static inline void _mm_mfence(void)
{
    __asm__ __volatile__("fence rw,rw" : : : "memory");
}
|
||||
|
||||
/*
 * Load fence: issues "fence r,r". The "memory" clobber also stops the
 * compiler from moving memory accesses across this point.
 */
static inline void _mm_lfence(void)
{
    __asm__ __volatile__("fence r,r" : : : "memory");
}
|
||||
|
||||
/*
 * Store fence: issues "fence w,w". The "memory" clobber also stops the
 * compiler from moving memory accesses across this point.
 */
static inline void _mm_sfence(void)
{
    __asm__ __volatile__("fence w,w" : : : "memory");
}
|
||||
|
||||
/* Comparison operations */
|
||||
/*
 * Compare the four 32-bit lanes for equality.
 * A lane of the result is all ones where the inputs match, zero otherwise.
 */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    __m128i mask;

    for (int lane = 0; lane < 4; lane++) {
        /* 0 - 1 wraps to 0xFFFFFFFF; 0 - 0 stays 0. */
        mask.u32[lane] = (uint32_t)0 - (uint32_t)(a.u32[lane] == b.u32[lane]);
    }
    return mask;
}
|
||||
|
||||
/*
 * Compare the two 64-bit lanes for equality.
 * A lane of the result is all ones where the inputs match, zero otherwise.
 */
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    __m128i mask;

    for (int lane = 0; lane < 2; lane++) {
        /* 0 - 1 wraps to 0xFFFFFFFFFFFFFFFF; 0 - 0 stays 0. */
        mask.u64[lane] = (uint64_t)0 - (uint64_t)(a.u64[lane] == b.u64[lane]);
    }
    return mask;
}
|
||||
|
||||
/* Additional shift operations */
|
||||
/*
 * Shift each 32-bit lane of 'a' left by a bit count, shifting in zeros.
 *
 * Only the low 8 bits of imm8 are used as the count (the same convention
 * the byte-shift helpers in this header follow). This keeps a negative
 * argument from reaching the C shift operator, where it would be
 * undefined behaviour. A count above 31 yields an all-zero result.
 */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
    __m128i result;
    const unsigned count = (unsigned)imm8 & 0xFFu;

    if (count > 31) {
        memset(&result, 0, sizeof(result));
        return result;
    }

#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, count, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] << count;
    }
#endif

    return result;
}
|
||||
|
||||
/*
 * Logical right shift of each 32-bit lane of a by imm8 bits.
 *
 * As with the x86 intrinsic, only the low 8 bits of imm8 form the count and
 * any count above 31 yields an all-zero result. Masking first also keeps a
 * negative imm8 from reaching the shift, which would be undefined behaviour
 * in the scalar path and would be silently truncated in the RVV path.
 */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    if (count > 31) {
        memset(&result, 0, sizeof(result));
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, count, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] >> count;
    }
#endif
    return result;
}
|
||||
|
||||
/* 64-bit integer operations */
|
||||
static inline __m128i _mm_set1_epi64x(int64_t a)
|
||||
{
|
||||
__m128i result;
|
||||
result.i64[0] = a;
|
||||
result.i64[1] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Float type for compatibility */
|
||||
/* __m128 is the same union as __m128i; float helpers reinterpret its 32-bit lanes. */
typedef __m128i __m128;
|
||||
|
||||
/* Float operations - simplified scalar implementations */
|
||||
static inline __m128 _mm_set1_ps(float a)
|
||||
{
|
||||
__m128 result;
|
||||
uint32_t val;
|
||||
memcpy(&val, &a, sizeof(float));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
result.u32[i] = val;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128 _mm_setzero_ps(void)
|
||||
{
|
||||
__m128 result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Lane-wise single-precision addition; lanes are moved in and out with memcpy. */
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    __m128 sum;
    for (int lane = 0; lane < 4; lane++) {
        float x, y, r;
        memcpy(&x, &a.u32[lane], sizeof(x));
        memcpy(&y, &b.u32[lane], sizeof(y));
        r = x + y;
        memcpy(&sum.u32[lane], &r, sizeof(r));
    }
    return sum;
}
|
||||
|
||||
/* Lane-wise single-precision multiplication; lanes are moved in and out with memcpy. */
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    __m128 product;
    for (int lane = 0; lane < 4; lane++) {
        float x, y, r;
        memcpy(&x, &a.u32[lane], sizeof(x));
        memcpy(&y, &b.u32[lane], sizeof(y));
        r = x * y;
        memcpy(&product.u32[lane], &r, sizeof(r));
    }
    return product;
}
|
||||
|
||||
/* Bitwise AND of the full 128 bits of a and b. */
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    __m128 out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] & b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/* Bitwise OR of the full 128 bits of a and b. */
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    __m128 out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] | b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/* Convert each signed 32-bit lane to float and store its bit pattern in the same lane. */
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    __m128 out;
    for (int lane = 0; lane < 4; lane++) {
        const float f = (float)a.i32[lane];
        memcpy(&out.u32[lane], &f, sizeof(f));
    }
    return out;
}
|
||||
|
||||
/*
 * Convert each float lane to a signed 32-bit integer with truncation.
 *
 * Casting a float that is NaN or outside the int32_t range is undefined
 * behaviour in C. The x86 instruction returns 0x80000000 ("integer
 * indefinite") for those inputs, so that value is produced explicitly here.
 */
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    __m128i result;
    float fa[4];

    memcpy(fa, &a, sizeof(__m128));
    for (int i = 0; i < 4; i++) {
        /* Both bounds are exactly representable as float; NaN fails the test. */
        if (fa[i] >= -2147483648.0f && fa[i] < 2147483648.0f) {
            result.i32[i] = (int32_t)fa[i];
        } else {
            result.i32[i] = INT32_MIN;
        }
    }
    return result;
}
|
||||
|
||||
/* Casting operations */
|
||||
/* Reinterpret the 128 bits of a as __m128; no value conversion takes place. */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    __m128 out;
    memcpy(out.u8, a.u8, sizeof(out));
    return out;
}
|
||||
|
||||
/* Reinterpret the 128 bits of a as __m128i; no value conversion takes place. */
static inline __m128i _mm_castps_si128(__m128 a)
{
    __m128i out;
    memcpy(out.u8, a.u8, sizeof(out));
    return out;
}
|
||||
|
||||
/* Additional set operations */
|
||||
static inline __m128i _mm_set1_epi32(int a)
|
||||
{
|
||||
__m128i result;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
result.i32[i] = a;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* AES instructions - placeholders for soft_aes compatibility */
|
||||
/*
 * Placeholder only: returns a XOR roundkey. No other AES round step is
 * performed here, so this must not be used where a real AES round is needed.
 */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    const __m128i mixed = _mm_xor_si128(a, roundkey);
    return mixed;
}
|
||||
|
||||
/* Placeholder only: returns a unchanged and ignores rcon. */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    (void)rcon;
    return a;
}
|
||||
|
||||
/* Rotate right operation for soft_aes.h */
|
||||
/* Rotate value right by count bits; count is reduced modulo 32. */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    const unsigned int right = count & 31u;
    /* (32 - right) & 31 keeps the left shift in range when right is 0. */
    const unsigned int left = (32u - right) & 31u;

    return (value << left) | (value >> right);
}
|
||||
|
||||
/* ARM NEON compatibility types and intrinsics for RISC-V */
|
||||
/* NEON-style vector names; every one is an alias of the same 128-bit union. */
typedef __m128i_union uint64x2_t, uint8x16_t, int64x2_t, int32x4_t;
|
||||
|
||||
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
|
||||
{
|
||||
uint64x2_t result;
|
||||
result.u64[0] = ptr[0];
|
||||
result.u64[1] = ptr[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline int64x2_t vld1q_s64(const int64_t *ptr)
|
||||
{
|
||||
int64x2_t result;
|
||||
result.i64[0] = ptr[0];
|
||||
result.i64[1] = ptr[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Store the two 64-bit lanes of val to ptr[0] and ptr[1]. */
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    for (int lane = 0; lane < 2; lane++) {
        ptr[lane] = val.u64[lane];
    }
}
|
||||
|
||||
/* Bitwise XOR of a and b, forwarded to _mm_xor_si128. */
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t out = _mm_xor_si128(a, b);
    return out;
}
|
||||
|
||||
/* Lane-wise 64-bit addition of a and b, forwarded to _mm_add_epi64. */
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t out = _mm_add_epi64(a, b);
    return out;
}
|
||||
|
||||
/* Reinterpret the 128 bits of a as uint64x2_t; the bytes are copied unchanged. */
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    uint64x2_t out;
    memcpy(out.u8, a.u8, sizeof(out));
    return out;
}
|
||||
|
||||
/* Return 64-bit lane number lane of v; lane is not range-checked. */
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    const uint64_t *lanes = v.u64;
    return lanes[lane];
}
|
||||
|
||||
/* Return signed 64-bit lane number lane of v; lane is not range-checked. */
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    const int64_t *lanes = v.i64;
    return lanes[lane];
}
|
||||
|
||||
/* Return signed 32-bit lane number lane of v; lane is not range-checked. */
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    const int32_t *lanes = v.i32;
    return lanes[lane];
}
|
||||
|
||||
/* Single-lane 64-bit vector, used as one half when building a uint64x2_t. */
typedef struct { uint64_t val[1]; } uint64x1_t;

/* Wrap the scalar a in a uint64x1_t. */
static inline uint64x1_t vcreate_u64(uint64_t a)
{
    uint64x1_t out = { { a } };
    return out;
}
|
||||
|
||||
/* Build a two-lane vector: lane 0 comes from low, lane 1 from high. */
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    uint64x2_t out;
    out.u64[1] = high.val[0];
    out.u64[0] = low.val[0];
    return out;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */
|
||||
748
src/crypto/cn/sse2rvv_optimized.h
Normal file
748
src/crypto/cn/sse2rvv_optimized.h
Normal file
@@ -0,0 +1,748 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* SSE to RISC-V Vector (RVV) optimized compatibility header
|
||||
* Provides both scalar fallback and vectorized implementations using RVV intrinsics
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_SSE2RVV_OPTIMIZED_H
|
||||
#define XMRIG_SSE2RVV_OPTIMIZED_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Check if RVV is available */
|
||||
#if defined(__riscv_vector)
|
||||
#include <riscv_vector.h>
|
||||
#define USE_RVV_INTRINSICS 1
|
||||
#else
|
||||
#define USE_RVV_INTRINSICS 0
|
||||
#endif
|
||||
|
||||
/* 128-bit vector type */
|
||||
/*
 * 128-bit vector type: one 16-byte storage area viewed as unsigned or signed
 * lanes of 8, 16, 32 or 64 bits.
 *
 * RVV register types (vuint64m1_t and friends) are deliberately not members:
 * they are sizeless types, which compilers reject as struct/union fields.
 * The RVV code paths in this header load from and store to the fixed-size
 * lane arrays instead.
 */
typedef union {
    uint8_t u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    int8_t i8[16];
    int16_t i16[8];
    int32_t i32[4];
    int64_t i64[2];
} __m128i_union;

typedef __m128i_union __m128i;
|
||||
|
||||
/* Set operations */
|
||||
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
|
||||
{
|
||||
__m128i result;
|
||||
result.i32[0] = e0;
|
||||
result.i32[1] = e1;
|
||||
result.i32[2] = e2;
|
||||
result.i32[3] = e3;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
|
||||
{
|
||||
__m128i result;
|
||||
result.i64[0] = e0;
|
||||
result.i64[1] = e1;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_setzero_si128(void)
|
||||
{
|
||||
__m128i result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Extract/insert operations */
|
||||
/* Return the lowest signed 32-bit lane of a. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    const int32_t lowest = a.i32[0];
    return lowest;
}
|
||||
|
||||
/* Return the lowest signed 64-bit lane of a. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    const int64_t lowest = a.i64[0];
    return lowest;
}
|
||||
|
||||
static inline __m128i _mm_cvtsi32_si128(int a)
|
||||
{
|
||||
__m128i result = _mm_setzero_si128();
|
||||
result.i32[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_cvtsi64_si128(int64_t a)
|
||||
{
|
||||
__m128i result = _mm_setzero_si128();
|
||||
result.i64[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Shuffle operations */
|
||||
/* Permute 32-bit lanes: output lane k takes the input lane selected by
 * bits [2k+1:2k] of imm8. */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i out;
    for (int k = 0; k < 4; k++) {
        out.u32[k] = a.u32[(imm8 >> (2 * k)) & 0x3];
    }
    return out;
}
|
||||
|
||||
/* Logical operations - optimized with RVV when available */
|
||||
/* Bitwise XOR of a and b, computed on two 64-bit lanes (RVV when enabled). */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, vl);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vxor_vv_u64m1(lhs, rhs, vl), vl);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
#endif
    return out;
}
|
||||
|
||||
/* Bitwise OR of a and b, computed on two 64-bit lanes (RVV when enabled). */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, vl);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vor_vv_u64m1(lhs, rhs, vl), vl);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] | b.u64[lane];
    }
#endif
    return out;
}
|
||||
|
||||
/* Bitwise AND of a and b, computed on two 64-bit lanes (RVV when enabled). */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, vl);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(lhs, rhs, vl), vl);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] & b.u64[lane];
    }
#endif
    return out;
}
|
||||
|
||||
/* Compute (~a) & b over the full 128 bits; note that a is the inverted operand. */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t inverted = __riscv_vnot_v_u64m1(__riscv_vle64_v_u64m1(a.u64, vl), vl);
    vuint64m1_t kept = __riscv_vle64_v_u64m1(b.u64, vl);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(inverted, kept, vl), vl);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = b.u64[lane] & ~a.u64[lane];
    }
#endif
    return out;
}
|
||||
|
||||
/* Shift operations */
|
||||
/* Shift the whole vector towards higher byte indices by (imm8 & 0xFF) bytes,
 * filling with zeros; a count above 15 gives an all-zero result. */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    const int shift = imm8 & 0xFF;
    __m128i out = _mm_setzero_si128();

    if (shift <= 15) {
#if USE_RVV_INTRINSICS
        size_t vl = __riscv_vsetvl_e8m1(16);
        vuint8m1_t src = __riscv_vle8_v_u8m1(a.u8, vl);
        vuint8m1_t zeros = __riscv_vmv_v_x_u8m1(0, vl);
        /* Slide the source up onto a zero vector so the low bytes stay 0. */
        __riscv_vse8_v_u8m1(out.u8, __riscv_vslideup_vx_u8m1(zeros, src, shift, vl), vl);
#else
        for (int dst = shift; dst < 16; dst++) {
            out.u8[dst] = a.u8[dst - shift];
        }
#endif
    }
    return out;
}
|
||||
|
||||
/*
 * Shift the whole vector towards lower byte indices by (imm8 & 0xFF) bytes,
 * filling the vacated high bytes with zeros. A count above 15 gives an
 * all-zero result.
 */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, count, vl);
    /*
     * vslidedown reads source elements up to VLMAX, not vl. When VLEN is
     * larger than 128 bits, elements 16 and above of va were never loaded,
     * so the top `count` bytes of vr are unspecified. Store only the
     * 16 - count bytes that came from a; the rest of result is already zero.
     */
    __riscv_vse8_v_u8m1(result.u8, vr, (size_t)(16 - count));
#else
    for (int i = count; i < 16; i++) {
        result.u8[i - count] = a.u8[i];
    }
#endif
    return result;
}
|
||||
|
||||
/*
 * Shift each 64-bit lane of a left by imm8 bits, shifting in zeros.
 *
 * As with the x86 intrinsic, only the low 8 bits of imm8 form the count and
 * any count above 63 yields an all-zero result. Masking first also keeps a
 * negative imm8 from reaching the shift, which would be undefined behaviour
 * in the scalar path and would be silently truncated in the RVV path.
 */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    if (count > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, count, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] << count;
    result.u64[1] = a.u64[1] << count;
#endif
    return result;
}
|
||||
|
||||
/*
 * Logical right shift of each 64-bit lane of a by imm8 bits.
 *
 * As with the x86 intrinsic, only the low 8 bits of imm8 form the count and
 * any count above 63 yields an all-zero result. Masking first also keeps a
 * negative imm8 from reaching the shift, which would be undefined behaviour
 * in the scalar path and would be silently truncated in the RVV path.
 */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    if (count > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, count, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] >> count;
    result.u64[1] = a.u64[1] >> count;
#endif
    return result;
}
|
||||
|
||||
/* Load/store operations - optimized with RVV */
|
||||
/* Load 16 bytes from p. The RVV path reads p as two uint64_t elements; the
 * scalar path uses memcpy. */
static inline __m128i _mm_load_si128(const __m128i* p)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t loaded = __riscv_vle64_v_u64m1((const uint64_t*)p, vl);
    __riscv_vse64_v_u64m1(out.u64, loaded, vl);
#else
    memcpy(&out, p, sizeof(out));
#endif
    return out;
}
|
||||
|
||||
static inline __m128i _mm_loadu_si128(const __m128i* p)
|
||||
{
|
||||
__m128i result;
|
||||
memcpy(&result, p, sizeof(__m128i));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Store the 16 bytes of a to p. The RVV path writes p as two uint64_t
 * elements; the scalar path uses memcpy. */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
#if !USE_RVV_INTRINSICS
    memcpy(p, &a, sizeof(a));
#else
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t value = __riscv_vle64_v_u64m1(a.u64, vl);
    __riscv_vse64_v_u64m1((uint64_t*)p, value, vl);
#endif
}
|
||||
|
||||
/* Store the 16 bytes of a to p with memcpy, so p needs no particular alignment. */
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    memcpy(p, a.u8, sizeof(a));
}
|
||||
|
||||
/* Arithmetic operations - optimized with RVV */
|
||||
/* Lane-wise 64-bit addition with unsigned wrap-around. */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, vl);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vadd_vv_u64m1(lhs, rhs, vl), vl);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] + b.u64[lane];
    }
#endif
    return out;
}
|
||||
|
||||
/*
 * Lane-wise 32-bit addition with wrap-around.
 *
 * The scalar path adds the unsigned lanes: signed overflow on the i32 view
 * would be undefined behaviour, while unsigned arithmetic wraps and yields
 * the same bit pattern the RVV path produces.
 */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
    __m128i result;
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl);
    vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];
    }
#endif
    return result;
}
|
||||
|
||||
/* Lane-wise 64-bit subtraction (a - b) with unsigned wrap-around. */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    const size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, vl);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vsub_vv_u64m1(lhs, rhs, vl), vl);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] - b.u64[lane];
    }
#endif
    return out;
}
|
||||
|
||||
/*
 * Multiply the low unsigned 32 bits of each 64-bit lane (u32[0] and u32[2])
 * and return the two full 64-bit products.
 *
 * The RVV path loads both operands as 64-bit lanes and masks off the upper
 * halves. Loading two consecutive 32-bit elements and widening them would
 * multiply u32[0] and u32[1] instead, which disagrees with the scalar path.
 */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i result;
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    /* Keep only the low 32 bits of every 64-bit lane (little-endian layout). */
    va = __riscv_vand_vx_u64m1(va, 0xFFFFFFFFULL, vl);
    vb = __riscv_vand_vx_u64m1(vb, 0xFFFFFFFFULL, vl);
    vuint64m1_t vr = __riscv_vmul_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
#endif
    return result;
}
|
||||
|
||||
/* Unpack operations */
|
||||
/* Combine the low 64-bit lanes: lane 0 from a, lane 1 from b. */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    __m128i out;
    out.u64[1] = b.u64[0];
    out.u64[0] = a.u64[0];
    return out;
}
|
||||
|
||||
/* Combine the high 64-bit lanes: lane 0 from a, lane 1 from b. */
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    __m128i out;
    out.u64[1] = b.u64[1];
    out.u64[0] = a.u64[1];
    return out;
}
|
||||
|
||||
/* Pause instruction for spin-wait loops */
|
||||
/* Spin-wait hint: emits "pause" when the compiler advertises the
 * Zihintpause extension, otherwise falls back to a plain "nop". */
static inline void _mm_pause(void)
{
#if !defined(__riscv_zihintpause)
    __asm__ __volatile__("nop");
#else
    __asm__ __volatile__("pause");
#endif
}
|
||||
|
||||
/* Memory fence - optimized for RISC-V */
|
||||
/* Issues "fence rw,rw"; the "memory" clobber also makes it a compiler barrier. */
static inline void _mm_mfence(void)
{
    __asm__ __volatile__(
        "fence rw,rw"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||
|
||||
/* Issues "fence r,r"; the "memory" clobber also makes it a compiler barrier. */
static inline void _mm_lfence(void)
{
    __asm__ __volatile__(
        "fence r,r"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||
|
||||
/* Issues "fence w,w"; the "memory" clobber also makes it a compiler barrier. */
static inline void _mm_sfence(void)
{
    __asm__ __volatile__(
        "fence w,w"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||
|
||||
/* Comparison operations */
|
||||
/* Per 32-bit lane equality: all-ones where a and b match, zero elsewhere. */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    __m128i mask;
    int lane = 0;
    while (lane < 4) {
        if (a.u32[lane] == b.u32[lane]) {
            mask.u32[lane] = 0xFFFFFFFF;
        } else {
            mask.u32[lane] = 0;
        }
        lane++;
    }
    return mask;
}
|
||||
|
||||
/* Per 64-bit lane equality: all-ones where a and b match, zero elsewhere. */
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    __m128i mask;
    int lane = 0;
    while (lane < 2) {
        if (a.u64[lane] == b.u64[lane]) {
            mask.u64[lane] = 0xFFFFFFFFFFFFFFFFULL;
        } else {
            mask.u64[lane] = 0;
        }
        lane++;
    }
    return mask;
}
|
||||
|
||||
/* Additional shift operations */
|
||||
/*
 * Shift each 32-bit lane of a left by imm8 bits, shifting in zeros.
 *
 * As with the x86 intrinsic, only the low 8 bits of imm8 form the count and
 * any count above 31 yields an all-zero result. Masking first also keeps a
 * negative imm8 from reaching the shift, which would be undefined behaviour
 * in the scalar path and would be silently truncated in the RVV path.
 */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    if (count > 31) {
        memset(&result, 0, sizeof(result));
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, count, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] << count;
    }
#endif
    return result;
}
|
||||
|
||||
/*
 * Logical right shift of each 32-bit lane of a by imm8 bits.
 *
 * As with the x86 intrinsic, only the low 8 bits of imm8 form the count and
 * any count above 31 yields an all-zero result. Masking first also keeps a
 * negative imm8 from reaching the shift, which would be undefined behaviour
 * in the scalar path and would be silently truncated in the RVV path.
 */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    if (count > 31) {
        memset(&result, 0, sizeof(result));
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, count, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] >> count;
    }
#endif
    return result;
}
|
||||
|
||||
/* 64-bit integer operations */
|
||||
static inline __m128i _mm_set1_epi64x(int64_t a)
|
||||
{
|
||||
__m128i result;
|
||||
result.i64[0] = a;
|
||||
result.i64[1] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Float type for compatibility */
|
||||
/* __m128 is the same union as __m128i; float helpers reinterpret its 32-bit lanes. */
typedef __m128i __m128;
|
||||
|
||||
/* Float operations - simplified scalar implementations */
|
||||
static inline __m128 _mm_set1_ps(float a)
|
||||
{
|
||||
__m128 result;
|
||||
uint32_t val;
|
||||
memcpy(&val, &a, sizeof(float));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
result.u32[i] = val;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128 _mm_setzero_ps(void)
|
||||
{
|
||||
__m128 result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Lane-wise single-precision addition; lanes are moved in and out with memcpy. */
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    __m128 sum;
    for (int lane = 0; lane < 4; lane++) {
        float x, y, r;
        memcpy(&x, &a.u32[lane], sizeof(x));
        memcpy(&y, &b.u32[lane], sizeof(y));
        r = x + y;
        memcpy(&sum.u32[lane], &r, sizeof(r));
    }
    return sum;
}
|
||||
|
||||
/* Lane-wise single-precision multiplication; lanes are moved in and out with memcpy. */
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    __m128 product;
    for (int lane = 0; lane < 4; lane++) {
        float x, y, r;
        memcpy(&x, &a.u32[lane], sizeof(x));
        memcpy(&y, &b.u32[lane], sizeof(y));
        r = x * y;
        memcpy(&product.u32[lane], &r, sizeof(r));
    }
    return product;
}
|
||||
|
||||
/* Bitwise AND of the full 128 bits of a and b. */
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    __m128 out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] & b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/* Bitwise OR of the full 128 bits of a and b. */
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    __m128 out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] | b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/* Convert each signed 32-bit lane to float and store its bit pattern in the same lane. */
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    __m128 out;
    for (int lane = 0; lane < 4; lane++) {
        const float f = (float)a.i32[lane];
        memcpy(&out.u32[lane], &f, sizeof(f));
    }
    return out;
}
|
||||
|
||||
/*
 * Convert each float lane to a signed 32-bit integer with truncation.
 *
 * Casting a float that is NaN or outside the int32_t range is undefined
 * behaviour in C. The x86 instruction returns 0x80000000 ("integer
 * indefinite") for those inputs, so that value is produced explicitly here.
 */
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    __m128i result;
    float fa[4];

    memcpy(fa, &a, sizeof(__m128));
    for (int i = 0; i < 4; i++) {
        /* Both bounds are exactly representable as float; NaN fails the test. */
        if (fa[i] >= -2147483648.0f && fa[i] < 2147483648.0f) {
            result.i32[i] = (int32_t)fa[i];
        } else {
            result.i32[i] = INT32_MIN;
        }
    }
    return result;
}
|
||||
|
||||
/* Casting operations */
|
||||
/* Reinterpret the 128 bits of a as __m128; no value conversion takes place. */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    __m128 out;
    memcpy(out.u8, a.u8, sizeof(out));
    return out;
}
|
||||
|
||||
/* Reinterpret the 128 bits of a as __m128i; no value conversion takes place. */
static inline __m128i _mm_castps_si128(__m128 a)
{
    __m128i out;
    memcpy(out.u8, a.u8, sizeof(out));
    return out;
}
|
||||
|
||||
/* Additional set operations */
|
||||
static inline __m128i _mm_set1_epi32(int a)
|
||||
{
|
||||
__m128i result;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
result.i32[i] = a;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* AES instructions - placeholders for soft_aes compatibility */
|
||||
/*
 * Placeholder only: returns a XOR roundkey. No other AES round step is
 * performed here, so this must not be used where a real AES round is needed.
 */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    const __m128i mixed = _mm_xor_si128(a, roundkey);
    return mixed;
}
|
||||
|
||||
/* Placeholder only: returns a unchanged and ignores rcon. */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    (void)rcon;
    return a;
}
|
||||
|
||||
/* Rotate right operation for soft_aes.h */
|
||||
/* Rotate value right by count bits; count is reduced modulo 32. */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    const unsigned int right = count & 31u;
    /* (32 - right) & 31 keeps the left shift in range when right is 0. */
    const unsigned int left = (32u - right) & 31u;

    return (value << left) | (value >> right);
}
|
||||
|
||||
/* ARM NEON compatibility types and intrinsics for RISC-V */
|
||||
/* NEON-style vector names; every one is an alias of the same 128-bit union. */
typedef __m128i_union uint64x2_t, uint8x16_t, int64x2_t, int32x4_t;
|
||||
|
||||
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
|
||||
{
|
||||
uint64x2_t result;
|
||||
result.u64[0] = ptr[0];
|
||||
result.u64[1] = ptr[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline int64x2_t vld1q_s64(const int64_t *ptr)
|
||||
{
|
||||
int64x2_t result;
|
||||
result.i64[0] = ptr[0];
|
||||
result.i64[1] = ptr[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Store the two 64-bit lanes of val to ptr[0] and ptr[1]. */
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    for (int lane = 0; lane < 2; lane++) {
        ptr[lane] = val.u64[lane];
    }
}
|
||||
|
||||
/* Bitwise XOR of a and b, forwarded to _mm_xor_si128. */
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t out = _mm_xor_si128(a, b);
    return out;
}
|
||||
|
||||
/* Lane-wise 64-bit addition of a and b, forwarded to _mm_add_epi64. */
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t out = _mm_add_epi64(a, b);
    return out;
}
|
||||
|
||||
/* Reinterpret the 128 bits of a as uint64x2_t; the bytes are copied unchanged. */
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    uint64x2_t out;
    memcpy(out.u8, a.u8, sizeof(out));
    return out;
}
|
||||
|
||||
/* Return 64-bit lane number lane of v; lane is not range-checked. */
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    const uint64_t *lanes = v.u64;
    return lanes[lane];
}
|
||||
|
||||
/* Return signed 64-bit lane number lane of v; lane is not range-checked. */
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    const int64_t *lanes = v.i64;
    return lanes[lane];
}
|
||||
|
||||
/* Return signed 32-bit lane number lane of v; lane is not range-checked. */
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    const int32_t *lanes = v.i32;
    return lanes[lane];
}
|
||||
|
||||
/* Single-lane 64-bit vector, used as one half when building a uint64x2_t. */
typedef struct { uint64_t val[1]; } uint64x1_t;

/* Wrap the scalar a in a uint64x1_t. */
static inline uint64x1_t vcreate_u64(uint64_t a)
{
    uint64x1_t out = { { a } };
    return out;
}
|
||||
|
||||
/* Build a two-lane vector: lane 0 comes from low, lane 1 from high. */
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    uint64x2_t out;
    out.u64[1] = high.val[0];
    out.u64[0] = low.val[0];
    return out;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */
|
||||
571
src/crypto/cn/sse2rvv_scalar_backup.h
Normal file
571
src/crypto/cn/sse2rvv_scalar_backup.h
Normal file
@@ -0,0 +1,571 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* SSE to RISC-V compatibility header
|
||||
* Provides scalar implementations of SSE intrinsics for RISC-V architecture
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_SSE2RVV_H
|
||||
#define XMRIG_SSE2RVV_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/* 128-bit vector type */
|
||||
/*
 * 128-bit vector type: one 16-byte storage area viewed as unsigned or signed
 * lanes of 8, 16, 32 or 64 bits. __m128i is the name the intrinsics use.
 */
typedef union {
    /* unsigned lane views */
    uint8_t u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    /* signed lane views */
    int8_t i8[16];
    int16_t i16[8];
    int32_t i32[4];
    int64_t i64[2];
} __m128i_union, __m128i;
|
||||
|
||||
/* Set operations */
|
||||
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
|
||||
{
|
||||
__m128i result;
|
||||
result.i32[0] = e0;
|
||||
result.i32[1] = e1;
|
||||
result.i32[2] = e2;
|
||||
result.i32[3] = e3;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
|
||||
{
|
||||
__m128i result;
|
||||
result.i64[0] = e0;
|
||||
result.i64[1] = e1;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_setzero_si128(void)
|
||||
{
|
||||
__m128i result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Extract/insert operations */
|
||||
/* Return the lowest signed 32-bit lane of a. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    const int32_t lowest = a.i32[0];
    return lowest;
}
|
||||
|
||||
/* Return the lowest signed 64-bit lane of a. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    const int64_t lowest = a.i64[0];
    return lowest;
}
|
||||
|
||||
static inline __m128i _mm_cvtsi32_si128(int a)
|
||||
{
|
||||
__m128i result = _mm_setzero_si128();
|
||||
result.i32[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i _mm_cvtsi64_si128(int64_t a)
|
||||
{
|
||||
__m128i result = _mm_setzero_si128();
|
||||
result.i64[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Shuffle operations */
|
||||
/* Permute 32-bit lanes: output lane k takes the input lane selected by
 * bits [2k+1:2k] of imm8. */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i out;
    for (int k = 0; k < 4; k++) {
        out.u32[k] = a.u32[(imm8 >> (2 * k)) & 0x3];
    }
    return out;
}
|
||||
|
||||
/* Logical operations */
|
||||
/* Bitwise XOR of a and b over the full 128 bits. */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
    __m128i out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/* Bitwise OR of a and b, computed on the two 64-bit halves. */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
    __m128i out;
    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] | b.u64[half];
    }
    return out;
}
|
||||
|
||||
/* Bitwise AND of a and b, computed on the two 64-bit halves. */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
    __m128i out;
    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] & b.u64[half];
    }
    return out;
}
|
||||
|
||||
/* Compute (~a) & b; note that it is the FIRST operand that gets inverted. */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
    __m128i out;
    for (int half = 0; half < 2; half++) {
        const uint64_t inverted = ~a.u64[half];
        out.u64[half] = inverted & b.u64[half];
    }
    return out;
}
|
||||
|
||||
/* Shift operations */
|
||||
/*
 * Shift the whole vector towards higher byte indices by (imm8 & 0xFF) bytes,
 * filling vacated bytes with zero. A count above 15 produces an all-zero vector.
 */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    const int shift = imm8 & 0xFF;
    __m128i out;
    memset(&out, 0, sizeof(out));

    if (shift <= 15) {
        for (int dst = shift; dst < 16; dst++) {
            out.u8[dst] = a.u8[dst - shift];
        }
    }
    return out;
}
|
||||
|
||||
/*
 * Shift the whole vector towards lower byte indices by (imm8 & 0xFF) bytes,
 * filling vacated bytes with zero. A count above 15 produces an all-zero vector.
 */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    const int shift = imm8 & 0xFF;
    __m128i out;
    memset(&out, 0, sizeof(out));

    if (shift <= 15) {
        for (int dst = 0; dst < 16 - shift; dst++) {
            out.u8[dst] = a.u8[dst + shift];
        }
    }
    return out;
}
|
||||
|
||||
/*
 * Shift each 64-bit lane of a left by imm8 bits, shifting in zeros.
 *
 * Any count outside 0..63 yields an all-zero vector. That includes negative
 * counts, which previously slipped past the range check and reached the C
 * shift operator, where a negative shift amount is undefined behavior.
 */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
    __m128i result;
    /* The unsigned comparison folds negative counts into the "too large" case. */
    if ((unsigned int)imm8 > 63u) {
        result.u64[0] = 0;
        result.u64[1] = 0;
    } else {
        result.u64[0] = a.u64[0] << imm8;
        result.u64[1] = a.u64[1] << imm8;
    }
    return result;
}
|
||||
|
||||
/*
 * Logically shift each 64-bit lane of a right by imm8 bits, shifting in zeros.
 *
 * Any count outside 0..63 yields an all-zero vector. That includes negative
 * counts, which previously slipped past the range check and reached the C
 * shift operator, where a negative shift amount is undefined behavior.
 */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
    __m128i result;
    /* The unsigned comparison folds negative counts into the "too large" case. */
    if ((unsigned int)imm8 > 63u) {
        result.u64[0] = 0;
        result.u64[1] = 0;
    } else {
        result.u64[0] = a.u64[0] >> imm8;
        result.u64[1] = a.u64[1] >> imm8;
    }
    return result;
}
|
||||
|
||||
/* Load/store operations */
|
||||
/* Load one vector from p by copying sizeof(__m128i) bytes. */
static inline __m128i _mm_load_si128(const __m128i* p)
{
    __m128i loaded;
    memcpy(&loaded, p, sizeof loaded);
    return loaded;
}
|
||||
|
||||
/*
 * Load one vector from p by copying sizeof(__m128i) bytes.
 * Implemented exactly like _mm_load_si128: a plain byte copy.
 */
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
    __m128i loaded;
    memcpy(&loaded, p, sizeof loaded);
    return loaded;
}
|
||||
|
||||
/* Store vector a at p by copying sizeof(__m128i) bytes. */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
    const __m128i *src = &a;
    memcpy(p, src, sizeof *src);
}
|
||||
|
||||
/*
 * Store vector a at p by copying sizeof(__m128i) bytes.
 * Implemented exactly like _mm_store_si128: a plain byte copy.
 */
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    const __m128i *src = &a;
    memcpy(p, src, sizeof *src);
}
|
||||
|
||||
/* Arithmetic operations */
|
||||
/* Add the two 64-bit lanes of a and b; unsigned arithmetic, so each sum wraps. */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
    __m128i sum;
    for (int lane = 0; lane < 2; lane++) {
        sum.u64[lane] = a.u64[lane] + b.u64[lane];
    }
    return sum;
}
|
||||
|
||||
/*
 * Add the four 32-bit lanes of a and b with wrap-around.
 *
 * The addition is done on the unsigned view of the lanes: signed int32_t
 * addition overflows with undefined behavior in C, whereas the SSE2
 * instruction being emulated simply wraps modulo 2^32. The resulting bit
 * pattern is the same one a wrapping signed add would produce.
 */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
    __m128i result;
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];
    }
    return result;
}
|
||||
|
||||
/* Subtract the 64-bit lanes of b from those of a; unsigned arithmetic, so each difference wraps. */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
    __m128i diff;
    for (int lane = 0; lane < 2; lane++) {
        diff.u64[lane] = a.u64[lane] - b.u64[lane];
    }
    return diff;
}
|
||||
|
||||
/*
 * Multiply the unsigned 32-bit lanes 0 and 2 of a and b, producing two full
 * 64-bit products. Lanes 1 and 3 of the inputs are ignored.
 */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i product;
    for (int half = 0; half < 2; half++) {
        const uint64_t lhs = (uint64_t)a.u32[2 * half];
        const uint64_t rhs = (uint64_t)b.u32[2 * half];
        product.u64[half] = lhs * rhs;
    }
    return product;
}
|
||||
|
||||
/* Unpack operations */
|
||||
/* Combine the low 64-bit lanes: result = { a.low, b.low }. */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    const uint64_t from_a = a.u64[0];
    const uint64_t from_b = b.u64[0];

    __m128i out;
    out.u64[0] = from_a;
    out.u64[1] = from_b;
    return out;
}
|
||||
|
||||
/* Combine the high 64-bit lanes: result = { a.high, b.high }. */
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    const uint64_t from_a = a.u64[1];
    const uint64_t from_b = b.u64[1];

    __m128i out;
    out.u64[0] = from_a;
    out.u64[1] = from_b;
    return out;
}
|
||||
|
||||
/* Pause instruction for spin-wait loops */
|
||||
/*
 * Stand-in for the x86 PAUSE hint used in spin-wait loops.
 * No direct RISC-V equivalent is used here; a single NOP is emitted instead.
 */
static inline void _mm_pause(void)
{
    __asm__ volatile("nop");
}
|
||||
|
||||
/* Memory fence */
|
||||
/*
 * Emit a RISC-V `fence` instruction. The "memory" clobber additionally
 * stops the compiler from moving memory accesses across this point.
 */
static inline void _mm_mfence(void)
{
    __asm__ volatile("fence" : : : "memory");
}
|
||||
|
||||
/*
 * Emit a RISC-V `fence r,r` instruction. The "memory" clobber additionally
 * stops the compiler from moving memory accesses across this point.
 */
static inline void _mm_lfence(void)
{
    __asm__ volatile("fence r,r" : : : "memory");
}
|
||||
|
||||
/*
 * Emit a RISC-V `fence w,w` instruction. The "memory" clobber additionally
 * stops the compiler from moving memory accesses across this point.
 */
static inline void _mm_sfence(void)
{
    __asm__ volatile("fence w,w" : : : "memory");
}
|
||||
|
||||
/* Comparison operations */
|
||||
/* Per 32-bit lane equality mask: all ones where a == b, all zeros otherwise. */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    __m128i mask;
    for (int lane = 0; lane < 4; lane++) {
        if (a.u32[lane] == b.u32[lane]) {
            mask.u32[lane] = 0xFFFFFFFF;
        } else {
            mask.u32[lane] = 0;
        }
    }
    return mask;
}
|
||||
|
||||
/* Per 64-bit lane equality mask: all ones where a == b, all zeros otherwise. */
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    __m128i mask;
    for (int lane = 0; lane < 2; lane++) {
        if (a.u64[lane] == b.u64[lane]) {
            mask.u64[lane] = 0xFFFFFFFFFFFFFFFFULL;
        } else {
            mask.u64[lane] = 0;
        }
    }
    return mask;
}
|
||||
|
||||
/* Additional shift operations */
|
||||
/*
 * Shift each 32-bit lane of a left by imm8 bits, shifting in zeros.
 *
 * Any count outside 0..31 yields an all-zero vector. That includes negative
 * counts, which previously slipped past the range check and reached the C
 * shift operator, where a negative shift amount is undefined behavior.
 */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
    __m128i result;
    /* The unsigned comparison folds negative counts into the "too large" case. */
    const int out_of_range = (unsigned int)imm8 > 31u;
    for (int i = 0; i < 4; i++) {
        result.u32[i] = out_of_range ? 0 : (a.u32[i] << imm8);
    }
    return result;
}
|
||||
|
||||
/*
 * Logically shift each 32-bit lane of a right by imm8 bits, shifting in zeros.
 *
 * Any count outside 0..31 yields an all-zero vector. That includes negative
 * counts, which previously slipped past the range check and reached the C
 * shift operator, where a negative shift amount is undefined behavior.
 */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
    __m128i result;
    /* The unsigned comparison folds negative counts into the "too large" case. */
    const int out_of_range = (unsigned int)imm8 > 31u;
    for (int i = 0; i < 4; i++) {
        result.u32[i] = out_of_range ? 0 : (a.u32[i] >> imm8);
    }
    return result;
}
|
||||
|
||||
/* 64-bit integer operations */
|
||||
static inline __m128i _mm_set1_epi64x(int64_t a)
|
||||
{
|
||||
__m128i result;
|
||||
result.i64[0] = a;
|
||||
result.i64[1] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Float type for compatibility - we'll treat it as int for simplicity */
|
||||
typedef __m128i __m128;
|
||||
|
||||
/* Float operations - simplified scalar implementations */
|
||||
static inline __m128 _mm_set1_ps(float a)
|
||||
{
|
||||
__m128 result;
|
||||
uint32_t val;
|
||||
memcpy(&val, &a, sizeof(float));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
result.u32[i] = val;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128 _mm_setzero_ps(void)
|
||||
{
|
||||
__m128 result;
|
||||
memset(&result, 0, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Add a and b lane by lane, treating each as four packed floats.
 * The lanes are moved in and out of float arrays with memcpy.
 */
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    float lhs[4], rhs[4], sum[4];
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));

    for (int lane = 0; lane < 4; lane++) {
        sum[lane] = lhs[lane] + rhs[lane];
    }

    __m128 out;
    memcpy(&out, sum, sizeof(__m128));
    return out;
}
|
||||
|
||||
/*
 * Multiply a and b lane by lane, treating each as four packed floats.
 * The lanes are moved in and out of float arrays with memcpy.
 */
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    float lhs[4], rhs[4], prod[4];
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));

    for (int lane = 0; lane < 4; lane++) {
        prod[lane] = lhs[lane] * rhs[lane];
    }

    __m128 out;
    memcpy(&out, prod, sizeof(__m128));
    return out;
}
|
||||
|
||||
/* Bitwise AND of the raw bits of a and b, computed on the two 64-bit halves. */
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    __m128 out;
    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] & b.u64[half];
    }
    return out;
}
|
||||
|
||||
/* Bitwise OR of the raw bits of a and b, computed on the two 64-bit halves. */
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    __m128 out;
    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] | b.u64[half];
    }
    return out;
}
|
||||
|
||||
/* Convert each signed 32-bit lane of a to float and pack the four floats. */
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    float converted[4];
    converted[0] = (float)a.i32[0];
    converted[1] = (float)a.i32[1];
    converted[2] = (float)a.i32[2];
    converted[3] = (float)a.i32[3];

    __m128 out;
    memcpy(&out, converted, sizeof(__m128));
    return out;
}
|
||||
|
||||
/*
 * Convert each packed float lane of a to a signed 32-bit integer by
 * truncating toward zero.
 *
 * A lane that is NaN or lies outside the int32_t range produces INT32_MIN
 * (0x80000000), the value the x86 instruction returns for such inputs.
 * Casting those floats directly would be undefined behavior in C.
 */
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    __m128i result;
    float fa[4];
    memcpy(fa, &a, sizeof(__m128));
    for (int i = 0; i < 4; i++) {
        const float f = fa[i];
        /* Both bounds are exactly representable as float; NaN fails both comparisons. */
        if (f >= -2147483648.0f && f < 2147483648.0f) {
            result.i32[i] = (int32_t)f;
        } else {
            result.i32[i] = INT32_MIN;
        }
    }
    return result;
}
|
||||
|
||||
/* Casting operations */
|
||||
/*
 * Reinterpret an integer vector as a float vector without changing any bits.
 * __m128 is a typedef of __m128i in this header, so a plain copy suffices.
 */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    __m128 same_bits = a;
    return same_bits;
}
|
||||
|
||||
/*
 * Reinterpret a float vector as an integer vector without changing any bits.
 * __m128 is a typedef of __m128i in this header, so a plain copy suffices.
 */
static inline __m128i _mm_castps_si128(__m128 a)
{
    __m128i same_bits = a;
    return same_bits;
}
|
||||
|
||||
/* Additional set operations */
|
||||
static inline __m128i _mm_set1_epi32(int a)
|
||||
{
|
||||
__m128i result;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
result.i32[i] = a;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* AES instructions - these are placeholders, actual AES is done via soft_aes.h */
|
||||
/* On RISC-V without crypto extensions, these should never be called directly */
|
||||
/* They are only here for compilation compatibility */
|
||||
/*
 * Compile-compatibility placeholder, NOT an AES round.
 * It only XORs a with roundkey; real AES work is expected to go through the
 * software implementation (soft_aesenc). Reaching this function means the
 * SOFT_AES template parameter was not used by the caller.
 */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    __m128i out;
    out.u64[0] = a.u64[0] ^ roundkey.u64[0];
    out.u64[1] = a.u64[1] ^ roundkey.u64[1];
    return out;
}
|
||||
|
||||
/*
 * Compile-compatibility placeholder for AES key generation.
 * It returns a unchanged and ignores rcon; real key expansion is expected to
 * use soft_aeskeygenassist.
 */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    (void)rcon; /* intentionally unused */
    return a;
}
|
||||
|
||||
/* Rotate right operation for soft_aes.h */
|
||||
/*
 * Rotate a 32-bit value right by count bits.
 * Only the low five bits of count are used, so every shift amount stays
 * within 0..31 and a rotation by 0 or 32 returns value unchanged.
 */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    const unsigned int right = count & 31u;
    const unsigned int left = (32u - right) & 31u;
    return (value >> right) | (value << left);
}
|
||||
|
||||
/* ARM NEON compatibility types and intrinsics for RISC-V */
|
||||
typedef __m128i_union uint64x2_t;
|
||||
typedef __m128i_union uint8x16_t;
|
||||
typedef __m128i_union int64x2_t;
|
||||
typedef __m128i_union int32x4_t;
|
||||
|
||||
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
|
||||
{
|
||||
uint64x2_t result;
|
||||
result.u64[0] = ptr[0];
|
||||
result.u64[1] = ptr[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline int64x2_t vld1q_s64(const int64_t *ptr)
|
||||
{
|
||||
int64x2_t result;
|
||||
result.i64[0] = ptr[0];
|
||||
result.i64[1] = ptr[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Store lanes 0 and 1 of val to ptr[0] and ptr[1]. */
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    for (int lane = 0; lane < 2; lane++) {
        ptr[lane] = val.u64[lane];
    }
}
|
||||
|
||||
/* Lane-wise XOR of two 2x64-bit vectors. */
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    uint64x2_t out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/* Lane-wise unsigned (wrapping) addition of two 2x64-bit vectors. */
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    uint64x2_t out;
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] + b.u64[lane];
    }
    return out;
}
|
||||
|
||||
/*
 * Reinterpret a 16x8-bit vector as a 2x64-bit vector without changing bits.
 * Both types are typedefs of the same union here, so a plain copy suffices.
 */
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    uint64x2_t same_bits = a;
    return same_bits;
}
|
||||
|
||||
/* Return unsigned 64-bit lane `lane` of v. The index is not range-checked. */
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    const uint64_t picked = v.u64[lane];
    return picked;
}
|
||||
|
||||
/* Return signed 64-bit lane `lane` of v. The index is not range-checked. */
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    const int64_t picked = v.i64[lane];
    return picked;
}
|
||||
|
||||
/* Return signed 32-bit lane `lane` of v. The index is not range-checked. */
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    const int32_t picked = v.i32[lane];
    return picked;
}
|
||||
|
||||
typedef struct { uint64_t val[1]; } uint64x1_t;
|
||||
|
||||
static inline uint64x1_t vcreate_u64(uint64_t a)
|
||||
{
|
||||
uint64x1_t result;
|
||||
result.val[0] = a;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Join two one-lane vectors: `low` becomes lane 0 and `high` becomes lane 1. */
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    const uint64_t lanes[2] = { low.val[0], high.val[0] };

    uint64x2_t joined;
    for (int lane = 0; lane < 2; lane++) {
        joined.u64[lane] = lanes[lane];
    }
    return joined;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* XMRIG_SSE2RVV_H */
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -35,15 +35,69 @@ constexpr size_t twoMiB = 2U * 1024U * 1024U;
|
||||
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
|
||||
|
||||
|
||||
static inline std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
|
||||
// Write `value` as decimal text to `path`, truncating any previous content.
//
// Returns false when the file cannot be opened, and also when the write or
// flush fails. The stream buffers the value, so an error from the underlying
// write only becomes visible after flush(); it was previously ignored and the
// function reported success as soon as the file had been opened.
static bool sysfs_write(const std::string &path, uint64_t value)
{
    std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
    if (!file.is_open()) {
        return false;
    }

    file << value;
    file.flush();

    return file.good();
}
|
||||
|
||||
|
||||
// Read a single unsigned decimal number from `path`.
//
// Returns the parsed value, or -1 when the file cannot be opened or does not
// start with a number. A failed extraction was previously reported as 0,
// which callers could not tell apart from a genuine zero.
static int64_t sysfs_read(const std::string &path)
{
    std::ifstream file(path);
    if (!file.is_open()) {
        return -1;
    }

    uint64_t value = 0;
    if (!(file >> value)) {
        return -1;
    }

    return static_cast<int64_t>(value);
}
|
||||
|
||||
|
||||
static std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
|
||||
{
|
||||
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
|
||||
}
|
||||
|
||||
|
||||
static inline bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) { return LinuxMemory::write(sysfs_path(node, hugePageSize, true).c_str(), count); }
|
||||
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, false).c_str()); }
|
||||
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, true).c_str()); }
|
||||
static std::string sysfs_path(size_t hugePageSize, bool nr)
|
||||
{
|
||||
return fmt::format("/sys/kernel/mm/hugepages/hugepages-{}kB/{}_hugepages", hugePageSize / 1024, nr ? "nr" : "free");
|
||||
}
|
||||
|
||||
|
||||
static bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count)
|
||||
{
|
||||
if (sysfs_write(sysfs_path(node, hugePageSize, true), count)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return sysfs_write(sysfs_path(hugePageSize, true), count);
|
||||
}
|
||||
|
||||
|
||||
static int64_t sysfs_read_hugepages(uint32_t node, size_t hugePageSize, bool nr)
|
||||
{
|
||||
const int64_t value = sysfs_read(sysfs_path(node, hugePageSize, nr));
|
||||
if (value >= 0) {
|
||||
return value;
|
||||
}
|
||||
|
||||
return sysfs_read(sysfs_path(hugePageSize, nr));
|
||||
}
|
||||
|
||||
|
||||
// Number of free huge pages of the given size, or a negative value on failure.
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize)
{
    constexpr bool readNrCounter = false; // read "free_hugepages"
    return sysfs_read_hugepages(node, hugePageSize, readNrCounter);
}
|
||||
// Configured number of huge pages of the given size, or a negative value on failure.
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize)
{
    constexpr bool readNrCounter = true; // read "nr_hugepages"
    return sysfs_read_hugepages(node, hugePageSize, readNrCounter);
}
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
@@ -62,31 +116,3 @@ bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, size_t hugePageSize
|
||||
|
||||
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::LinuxMemory::write(const char *path, uint64_t value)
|
||||
{
|
||||
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
|
||||
if (!file.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
file << value;
|
||||
file.flush();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int64_t xmrig::LinuxMemory::read(const char *path)
|
||||
{
|
||||
std::ifstream file(path);
|
||||
if (!file.is_open()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint64_t value = 0;
|
||||
file >> value;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -31,13 +31,10 @@ class LinuxMemory
|
||||
{
|
||||
public:
|
||||
static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
|
||||
|
||||
static bool write(const char *path, uint64_t value);
|
||||
static int64_t read(const char *path);
|
||||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_LINUXMEMORY_H */
|
||||
#endif // XMRIG_LINUXMEMORY_H
|
||||
|
||||
@@ -49,7 +49,7 @@ xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node)
|
||||
|
||||
constexpr size_t alignment = 1 << 24;
|
||||
|
||||
m_memory = new VirtualMemory(size * pageSize + alignment, hugePages, false, false, node);
|
||||
m_memory = new VirtualMemory(size * pageSize + alignment, hugePages, false, false, node, VirtualMemory::kDefaultHugePageSize);
|
||||
|
||||
m_alignOffset = (alignment - (((size_t)m_memory->scratchpad()) % alignment)) % alignment;
|
||||
}
|
||||
|
||||
@@ -75,6 +75,16 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages
|
||||
}
|
||||
|
||||
m_scratchpad = static_cast<uint8_t*>(_mm_malloc(m_size, alignSize));
|
||||
|
||||
// Huge pages failed to allocate, but try to enable transparent huge pages for the range
|
||||
if (alignSize >= kDefaultHugePageSize) {
|
||||
if (m_scratchpad) {
|
||||
adviseLargePages(m_scratchpad, m_size);
|
||||
}
|
||||
else {
|
||||
m_scratchpad = static_cast<uint8_t*>(_mm_malloc(m_size, 64));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ public:
|
||||
static void *allocateExecutableMemory(size_t size, bool hugePages);
|
||||
static void *allocateLargePagesMemory(size_t size);
|
||||
static void *allocateOneGbPagesMemory(size_t size);
|
||||
static bool adviseLargePages(void *p, size_t size);
|
||||
static void destroy();
|
||||
static void flushInstructionCache(void *p, size_t size);
|
||||
static void freeLargePagesMemory(void *p, size_t size);
|
||||
|
||||
@@ -86,7 +86,7 @@ bool xmrig::VirtualMemory::isHugepagesAvailable()
|
||||
{
|
||||
# ifdef XMRIG_OS_LINUX
|
||||
return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good();
|
||||
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM)
|
||||
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) || defined(XMRIG_OS_HAIKU)
|
||||
return false;
|
||||
# else
|
||||
return true;
|
||||
@@ -156,7 +156,8 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
|
||||
if (!mem) {
|
||||
mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
}
|
||||
|
||||
# elif defined(XMRIG_OS_HAIKU)
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
# else
|
||||
|
||||
void *mem = nullptr;
|
||||
@@ -181,6 +182,8 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||
# elif defined(XMRIG_OS_FREEBSD)
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
||||
# elif defined(XMRIG_OS_HAIKU)
|
||||
void *mem = nullptr;
|
||||
# else
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
|
||||
# endif
|
||||
@@ -273,6 +276,16 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
|
||||
}
|
||||
|
||||
|
||||
// Ask the kernel to back [p, p + size) with transparent huge pages.
// On Linux this issues madvise(MADV_HUGEPAGE) and returns true when the call
// succeeds; on every other platform built from this file it returns false.
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
{
#   ifdef XMRIG_OS_LINUX
    const int rc = madvise(p, size, MADV_HUGEPAGE);
    return rc == 0;
#   else
    return false;
#   endif
}
|
||||
|
||||
|
||||
void xmrig::VirtualMemory::freeLargePagesMemory()
|
||||
{
|
||||
if (m_flags.test(FLAG_LOCK)) {
|
||||
|
||||
@@ -260,6 +260,12 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
|
||||
}
|
||||
|
||||
|
||||
// This variant has no large-page advice to give: both arguments are ignored
// and the function always returns false.
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
{
    constexpr bool advised = false;
    return advised;
}
|
||||
|
||||
|
||||
void xmrig::VirtualMemory::freeLargePagesMemory()
|
||||
{
|
||||
freeLargePagesMemory(m_scratchpad, m_size);
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#define XMRIG_MM_MALLOC_PORTABLE_H
|
||||
|
||||
|
||||
#if defined(XMRIG_ARM) && !defined(__clang__)
|
||||
#if (defined(XMRIG_ARM) || defined(XMRIG_RISCV)) && !defined(__clang__)
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
|
||||
@@ -57,6 +57,9 @@
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
# include "crypto/cn/sse2neon.h"
|
||||
#elif defined(XMRIG_RISCV)
|
||||
// RISC-V doesn't have SSE/NEON, provide minimal compatibility
|
||||
# define _mm_pause() __asm__ __volatile__("nop")
|
||||
#elif defined(__GNUC__)
|
||||
# include <x86intrin.h>
|
||||
#else
|
||||
@@ -286,7 +289,7 @@ struct HelperThread
|
||||
|
||||
void benchmark()
|
||||
{
|
||||
#ifndef XMRIG_ARM
|
||||
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
static std::atomic<int> done{ 0 };
|
||||
if (done.exchange(1)) {
|
||||
return;
|
||||
@@ -478,7 +481,7 @@ static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambd
|
||||
|
||||
HelperThread* create_helper_thread(int64_t cpu_index, int priority, const std::vector<int64_t>& affinities)
|
||||
{
|
||||
#ifndef XMRIG_ARM
|
||||
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||
hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();
|
||||
|
||||
@@ -807,7 +810,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
||||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, seed);
|
||||
|
||||
#ifdef XMRIG_ARM
|
||||
#if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
|
||||
uint32_t step[6] = { 1, 1, 1, 1, 1, 1 };
|
||||
#else
|
||||
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
||||
|
||||
@@ -38,6 +38,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#include "backend/cpu/Cpu.h"
|
||||
|
||||
#ifdef XMRIG_RISCV
|
||||
#include "crypto/randomx/aes_hash_rv64_vector.hpp"
|
||||
#include "crypto/randomx/aes_hash_rv64_zvkned.hpp"
|
||||
#endif
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||
#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
|
||||
@@ -59,14 +66,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
*/
|
||||
template<int softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
|
||||
{
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
hashAes1Rx4_zvkned(input, inputSize, hash);
|
||||
return;
|
||||
}
|
||||
|
||||
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
|
||||
hashAes1Rx4_RVV<softAes>(input, inputSize, hash);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint8_t* inptr = (uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 in0, in1, in2, in3;
|
||||
|
||||
//intial state
|
||||
//initial state
|
||||
state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||
state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||
state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||
@@ -127,7 +147,20 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
|
||||
calls to this function.
|
||||
*/
|
||||
template<int softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
fillAes1Rx4_zvkned(state, outputSize, buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
|
||||
fillAes1Rx4_RVV<softAes>(state, outputSize, buffer);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
@@ -171,7 +204,20 @@ static constexpr randomx::Instruction inst{ 0xFF, 7, 7, 0xFF, 0xFFFFFFFFU };
|
||||
alignas(16) static const randomx::Instruction inst_mask[2] = { inst, inst };
|
||||
|
||||
template<int softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
fillAes4Rx4_zvkned(state, outputSize, buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
|
||||
fillAes4Rx4_RVV<softAes>(state, outputSize, buffer);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
@@ -235,10 +281,34 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
void hashAndFillAes1Rx4_VAES512(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
#endif
|
||||
|
||||
template<int softAes, int unroll>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
PROFILE_SCOPE(RandomX_AES);
|
||||
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
hashAndFillAes1Rx4_zvkned(scratchpad, scratchpadSize, hash, fill_state);
|
||||
return;
|
||||
}
|
||||
|
||||
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
|
||||
hashAndFillAes1Rx4_RVV<softAes, unroll>(scratchpad, scratchpadSize, hash, fill_state);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
if (xmrig::Cpu::info()->arch() == xmrig::ICpuInfo::ARCH_ZEN5) {
|
||||
hashAndFillAes1Rx4_VAES512(scratchpad, scratchpadSize, hash, fill_state);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
@@ -386,43 +456,54 @@ hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>;
|
||||
|
||||
void SelectSoftAESImpl(size_t threadsCount)
|
||||
{
|
||||
constexpr uint64_t test_length_ms = 100;
|
||||
const std::array<hashAndFillAes1Rx4_impl *, 4> impl = {
|
||||
&hashAndFillAes1Rx4<1,1>,
|
||||
&hashAndFillAes1Rx4<2,1>,
|
||||
&hashAndFillAes1Rx4<2,2>,
|
||||
&hashAndFillAes1Rx4<2,4>,
|
||||
};
|
||||
size_t fast_idx = 0;
|
||||
double fast_speed = 0.0;
|
||||
for (size_t run = 0; run < 3; ++run) {
|
||||
for (size_t i = 0; i < impl.size(); ++i) {
|
||||
const double t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
std::vector<uint32_t> count(threadsCount, 0);
|
||||
std::vector<std::thread> threads;
|
||||
for (size_t t = 0; t < threadsCount; ++t) {
|
||||
threads.emplace_back([&, t]() {
|
||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||
alignas(16) uint8_t hash[64] = {};
|
||||
alignas(16) uint8_t state[64] = {};
|
||||
do {
|
||||
(*impl[i])(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
++count[t];
|
||||
} while (xmrig::Chrono::highResolutionMSecs() - t1 < test_length_ms);
|
||||
});
|
||||
}
|
||||
uint32_t total = 0;
|
||||
for (size_t t = 0; t < threadsCount; ++t) {
|
||||
threads[t].join();
|
||||
total += count[t];
|
||||
}
|
||||
const double t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
const double speed = total * 1e3 / (t2 - t1);
|
||||
if (speed > fast_speed) {
|
||||
fast_idx = i;
|
||||
fast_speed = speed;
|
||||
}
|
||||
}
|
||||
}
|
||||
softAESImpl = impl[fast_idx];
|
||||
constexpr uint64_t test_length_ms = 100;
|
||||
|
||||
const std::array<hashAndFillAes1Rx4_impl *, 4> impl = {
|
||||
&hashAndFillAes1Rx4<1,1>,
|
||||
&hashAndFillAes1Rx4<2,1>,
|
||||
&hashAndFillAes1Rx4<2,2>,
|
||||
&hashAndFillAes1Rx4<2,4>,
|
||||
};
|
||||
|
||||
size_t fast_idx = 0;
|
||||
double fast_speed = 0.0;
|
||||
|
||||
for (size_t run = 0; run < 3; ++run) {
|
||||
for (size_t i = 0; i < impl.size(); ++i) {
|
||||
const double t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
|
||||
std::vector<uint32_t> count(threadsCount, 0);
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
for (size_t t = 0; t < threadsCount; ++t) {
|
||||
threads.emplace_back([&, t]() {
|
||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||
|
||||
alignas(16) uint8_t hash[64] = {};
|
||||
alignas(16) uint8_t state[64] = {};
|
||||
|
||||
do {
|
||||
(*impl[i])(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
++count[t];
|
||||
} while (xmrig::Chrono::highResolutionMSecs() - t1 < test_length_ms);
|
||||
});
|
||||
}
|
||||
|
||||
uint32_t total = 0;
|
||||
|
||||
for (size_t t = 0; t < threadsCount; ++t) {
|
||||
threads[t].join();
|
||||
total += count[t];
|
||||
}
|
||||
|
||||
const double t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
const double speed = total * 1e3 / (t2 - t1);
|
||||
|
||||
if (speed > fast_speed) {
|
||||
fast_idx = i;
|
||||
fast_speed = speed;
|
||||
}
|
||||
}
|
||||
}
|
||||
softAESImpl = impl[fast_idx];
|
||||
}
|
||||
|
||||
322
src/crypto/randomx/aes_hash_rv64_vector.cpp
Normal file
322
src/crypto/randomx/aes_hash_rv64_vector.cpp
Normal file
@@ -0,0 +1,322 @@
|
||||
/*
|
||||
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2025 XMRig <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
|
||||
// Table-driven AES round over one vector of eight 32-bit elements.
//
//   in          state to transform
//   key         value XOR-ed into the result
//   i0..i3      byte-gather index vectors (32 byte elements each); index
//               vector N selects the bytes that address lookup table N
//   lut0..lut3  lookup tables made of 32-bit entries
//
// Returns lut0[a] ^ lut1[b] ^ lut2[c] ^ lut3[d] ^ key for every 32-bit
// element, where a..d are the values produced by the four byte gathers.
//
// NOTE(review): presumably every gathered 32-bit element holds a single state
// byte zero-extended, so the scaled offset stays inside a 256-entry table -
// confirm against the index tables the callers pass in.
static FORCE_INLINE vuint32m1_t softaes_vector_double(
	vuint32m1_t in,
	vuint32m1_t key,
	vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3,
	const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3)
{
	constexpr size_t kBytes = 32; // vl for the 8-bit gathers
	constexpr size_t kWords = 8;  // vl for the 32-bit operations

	const vuint8m1_t bytes = __riscv_vreinterpret_v_u32m1_u8m1(in);

	// Rearrange the state bytes once per lookup table.
	const vuint8m1_t picked0 = __riscv_vrgather_vv_u8m1(bytes, i0, kBytes);
	const vuint8m1_t picked1 = __riscv_vrgather_vv_u8m1(bytes, i1, kBytes);
	const vuint8m1_t picked2 = __riscv_vrgather_vv_u8m1(bytes, i2, kBytes);
	const vuint8m1_t picked3 = __riscv_vrgather_vv_u8m1(bytes, i3, kBytes);

	// Indexed loads take byte offsets, so scale the table index by
	// sizeof(uint32_t) with a left shift by 2.
	const vuint32m1_t offset0 = __riscv_vsll_vx_u32m1(__riscv_vreinterpret_v_u8m1_u32m1(picked0), 2, kWords);
	const vuint32m1_t offset1 = __riscv_vsll_vx_u32m1(__riscv_vreinterpret_v_u8m1_u32m1(picked1), 2, kWords);
	const vuint32m1_t offset2 = __riscv_vsll_vx_u32m1(__riscv_vreinterpret_v_u8m1_u32m1(picked2), 2, kWords);
	const vuint32m1_t offset3 = __riscv_vsll_vx_u32m1(__riscv_vreinterpret_v_u8m1_u32m1(picked3), 2, kWords);

	const vuint32m1_t t0 = __riscv_vluxei32_v_u32m1(lut0, offset0, kWords);
	const vuint32m1_t t1 = __riscv_vluxei32_v_u32m1(lut1, offset1, kWords);
	const vuint32m1_t t2 = __riscv_vluxei32_v_u32m1(lut2, offset2, kWords);
	const vuint32m1_t t3 = __riscv_vluxei32_v_u32m1(lut3, offset3, kWords);

	// Combine the four table outputs pairwise, then add the key.
	const vuint32m1_t t01 = __riscv_vxor_vv_u32m1(t0, t1, kWords);
	const vuint32m1_t t23 = __riscv_vxor_vv_u32m1(t2, t3, kWords);
	const vuint32m1_t mixed = __riscv_vxor_vv_u32m1(t01, t23, kWords);

	return __riscv_vxor_vv_u32m1(mixed, key, kWords);
}
|
||||
|
||||
// Starting values of the two hash state vectors (state02 / state13).
static constexpr uint32_t AES_HASH_1R_STATE02[8] = {
	0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad,
	0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4
};
static constexpr uint32_t AES_HASH_1R_STATE13[8] = {
	0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057,
	0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948
};

// Round keys applied to the two fill state vectors on every generated block.
static constexpr uint32_t AES_GEN_1R_KEY02[8] = {
	0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917,
	0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345
};
static constexpr uint32_t AES_GEN_1R_KEY13[8] = {
	0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e,
	0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154
};

// Keys for the two extra hash rounds; the same four words are repeated in
// both halves of each table.
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = {
	0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201,
	0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201
};
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = {
	0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b,
	0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b
};

// Byte offsets for indexed 32-bit loads/stores: four consecutive words,
// followed by four consecutive words that start 32 (X2) or 64 (X4) bytes
// after the base address.
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = {
	0, 4, 8, 12,
	32, 36, 40, 44
};
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = {
	0, 4, 8, 12,
	64, 68, 72, 76
};
|
||||
|
||||
template<int softAes>
|
||||
void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash) {
|
||||
const uint8_t* inptr = (const uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
||||
//intial state
|
||||
vuint32m1_t state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||
vuint32m1_t state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
|
||||
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
|
||||
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
|
||||
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
|
||||
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
|
||||
|
||||
const vuint8m1_t& lutdec_index0 = lutenc_index0;
|
||||
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
|
||||
const vuint8m1_t& lutdec_index2 = lutenc_index2;
|
||||
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (inptr < inputEnd) {
|
||||
state02 = softaes_vector_double(state02, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
state13 = softaes_vector_double(state13, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
|
||||
inptr += 64;
|
||||
}
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||
|
||||
state02 = softaes_vector_double(state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
state13 = softaes_vector_double(state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
|
||||
state02 = softaes_vector_double(state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
state13 = softaes_vector_double(state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
|
||||
//output hash
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
|
||||
}
|
||||
|
||||
template void hashAes1Rx4_RVV<false>(const void *input, size_t inputSize, void *hash);
|
||||
template void hashAes1Rx4_RVV<true>(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
template<int softAes>
|
||||
void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
|
||||
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
|
||||
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
|
||||
|
||||
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
|
||||
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
|
||||
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
|
||||
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
|
||||
|
||||
const vuint8m1_t& lutdec_index0 = lutenc_index0;
|
||||
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
|
||||
const vuint8m1_t& lutdec_index2 = lutenc_index2;
|
||||
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state02 = softaes_vector_double(state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
state13 = softaes_vector_double(state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
|
||||
}
|
||||
|
||||
template void fillAes1Rx4_RVV<false>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes1Rx4_RVV<true>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<int softAes>
|
||||
void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
const vuint32m1_t stride4 = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
|
||||
|
||||
const vuint32m1_t key04 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 0), stride4, 8);
|
||||
const vuint32m1_t key15 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 1), stride4, 8);
|
||||
const vuint32m1_t key26 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 2), stride4, 8);
|
||||
const vuint32m1_t key37 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 3), stride4, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
|
||||
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
|
||||
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
|
||||
|
||||
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
|
||||
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
|
||||
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
|
||||
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
|
||||
|
||||
const vuint8m1_t& lutdec_index0 = lutenc_index0;
|
||||
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
|
||||
const vuint8m1_t& lutdec_index2 = lutenc_index2;
|
||||
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state02 = softaes_vector_double(state02, key04, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
state13 = softaes_vector_double(state13, key04, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
|
||||
state02 = softaes_vector_double(state02, key15, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
state13 = softaes_vector_double(state13, key15, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
|
||||
state02 = softaes_vector_double(state02, key26, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
state13 = softaes_vector_double(state13, key26, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
|
||||
state02 = softaes_vector_double(state02, key37, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
state13 = softaes_vector_double(state13, key37, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
template void fillAes4Rx4_RVV<false>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4_RVV<true>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<int softAes, int unroll>
|
||||
void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||
|
||||
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
|
||||
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
|
||||
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
|
||||
|
||||
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
|
||||
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
|
||||
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
|
||||
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
|
||||
|
||||
const vuint8m1_t& lutdec_index0 = lutenc_index0;
|
||||
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
|
||||
const vuint8m1_t& lutdec_index2 = lutenc_index2;
|
||||
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
#define HASH_STATE(k) \
|
||||
hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
|
||||
hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
|
||||
#define FILL_STATE(k) \
|
||||
fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \
|
||||
fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
|
||||
|
||||
switch (softAes) {
|
||||
case 0:
|
||||
HASH_STATE(0);
|
||||
HASH_STATE(1);
|
||||
|
||||
FILL_STATE(0);
|
||||
FILL_STATE(1);
|
||||
|
||||
scratchpadPtr += 128;
|
||||
break;
|
||||
|
||||
default:
|
||||
switch (unroll) {
|
||||
case 4:
|
||||
HASH_STATE(0);
|
||||
FILL_STATE(0);
|
||||
|
||||
HASH_STATE(1);
|
||||
FILL_STATE(1);
|
||||
|
||||
HASH_STATE(2);
|
||||
FILL_STATE(2);
|
||||
|
||||
HASH_STATE(3);
|
||||
FILL_STATE(3);
|
||||
|
||||
scratchpadPtr += 64 * 4;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
HASH_STATE(0);
|
||||
FILL_STATE(0);
|
||||
|
||||
HASH_STATE(1);
|
||||
FILL_STATE(1);
|
||||
|
||||
scratchpadPtr += 64 * 2;
|
||||
break;
|
||||
|
||||
default:
|
||||
HASH_STATE(0);
|
||||
FILL_STATE(0);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef HASH_STATE
|
||||
#undef FILL_STATE
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||
|
||||
hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
|
||||
hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||
hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||
|
||||
//output hash
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
|
||||
}
|
||||
|
||||
template void hashAndFillAes1Rx4_RVV<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4_RVV<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4_RVV<2,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4_RVV<2,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4_RVV<2,4>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
42
src/crypto/randomx/aes_hash_rv64_vector.hpp
Normal file
42
src/crypto/randomx/aes_hash_rv64_vector.hpp
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2025 XMRig <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Hashes `input` (`inputSize` bytes, consumed in 64-byte blocks) and writes
// 64 bytes to `hash`.
template<int softAes>
|
||||
void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
// Writes `outputSize` generated bytes to `buffer` in 64-byte blocks (one round
// per block); the 64-byte `state` is read and then updated.
template<int softAes>
|
||||
void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
// Writes `outputSize` generated bytes to `buffer` in 64-byte blocks (four
// rounds per block); the 64-byte `state` is only read.
template<int softAes>
|
||||
void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
// Hashes the contents of `scratchpad` into the 64 bytes at `hash` while
// overwriting the scratchpad with generated data; `fill_state` (64 bytes) is
// read and then updated. `unroll` selects how many 64-byte blocks are handled
// per loop iteration.
template<int softAes, int unroll>
|
||||
void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
199
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
Normal file
199
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
Normal file
@@ -0,0 +1,199 @@
|
||||
/*
|
||||
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2025 XMRig <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
// One AES encryption middle round on eight 32-bit elements: `a` is the state,
// `b` the round key.
static FORCE_INLINE vuint32m1_t aesenc_zvkned(vuint32m1_t a, vuint32m1_t b)
{
	return __riscv_vaesem_vv_u32m1(a, b, 8);
}

// One AES decryption middle round on eight 32-bit elements: `a` is the state,
// `b` the round key, `zero` an all-zero vector supplied by the caller.
// vaesdm adds its round key before InvMixColumns, so the round is executed
// with a zero key and `b` is XOR-ed in afterwards, i.e. after InvMixColumns.
static FORCE_INLINE vuint32m1_t aesdec_zvkned(vuint32m1_t a, vuint32m1_t b, vuint32m1_t zero)
{
	const vuint32m1_t unkeyed = __riscv_vaesdm_vv_u32m1(a, zero, 8);
	return __riscv_vxor_vv_u32m1(unkeyed, b, 8);
}
|
||||
|
||||
// Starting values of the two hash state vectors (state02 / state13).
static constexpr uint32_t AES_HASH_1R_STATE02[8] = {
	0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad,
	0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4
};
static constexpr uint32_t AES_HASH_1R_STATE13[8] = {
	0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057,
	0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948
};

// Round keys applied to the two fill state vectors on every generated block.
static constexpr uint32_t AES_GEN_1R_KEY02[8] = {
	0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917,
	0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345
};
static constexpr uint32_t AES_GEN_1R_KEY13[8] = {
	0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e,
	0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154
};

// Keys for the two extra hash rounds; the same four words are repeated in
// both halves of each table.
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = {
	0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201,
	0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201
};
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = {
	0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b,
	0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b
};

// Byte offsets for indexed 32-bit loads/stores: four consecutive words,
// followed by four consecutive words that start 32 (X2) or 64 (X4) bytes
// after the base address.
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = {
	0, 4, 8, 12,
	32, 36, 40, 44
};
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = {
	0, 4, 8, 12,
	64, 68, 72, 76
};
|
||||
|
||||
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash)
|
||||
{
|
||||
const uint8_t* inptr = (const uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
||||
//intial state
|
||||
vuint32m1_t state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||
vuint32m1_t state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (inptr < inputEnd) {
|
||||
state02 = aesenc_zvkned(state02, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 0, stride, 8));
|
||||
state13 = aesdec_zvkned(state13, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 4, stride, 8), zero);
|
||||
|
||||
inptr += 64;
|
||||
}
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||
|
||||
state02 = aesenc_zvkned(state02, xkey00);
|
||||
state13 = aesdec_zvkned(state13, xkey00, zero);
|
||||
|
||||
state02 = aesenc_zvkned(state02, xkey11);
|
||||
state13 = aesdec_zvkned(state13, xkey11, zero);
|
||||
|
||||
//output hash
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
|
||||
}
|
||||
|
||||
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
|
||||
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state02 = aesdec_zvkned(state02, key02, zero);
|
||||
state13 = aesenc_zvkned(state13, key13);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
|
||||
}
|
||||
|
||||
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
const vuint32m1_t stride4 = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
|
||||
|
||||
const vuint32m1_t key04 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 0), stride4, 8);
|
||||
const vuint32m1_t key15 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 1), stride4, 8);
|
||||
const vuint32m1_t key26 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 2), stride4, 8);
|
||||
const vuint32m1_t key37 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 3), stride4, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
|
||||
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state02 = aesdec_zvkned(state02, key04, zero);
|
||||
state13 = aesenc_zvkned(state13, key04);
|
||||
|
||||
state02 = aesdec_zvkned(state02, key15, zero);
|
||||
state13 = aesenc_zvkned(state13, key15);
|
||||
|
||||
state02 = aesdec_zvkned(state02, key26, zero);
|
||||
state13 = aesenc_zvkned(state13, key26);
|
||||
|
||||
state02 = aesdec_zvkned(state02, key37, zero);
|
||||
state13 = aesenc_zvkned(state13, key37);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||
|
||||
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
|
||||
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
hash_state02 = aesenc_zvkned(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, 8));
|
||||
hash_state13 = aesdec_zvkned(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, 8), zero);
|
||||
|
||||
fill_state02 = aesdec_zvkned(fill_state02, key02, zero);
|
||||
fill_state13 = aesenc_zvkned(fill_state13, key13);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, fill_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, fill_state13, 8);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
}
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||
|
||||
hash_state02 = aesenc_zvkned(hash_state02, xkey00);
|
||||
hash_state13 = aesdec_zvkned(hash_state13, xkey00, zero);
|
||||
|
||||
hash_state02 = aesenc_zvkned(hash_state02, xkey11);
|
||||
hash_state13 = aesdec_zvkned(hash_state13, xkey11, zero);
|
||||
|
||||
//output hash
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
|
||||
}
|
||||
35
src/crypto/randomx/aes_hash_rv64_zvkned.hpp
Normal file
35
src/crypto/randomx/aes_hash_rv64_zvkned.hpp
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2025 XMRig <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Hashes `input` (`inputSize` bytes, consumed in 64-byte blocks) and writes
// 64 bytes to `hash`.
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash);
|
||||
// Writes `outputSize` generated bytes to `buffer` in 64-byte blocks (one round
// per block); the 64-byte `state` is read and then updated.
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer);
|
||||
// Writes `outputSize` generated bytes to `buffer` in 64-byte blocks (four
// rounds per block); the 64-byte `state` is only read.
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer);
|
||||
// Hashes the contents of `scratchpad` into the 64 bytes at `hash` while
// overwriting the scratchpad with generated data; `fill_state` (64 bytes) is
// read and then updated.
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
148
src/crypto/randomx/aes_hash_vaes512.cpp
Normal file
148
src/crypto/randomx/aes_hash_vaes512.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2026 XMRig <support@xmrig.com>
|
||||
Copyright (c) 2026 SChernykh <https://github.com/SChernykh>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <immintrin.h>
|
||||
|
||||
// Expands to its four arguments in reverse order (D, C, B, A).
#define REVERSE_4(A, B, C, D) D, C, B, A
|
||||
|
||||
// Initial hash state: 16 x uint32_t (64 bytes), 64-byte aligned so that it can
// be loaded with a single aligned _mm512_load_si512.
alignas(64) static const uint32_t AES_HASH_1R_STATE[] = {
|
||||
REVERSE_4(0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d),
|
||||
REVERSE_4(0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e),
|
||||
REVERSE_4(0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017),
|
||||
REVERSE_4(0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c)
|
||||
};
|
||||
|
||||
// Round keys applied to the fill-generator lanes (loaded as fill_key below).
alignas(64) static const uint32_t AES_GEN_1R_KEY[] = {
|
||||
REVERSE_4(0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553),
|
||||
REVERSE_4(0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07),
|
||||
REVERSE_4(0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1),
|
||||
REVERSE_4(0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135)
|
||||
};
|
||||
|
||||
// Key for the first extra round; the same four words are repeated in all four
// 128-bit lanes.
alignas(64) static const uint32_t AES_HASH_1R_XKEY0[] = {
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389),
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389),
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389),
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389)
|
||||
};
|
||||
|
||||
// Key for the second extra round, replicated across the lanes the same way.
alignas(64) static const uint32_t AES_HASH_1R_XKEY1[] = {
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1),
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1),
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1),
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1)
|
||||
};
|
||||
|
||||
// Single pass over the scratchpad that does two things per 64-byte block:
//   1. absorbs the block's current contents into the hash state (the block is
//      used as the AES round key for the hash lanes), and
//   2. overwrites the block with the next output of the fill generator (the
//      fill lanes are advanced with the constant AES_GEN_1R_KEY).
// Hash and fill lanes are interleaved inside two 512-bit registers (enc_data
// and dec_data), so one aesenc and one aesdec per block serve both purposes.
//
// scratchpad     - buffer that is read and rewritten in place with aligned
//                  512-bit loads/stores, 64 bytes per step
// scratchpadSize - size of the buffer in bytes
// hash           - receives 64 bytes of hash output (aligned 512-bit store)
// fill_state     - 64-byte generator state: loaded on entry, written back
//                  after the loop (aligned 512-bit load/store)
//
// NOTE(review): scratchpadSize is presumably a multiple of 64 and at least
// PREFETCH_DISTANCE; with a smaller size "scratchpadEnd -= PREFETCH_DISTANCE"
// would point before the buffer - confirm against callers.
void hashAndFillAes1Rx4_VAES512(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
const __m512i fill_key = _mm512_load_si512(AES_GEN_1R_KEY);
|
||||
|
||||
const __m512i initial_hash_state = _mm512_load_si512(AES_HASH_1R_STATE);
|
||||
const __m512i initial_fill_state = _mm512_load_si512(fill_state);
|
||||
|
||||
// Bit i of the mask selects 64-bit element i from the second blend operand:
// bits 2, 3, 6 and 7 are set, i.e. 128-bit lanes 1 and 3.
constexpr uint8_t mask = 0b11001100;
|
||||
|
||||
// enc_data[0] = hash_state[0]
|
||||
// enc_data[1] = fill_state[1]
|
||||
// enc_data[2] = hash_state[2]
|
||||
// enc_data[3] = fill_state[3]
|
||||
__m512i enc_data = _mm512_mask_blend_epi64(mask, initial_hash_state, initial_fill_state);
|
||||
|
||||
// dec_data[0] = fill_state[0]
|
||||
// dec_data[1] = hash_state[1]
|
||||
// dec_data[2] = fill_state[2]
|
||||
// dec_data[3] = hash_state[3]
|
||||
__m512i dec_data = _mm512_mask_blend_epi64(mask, initial_fill_state, initial_hash_state);
|
||||
|
||||
// The main loop prefetches this many bytes ahead of the block being processed.
constexpr int PREFETCH_DISTANCE = 7168;
|
||||
|
||||
const uint8_t* prefetchPtr = scratchpadPtr + PREFETCH_DISTANCE;
|
||||
// End the first pass early so that prefetchPtr stays inside the buffer.
scratchpadEnd -= PREFETCH_DISTANCE;
|
||||
|
||||
// Warm-up: prefetch the first PREFETCH_DISTANCE bytes, four 64-byte steps per iteration.
for (const uint8_t* p = scratchpadPtr; p < prefetchPtr; p += 256) {
|
||||
_mm_prefetch((const char*)(p + 0), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(p + 64), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(p + 128), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(p + 192), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
// Pass 0 processes everything up to (end - PREFETCH_DISTANCE) while prefetching
// ahead; pass 1 processes the remaining tail while prefetching from the start
// of the scratchpad again.
for (int i = 0; i < 2; ++i) {
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
const __m512i scratchpad_data = _mm512_load_si512(scratchpadPtr);
|
||||
|
||||
// enc_key[0] = scratchpad_data[0]
|
||||
// enc_key[1] = fill_key[1]
|
||||
// enc_key[2] = scratchpad_data[2]
|
||||
// enc_key[3] = fill_key[3]
|
||||
enc_data = _mm512_aesenc_epi128(enc_data, _mm512_mask_blend_epi64(mask, scratchpad_data, fill_key));
|
||||
|
||||
// dec_key[0] = fill_key[0]
|
||||
// dec_key[1] = scratchpad_data[1]
|
||||
// dec_key[2] = fill_key[2]
|
||||
// dec_key[3] = scratchpad_data[3]
|
||||
dec_data = _mm512_aesdec_epi128(dec_data, _mm512_mask_blend_epi64(mask, fill_key, scratchpad_data));
|
||||
|
||||
// fill_state[0] = dec_data[0]
|
||||
// fill_state[1] = enc_data[1]
|
||||
// fill_state[2] = dec_data[2]
|
||||
// fill_state[3] = enc_data[3]
|
||||
_mm512_store_si512(scratchpadPtr, _mm512_mask_blend_epi64(mask, dec_data, enc_data));
|
||||
|
||||
_mm_prefetch((const char*)prefetchPtr, _MM_HINT_T0);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
prefetchPtr += 64;
|
||||
}
|
||||
// Set up pass 1: wrap the prefetch pointer to the start and restore the real end.
prefetchPtr = (const uint8_t*) scratchpad;
|
||||
scratchpadEnd += PREFETCH_DISTANCE;
|
||||
}
|
||||
|
||||
// Write the final generator state back (same lane selection as the scratchpad store in the loop).
_mm512_store_si512(fill_state, _mm512_mask_blend_epi64(mask, dec_data, enc_data));
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
// The rounds run on all four lanes of both registers; only the hash lanes
// are selected for the output below.
const __m512i xkey0 = _mm512_load_si512(AES_HASH_1R_XKEY0);
|
||||
const __m512i xkey1 = _mm512_load_si512(AES_HASH_1R_XKEY1);
|
||||
|
||||
enc_data = _mm512_aesenc_epi128(enc_data, xkey0);
|
||||
dec_data = _mm512_aesdec_epi128(dec_data, xkey0);
|
||||
enc_data = _mm512_aesenc_epi128(enc_data, xkey1);
|
||||
dec_data = _mm512_aesdec_epi128(dec_data, xkey1);
|
||||
|
||||
//output hash
|
||||
// Lanes 0 and 2 come from enc_data, lanes 1 and 3 from dec_data (the hash lanes of each).
_mm512_store_si512(hash, _mm512_mask_blend_epi64(mask, enc_data, dec_data));
|
||||
|
||||
// Just in case
|
||||
// NOTE(review): presumably avoids AVX/SSE transition penalties in the caller - confirm.
_mm256_zeroupper();
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
;# save VM register values
|
||||
add rsp, 40
|
||||
add rsp, 248
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
|
||||
30
src/crypto/randomx/asm/program_loop_store_hard_aes.inc
Normal file
30
src/crypto/randomx/asm/program_loop_store_hard_aes.inc
Normal file
@@ -0,0 +1,30 @@
|
||||
;# Loop store, hardware-AES variant:
;#   1. store r8-r15 (64 bytes) through the pointer loaded from [rsp+24],
;#   2. run four AES rounds over xmm0-xmm3 using xmm4, xmm5, xmm6, xmm7 as the
;#      round keys (aesenc on xmm0/xmm2, aesdec on xmm1/xmm3),
;#   3. store xmm0-xmm3 (64 bytes) through the pointer loaded from [rsp+16].
;# NOTE(review): what the two stack slots point at is not visible in this
;# fragment - presumably the VM register file and the scratchpad write
;# position; confirm against the code that includes this file.
mov rcx, [rsp+24]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
mov rcx, [rsp+16]
|
||||
;# Round 1: key = xmm4
aesenc xmm0, xmm4
|
||||
aesdec xmm1, xmm4
|
||||
aesenc xmm2, xmm4
|
||||
aesdec xmm3, xmm4
|
||||
;# Round 2: key = xmm5
aesenc xmm0, xmm5
|
||||
aesdec xmm1, xmm5
|
||||
aesenc xmm2, xmm5
|
||||
aesdec xmm3, xmm5
|
||||
;# Round 3: key = xmm6
aesenc xmm0, xmm6
|
||||
aesdec xmm1, xmm6
|
||||
aesenc xmm2, xmm6
|
||||
aesdec xmm3, xmm6
|
||||
;# Round 4: key = xmm7
aesenc xmm0, xmm7
|
||||
aesdec xmm1, xmm7
|
||||
aesenc xmm2, xmm7
|
||||
aesdec xmm3, xmm7
|
||||
;# Aligned stores: movapd needs a 16-byte aligned destination
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
196
src/crypto/randomx/asm/program_loop_store_soft_aes.inc
Normal file
196
src/crypto/randomx/asm/program_loop_store_soft_aes.inc
Normal file
@@ -0,0 +1,196 @@
|
||||
;# Loop store, software-AES variant. Stores r8-r15 through the pointer loaded
;# from [rsp+24], then runs four table-driven rounds on stack copies of
;# xmm0-xmm3 with the stack copies of xmm4-xmm7 as round keys. The call order
;# per round is enc(xmm0), dec(xmm1), enc(xmm2), dec(xmm3).
;# The results are written out after the soft_aes_end label.
mov rcx, [rsp+24]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
|
||||
;# Spill xmm0-xmm3 (state) and xmm4-xmm7 (round keys) to the stack so that the
;# subroutines can address them byte by byte.
movapd xmmword ptr [rsp+40], xmm0
|
||||
movapd xmmword ptr [rsp+56], xmm1
|
||||
movapd xmmword ptr [rsp+72], xmm2
|
||||
movapd xmmword ptr [rsp+88], xmm3
|
||||
movapd xmmword ptr [rsp+104], xmm4
|
||||
movapd xmmword ptr [rsp+120], xmm5
|
||||
movapd xmmword ptr [rsp+136], xmm6
|
||||
movapd xmmword ptr [rsp+152], xmm7
|
||||
|
||||
;# Save the general-purpose registers that are overwritten below or inside the
;# subroutines. rcx is not saved: it is reloaded from [rsp+16] at soft_aes_end.
mov [rsp+168], rax
|
||||
mov [rsp+176], rbx
|
||||
mov [rsp+184], rdx
|
||||
mov [rsp+192], rsi
|
||||
mov [rsp+200], rdi
|
||||
mov [rsp+208], rbp
|
||||
mov [rsp+216], r8
|
||||
mov [rsp+224], r9
|
||||
|
||||
mov r8, [rsp+232] ;# aes_lut_enc
|
||||
mov r9, [rsp+240] ;# aes_lut_dec
|
||||
|
||||
movapd xmm12, xmmword ptr [rsp-8] ;# "call" will overwrite IMUL_RCP's data on stack, so save it
|
||||
|
||||
;# Round 1: rsi -> copy of xmm4. The subroutines leave rsi and rdi unchanged,
;# so rsi is set once per round and only rdi moves between the four states.
lea rsi, [rsp+104]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Round 2: rsi -> copy of xmm5
lea rsi, [rsp+120]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Round 3: rsi -> copy of xmm6
lea rsi, [rsp+136]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Round 4: rsi -> copy of xmm7
lea rsi, [rsp+152]
|
||||
lea rdi, [rsp+40]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+56]
|
||||
call soft_aes_dec
|
||||
lea rdi, [rsp+72]
|
||||
call soft_aes_enc
|
||||
lea rdi, [rsp+88]
|
||||
call soft_aes_dec
|
||||
|
||||
;# Put back the 16 bytes at [rsp-8] that were saved in xmm12 before the calls.
movapd xmmword ptr [rsp-8], xmm12
|
||||
|
||||
;# Skip over the subroutine bodies that follow.
jmp soft_aes_end
|
||||
|
||||
;# soft_aes_enc: one table-driven round applied in place to a 16-byte state.
;#   rsi - address of the 16-byte round key
;#   rdi - address of the 16-byte state (read byte by byte, then overwritten)
;#   r8  - base of four consecutive 256-entry dword tables
;#         (byte offsets 0, 1024, 2048, 3072)
;# Overwrites eax, ebx, ecx, edx and ebp; rsi, rdi and r8 are left unchanged.
soft_aes_enc:
|
||||
;# Each output dword starts out as the corresponding round-key dword.
mov eax, dword ptr [rsi+0]
|
||||
mov ebx, dword ptr [rsi+4]
|
||||
mov ecx, dword ptr [rsi+8]
|
||||
mov edx, dword ptr [rsi+12]
|
||||
|
||||
;# State bytes 0-3 go through tables 0-3 into eax, edx, ecx, ebx.
movzx ebp, byte ptr [rdi+0]
|
||||
xor eax, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+1]
|
||||
xor edx, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+2]
|
||||
xor ecx, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+3]
|
||||
xor ebx, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# State bytes 4-7 go into ebx, eax, edx, ecx.
movzx ebp, byte ptr [rdi+4]
|
||||
xor ebx, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+5]
|
||||
xor eax, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+6]
|
||||
xor edx, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+7]
|
||||
xor ecx, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# State bytes 8-11 go into ecx, ebx, eax, edx.
movzx ebp, byte ptr [rdi+8]
|
||||
xor ecx, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+9]
|
||||
xor ebx, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+10]
|
||||
xor eax, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+11]
|
||||
xor edx, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# State bytes 12-15 go into edx, ecx, ebx, eax.
movzx ebp, byte ptr [rdi+12]
|
||||
xor edx, dword ptr [r8+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+13]
|
||||
xor ecx, dword ptr [r8+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+14]
|
||||
xor ebx, dword ptr [r8+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+15]
|
||||
xor eax, dword ptr [r8+rbp*4+3072]
|
||||
|
||||
;# Write the four accumulated dwords back over the state.
mov dword ptr [rdi+0], eax
|
||||
mov dword ptr [rdi+4], ebx
|
||||
mov dword ptr [rdi+8], ecx
|
||||
mov dword ptr [rdi+12], edx
|
||||
|
||||
ret
|
||||
|
||||
;# soft_aes_dec: counterpart of soft_aes_enc that uses the tables at r9 and the
;# opposite byte-to-dword rotation.
;#   rsi - address of the 16-byte round key
;#   rdi - address of the 16-byte state (read byte by byte, then overwritten)
;#   r9  - base of four consecutive 256-entry dword tables
;#         (byte offsets 0, 1024, 2048, 3072)
;# Overwrites eax, ebx, ecx, edx and ebp; rsi, rdi and r9 are left unchanged.
soft_aes_dec:
|
||||
;# Each output dword starts out as the corresponding round-key dword.
mov eax, dword ptr [rsi+0]
|
||||
mov ebx, dword ptr [rsi+4]
|
||||
mov ecx, dword ptr [rsi+8]
|
||||
mov edx, dword ptr [rsi+12]
|
||||
|
||||
;# State bytes 0-3 go through tables 0-3 into eax, ebx, ecx, edx.
movzx ebp, byte ptr [rdi+0]
|
||||
xor eax, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+1]
|
||||
xor ebx, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+2]
|
||||
xor ecx, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+3]
|
||||
xor edx, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# State bytes 4-7 go into ebx, ecx, edx, eax.
movzx ebp, byte ptr [rdi+4]
|
||||
xor ebx, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+5]
|
||||
xor ecx, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+6]
|
||||
xor edx, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+7]
|
||||
xor eax, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# State bytes 8-11 go into ecx, edx, eax, ebx.
movzx ebp, byte ptr [rdi+8]
|
||||
xor ecx, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+9]
|
||||
xor edx, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+10]
|
||||
xor eax, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+11]
|
||||
xor ebx, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# State bytes 12-15 go into edx, eax, ebx, ecx.
movzx ebp, byte ptr [rdi+12]
|
||||
xor edx, dword ptr [r9+rbp*4]
|
||||
movzx ebp, byte ptr [rdi+13]
|
||||
xor eax, dword ptr [r9+rbp*4+1024]
|
||||
movzx ebp, byte ptr [rdi+14]
|
||||
xor ebx, dword ptr [r9+rbp*4+2048]
|
||||
movzx ebp, byte ptr [rdi+15]
|
||||
xor ecx, dword ptr [r9+rbp*4+3072]
|
||||
|
||||
;# Write the four accumulated dwords back over the state.
mov dword ptr [rdi+0], eax
|
||||
mov dword ptr [rdi+4], ebx
|
||||
mov dword ptr [rdi+8], ecx
|
||||
mov dword ptr [rdi+12], edx
|
||||
|
||||
ret
|
||||
|
||||
;# soft_aes_end: restore the general-purpose registers saved before the
;# soft-AES rounds, reload the updated state into xmm0-xmm3 and store it
;# (64 bytes) through the pointer loaded from [rsp+16].
soft_aes_end:
|
||||
|
||||
mov rax, [rsp+168]
|
||||
mov rbx, [rsp+176]
|
||||
;# rcx was not saved with the others; it is loaded with the store destination instead.
mov rcx, [rsp+16]
|
||||
mov rdx, [rsp+184]
|
||||
mov rsi, [rsp+192]
|
||||
mov rdi, [rsp+200]
|
||||
mov rbp, [rsp+208]
|
||||
mov r8, [rsp+216]
|
||||
mov r9, [rsp+224]
|
||||
|
||||
;# Only the four state slots are reloaded; xmm4-xmm7 still hold their original values.
movapd xmm0, xmmword ptr [rsp+40]
|
||||
movapd xmm1, xmmword ptr [rsp+56]
|
||||
movapd xmm2, xmmword ptr [rsp+72]
|
||||
movapd xmm3, xmmword ptr [rsp+88]
|
||||
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
16
src/crypto/randomx/asm/program_read_dataset_v2.inc
Normal file
16
src/crypto/randomx/asm/program_read_dataset_v2.inc
Normal file
@@ -0,0 +1,16 @@
|
||||
;# Dataset read (v2): XOR the 64 bytes at rdi + (ebp & RANDOMX_DATASET_BASE_MASK)
;# into r8-r15. After the first qword has been consumed, rbp is XORed with rax,
;# a prefetch is issued for the address derived from the updated low half, and
;# the two 32-bit halves of rbp are swapped. The remaining seven qwords are
;# still read from the original offset in rcx.
mov ecx, ebp ;# ecx = ma
|
||||
and ecx, RANDOMX_DATASET_BASE_MASK
|
||||
xor r8, qword ptr [rdi+rcx]
|
||||
xor rbp, rax ;# modify "ma"
|
||||
mov edx, ebp ;# edx = "ma"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
;# Non-temporal prefetch of the line selected by the updated "ma"
prefetchnta byte ptr [rdi+rdx]
|
||||
xor r9, qword ptr [rdi+rcx+8]
|
||||
xor r10, qword ptr [rdi+rcx+16]
|
||||
xor r11, qword ptr [rdi+rcx+24]
|
||||
xor r12, qword ptr [rdi+rcx+32]
|
||||
xor r13, qword ptr [rdi+rcx+40]
|
||||
xor r14, qword ptr [rdi+rcx+48]
|
||||
xor r15, qword ptr [rdi+rcx+56]
|
||||
|
||||
@@ -225,7 +225,10 @@ namespace randomx {
|
||||
}
|
||||
|
||||
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
||||
rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast<uint32_t>(ibc.imm)) % 4);
|
||||
uint64_t isrc = rotr64(*ibc.isrc, ibc.imm);
|
||||
if (!RandomX_CurrentConfig.Tweak_V2_CFROUND || ((isrc & 60) == 0)) {
|
||||
rx_set_rounding_mode(isrc % 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
||||
|
||||
@@ -111,6 +111,10 @@ namespace randomx {
|
||||
#define RANDOMX_HAVE_COMPILER 1
|
||||
class JitCompilerA64;
|
||||
using JitCompiler = JitCompilerA64;
|
||||
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
|
||||
#define RANDOMX_HAVE_COMPILER 1
|
||||
class JitCompilerRV64;
|
||||
using JitCompiler = JitCompilerRV64;
|
||||
#else
|
||||
#define RANDOMX_HAVE_COMPILER 0
|
||||
class JitCompilerFallback;
|
||||
|
||||
@@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define RANDOMX_DATASET_MAX_SIZE 2181038080
|
||||
|
||||
// Increase it if some configs use larger programs
|
||||
#define RANDOMX_PROGRAM_MAX_SIZE 280
|
||||
#define RANDOMX_PROGRAM_MAX_SIZE 384
|
||||
|
||||
// Increase it if some configs use larger scratchpad
|
||||
#define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152
|
||||
|
||||
@@ -174,7 +174,7 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
|
||||
_mm_setcsr(rx_mxcsr_default | (mode << 13));
|
||||
}
|
||||
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors can't use doubles or 64 bit integers with SIMD
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -32,6 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/jit_compiler_x86.hpp"
|
||||
#elif defined(__aarch64__)
|
||||
#include "crypto/randomx/jit_compiler_a64.hpp"
|
||||
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
|
||||
#include "crypto/randomx/jit_compiler_rv64.hpp"
|
||||
#else
|
||||
#include "crypto/randomx/jit_compiler_fallback.hpp"
|
||||
#endif
|
||||
|
||||
@@ -67,7 +67,6 @@ constexpr uint32_t LDR_LITERAL = 0x58000000;
|
||||
constexpr uint32_t ROR = 0x9AC02C00;
|
||||
constexpr uint32_t ROR_IMM = 0x93C00000;
|
||||
constexpr uint32_t MOV_REG = 0xAA0003E0;
|
||||
constexpr uint32_t MOV_VREG_EL = 0x6E080400;
|
||||
constexpr uint32_t FADD = 0x4E60D400;
|
||||
constexpr uint32_t FSUB = 0x4EE0D400;
|
||||
constexpr uint32_t FEOR = 0x6E201C00;
|
||||
@@ -102,7 +101,7 @@ static size_t CalcDatasetItemSize()
|
||||
((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
|
||||
}
|
||||
|
||||
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||
constexpr uint8_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||
|
||||
JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) :
|
||||
hugePages(hugePagesJIT && hugePagesEnable),
|
||||
@@ -115,7 +114,7 @@ JitCompilerA64::~JitCompilerA64()
|
||||
freePagedMemory(code, allocatedSize);
|
||||
}
|
||||
|
||||
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t)
|
||||
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t flags)
|
||||
{
|
||||
if (!allocatedSize) {
|
||||
allocate(CodeSize);
|
||||
@@ -126,13 +125,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
}
|
||||
#endif
|
||||
|
||||
vm_flags = flags;
|
||||
|
||||
uint32_t codePos = MainLoopBegin + 4;
|
||||
|
||||
uint32_t mask = ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10);
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 16 | (10 << 5) | mask, code, codePos);
|
||||
|
||||
// and w17, w20, ScratchpadL3Mask64
|
||||
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 17 | (20 << 5) | mask, code, codePos);
|
||||
|
||||
codePos = PrologueSize;
|
||||
literalPos = ImulRcpLiteralsEnd;
|
||||
@@ -155,19 +157,52 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
|
||||
mask = ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10);
|
||||
// and w20, w20, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
|
||||
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 20 | (20 << 5) | mask, code, codePos);
|
||||
|
||||
// and w10, w10, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
|
||||
emit32(0x121A0000 | 10 | (10 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 10 | (10 << 5) | mask, code, codePos);
|
||||
|
||||
// Update spMix1
|
||||
// eor x10, config.readReg0, config.readReg1
|
||||
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
|
||||
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
|
||||
|
||||
codePos = ((uint8_t*)randomx_program_aarch64_v2_FE_mix) - ((uint8_t*)randomx_program_aarch64);
|
||||
|
||||
// Enable RandomX v2 AES tweak
|
||||
if (RandomX_CurrentConfig.Tweak_V2_AES) {
|
||||
if (flags & RANDOMX_FLAG_HARD_AES) {
|
||||
// Disable the jump to RandomX v1 FE mix code by writing "movi v28.4s, 0" instruction
|
||||
emit32(0x4F00041C, code, codePos);
|
||||
}
|
||||
else {
|
||||
// Jump to RandomX v2 FE mix soft AES code by writing "b randomx_program_aarch64_v2_FE_mix_soft_aes" instruction
|
||||
const uint32_t offset = (uint8_t*)randomx_program_aarch64_v2_FE_mix_soft_aes - (uint8_t*)randomx_program_aarch64_v2_FE_mix;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Restore the jump to RandomX v1 FE mix code
|
||||
const uint32_t offset = (uint8_t*)randomx_program_aarch64_v1_FE_mix - (uint8_t*)randomx_program_aarch64_v2_FE_mix;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
}
|
||||
|
||||
// Apply v2 prefetch tweak
|
||||
if (RandomX_CurrentConfig.Tweak_V2_PREFETCH) {
|
||||
uint32_t dst = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64));
|
||||
uint32_t src = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_v2) - ((uint8_t*)randomx_program_aarch64));
|
||||
memcpy(code + dst, code + src, 16);
|
||||
}
|
||||
else {
|
||||
uint32_t dst = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64));
|
||||
uint32_t src = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_v1) - ((uint8_t*)randomx_program_aarch64));
|
||||
memcpy(code + dst, code + src, 16);
|
||||
}
|
||||
|
||||
# ifndef XMRIG_OS_APPLE
|
||||
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), codePos - MainLoopBegin);
|
||||
# endif
|
||||
@@ -209,19 +244,51 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||
// eor w20, config.readReg2, config.readReg3
|
||||
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
|
||||
|
||||
// Apply v2 prefetch tweak
|
||||
if (RandomX_CurrentConfig.Tweak_V2_PREFETCH) {
|
||||
uint32_t dst = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light_tweak) - ((uint8_t*)randomx_program_aarch64));
|
||||
uint32_t src = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light_v2) - ((uint8_t*)randomx_program_aarch64));
|
||||
memcpy(code + dst, code + src, 8);
|
||||
}
|
||||
else {
|
||||
uint32_t dst = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light_tweak) - ((uint8_t*)randomx_program_aarch64));
|
||||
uint32_t src = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light_v1) - ((uint8_t*)randomx_program_aarch64));
|
||||
memcpy(code + dst, code + src, 8);
|
||||
}
|
||||
|
||||
// Jump back to the main loop
|
||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
|
||||
// and w2, w9, CacheLineAlignMask
|
||||
// and w2, w2, CacheLineAlignMask
|
||||
codePos = (((uint8_t*)randomx_program_aarch64_light_cacheline_align_mask) - ((uint8_t*)randomx_program_aarch64));
|
||||
emit32(0x121A0000 | 2 | (9 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
emit32(0x121A0000 | 2 | (2 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
||||
|
||||
// Update spMix1
|
||||
// eor x10, config.readReg0, config.readReg1
|
||||
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
|
||||
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
|
||||
|
||||
codePos = ((uint8_t*)randomx_program_aarch64_v2_FE_mix) - ((uint8_t*)randomx_program_aarch64);
|
||||
|
||||
// Enable RandomX v2 AES tweak
|
||||
if (RandomX_CurrentConfig.Tweak_V2_AES) {
|
||||
if (vm_flags & RANDOMX_FLAG_HARD_AES) {
|
||||
// Disable the jump to RandomX v1 FE mix code by writing "movi v28.4s, 0" instruction
|
||||
emit32(0x4F00041C, code, codePos);
|
||||
}
|
||||
else {
|
||||
// Jump to RandomX v2 FE mix soft AES code by writing "b randomx_program_aarch64_v2_FE_mix_soft_aes" instruction
|
||||
const uint32_t offset = (uint8_t*)randomx_program_aarch64_v2_FE_mix_soft_aes - (uint8_t*)randomx_program_aarch64_v2_FE_mix;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Restore the jump to RandomX v1 FE mix code
|
||||
const uint32_t offset = (uint8_t*)randomx_program_aarch64_v1_FE_mix - (uint8_t*)randomx_program_aarch64_v2_FE_mix;
|
||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||
}
|
||||
|
||||
// Apply dataset offset
|
||||
codePos = ((uint8_t*)randomx_program_aarch64_light_dataset_offset) - ((uint8_t*)randomx_program_aarch64);
|
||||
|
||||
@@ -497,9 +564,12 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
|
||||
if (src != dst)
|
||||
{
|
||||
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||
uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||
if (imm)
|
||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||
else
|
||||
t = 0x927d0000 | tmp_reg | (src << 5);
|
||||
|
||||
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
||||
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
||||
|
||||
@@ -511,10 +581,18 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
|
||||
else
|
||||
{
|
||||
imm = (imm & ScratchpadL3Mask) >> 3;
|
||||
emitMovImmediate(tmp_reg, imm, code, k);
|
||||
if (imm)
|
||||
{
|
||||
emitMovImmediate(tmp_reg, imm, code, k);
|
||||
|
||||
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
||||
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
||||
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
||||
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
||||
}
|
||||
else
|
||||
{
|
||||
// ldr tmp_reg, [x2]
|
||||
emit32(0xf9400040 | tmp_reg, code, k);
|
||||
}
|
||||
}
|
||||
|
||||
codePos = k;
|
||||
@@ -529,25 +607,22 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
|
||||
constexpr uint32_t tmp_reg = 19;
|
||||
|
||||
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||
uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||
if (imm)
|
||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||
else
|
||||
t = 0x927d0000 | tmp_reg | (src << 5);
|
||||
|
||||
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
||||
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
||||
|
||||
emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
|
||||
|
||||
// add tmp_reg, x2, tmp_reg
|
||||
emit32(ARMV8A::ADD | tmp_reg | (2 << 5) | (tmp_reg << 16), code, k);
|
||||
// ldr tmp_reg_fp, [x2, tmp_reg]
|
||||
emit32(0x3ce06800 | tmp_reg_fp | (2 << 5) | (tmp_reg << 16), code, k);
|
||||
|
||||
// ldpsw tmp_reg, tmp_reg + 1, [tmp_reg]
|
||||
emit32(0x69400000 | tmp_reg | (tmp_reg << 5) | ((tmp_reg + 1) << 10), code, k);
|
||||
|
||||
// ins tmp_reg_fp.d[0], tmp_reg
|
||||
emit32(0x4E081C00 | tmp_reg_fp | (tmp_reg << 5), code, k);
|
||||
|
||||
// ins tmp_reg_fp.d[1], tmp_reg + 1
|
||||
emit32(0x4E181C00 | tmp_reg_fp | ((tmp_reg + 1) << 5), code, k);
|
||||
// sxtl.2d tmp_reg_fp, tmp_reg_fp
|
||||
emit32(0x0f20a400 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
|
||||
|
||||
// scvtf tmp_reg_fp.2d, tmp_reg_fp.2d
|
||||
emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
|
||||
@@ -835,7 +910,8 @@ void JitCompilerA64::h_IROR_R(Instruction& instr, uint32_t& codePos)
|
||||
else
|
||||
{
|
||||
// ror dst, dst, imm
|
||||
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
|
||||
if ((instr.getImm32() & 63))
|
||||
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
|
||||
}
|
||||
|
||||
reg_changed_offset[instr.dst] = codePos;
|
||||
@@ -861,7 +937,8 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
|
||||
else
|
||||
{
|
||||
// ror dst, dst, imm
|
||||
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k);
|
||||
if ((instr.getImm32() & 63))
|
||||
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k);
|
||||
}
|
||||
|
||||
reg_changed_offset[instr.dst] = k;
|
||||
@@ -894,13 +971,8 @@ void JitCompilerA64::h_FSWAP_R(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
const uint32_t dst = instr.dst + 16;
|
||||
|
||||
constexpr uint32_t tmp_reg_fp = 28;
|
||||
constexpr uint32_t src_index1 = 1 << 14;
|
||||
constexpr uint32_t dst_index1 = 1 << 20;
|
||||
|
||||
emit32(ARMV8A::MOV_VREG_EL | tmp_reg_fp | (dst << 5) | src_index1, code, k);
|
||||
emit32(ARMV8A::MOV_VREG_EL | dst | (dst << 5) | dst_index1, code, k);
|
||||
emit32(ARMV8A::MOV_VREG_EL | dst | (tmp_reg_fp << 5), code, k);
|
||||
// ext dst.16b, dst.16b, dst.16b, #0x8
|
||||
emit32(0x6e004000 | dst | (dst << 5) | (dst << 16), code, k);
|
||||
|
||||
codePos = k;
|
||||
}
|
||||
@@ -1032,6 +1104,14 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
|
||||
// ror tmp_reg, src, imm
|
||||
emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k);
|
||||
|
||||
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
|
||||
// tst tmp_reg, 60
|
||||
emit32(0xF27E0E9F, code, k);
|
||||
|
||||
// bne next
|
||||
emit32(0x54000081, code, k);
|
||||
}
|
||||
|
||||
// bfi fpcr_tmp_reg, tmp_reg, 40, 2
|
||||
emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k);
|
||||
|
||||
@@ -1059,9 +1139,12 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
|
||||
else
|
||||
imm &= RandomX_CurrentConfig.ScratchpadL3_Size - 1;
|
||||
|
||||
emitAddImmediate(tmp_reg, dst, imm, code, k);
|
||||
uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||
if (imm)
|
||||
emitAddImmediate(tmp_reg, dst, imm, code, k);
|
||||
else
|
||||
t = 0x927d0000 | tmp_reg | (dst << 5);
|
||||
|
||||
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
||||
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
||||
const uint32_t andInstrL3 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 4) << 10);
|
||||
|
||||
@@ -83,6 +83,7 @@ namespace randomx {
|
||||
uint32_t literalPos;
|
||||
uint32_t num32bitLiterals = 0;
|
||||
size_t allocatedSize = 0;
|
||||
uint32_t vm_flags = 0;
|
||||
|
||||
void allocate(size_t size);
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#define DECL(x) x
|
||||
#endif
|
||||
|
||||
.arch armv8-a
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
.global DECL(randomx_program_aarch64)
|
||||
.global DECL(randomx_program_aarch64_main_loop)
|
||||
@@ -41,9 +41,17 @@
|
||||
.global DECL(randomx_program_aarch64_cacheline_align_mask1)
|
||||
.global DECL(randomx_program_aarch64_cacheline_align_mask2)
|
||||
.global DECL(randomx_program_aarch64_update_spMix1)
|
||||
.global DECL(randomx_program_aarch64_v2_FE_mix)
|
||||
.global DECL(randomx_program_aarch64_v1_FE_mix)
|
||||
.global DECL(randomx_program_aarch64_v2_FE_mix_soft_aes)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_light)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_light_tweak)
|
||||
.global DECL(randomx_program_aarch64_light_cacheline_align_mask)
|
||||
.global DECL(randomx_program_aarch64_light_dataset_offset)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_v1)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_v2)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_light_v1)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_light_v2)
|
||||
.global DECL(randomx_init_dataset_aarch64)
|
||||
.global DECL(randomx_init_dataset_aarch64_end)
|
||||
.global DECL(randomx_calc_dataset_item_aarch64)
|
||||
@@ -100,9 +108,9 @@
|
||||
# v26 -> "a2"
|
||||
# v27 -> "a3"
|
||||
# v28 -> temporary
|
||||
# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
||||
# v30 -> E 'or' mask = 0x3*00000000******3*00000000******
|
||||
# v31 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
|
||||
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
|
||||
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000
|
||||
|
||||
.balign 4
|
||||
DECL(randomx_program_aarch64):
|
||||
@@ -142,17 +150,14 @@ DECL(randomx_program_aarch64):
|
||||
ldp q26, q27, [x0, 224]
|
||||
|
||||
# Load E 'and' mask
|
||||
mov x16, 0x00FFFFFFFFFFFFFF
|
||||
ins v29.d[0], x16
|
||||
ins v29.d[1], x16
|
||||
movi v29.2d, #0x00FFFFFFFFFFFFFF
|
||||
|
||||
# Load E 'or' mask (stored in reg.f[0])
|
||||
ldr q30, [x0, 64]
|
||||
|
||||
# Load scale mask
|
||||
mov x16, 0x80f0000000000000
|
||||
ins v31.d[0], x16
|
||||
ins v31.d[1], x16
|
||||
dup v31.2d, x16
|
||||
|
||||
# Read fpcr
|
||||
mrs x8, fpcr
|
||||
@@ -162,35 +167,22 @@ DECL(randomx_program_aarch64):
|
||||
str x0, [sp, -16]!
|
||||
|
||||
# Read literals
|
||||
ldr x0, literal_x0
|
||||
ldr x11, literal_x11
|
||||
ldr x21, literal_x21
|
||||
ldr x22, literal_x22
|
||||
ldr x23, literal_x23
|
||||
ldr x24, literal_x24
|
||||
ldr x25, literal_x25
|
||||
ldr x26, literal_x26
|
||||
ldr x27, literal_x27
|
||||
ldr x28, literal_x28
|
||||
ldr x29, literal_x29
|
||||
ldr x30, literal_x30
|
||||
adr x30, literal_v0
|
||||
ldp q0, q1, [x30]
|
||||
ldp q2, q3, [x30, 32]
|
||||
ldp q4, q5, [x30, 64]
|
||||
ldp q6, q7, [x30, 96]
|
||||
ldp q8, q9, [x30, 128]
|
||||
ldp q10, q11, [x30, 160]
|
||||
ldp q12, q13, [x30, 192]
|
||||
ldp q14, q15, [x30, 224]
|
||||
|
||||
ldr q0, literal_v0
|
||||
ldr q1, literal_v1
|
||||
ldr q2, literal_v2
|
||||
ldr q3, literal_v3
|
||||
ldr q4, literal_v4
|
||||
ldr q5, literal_v5
|
||||
ldr q6, literal_v6
|
||||
ldr q7, literal_v7
|
||||
ldr q8, literal_v8
|
||||
ldr q9, literal_v9
|
||||
ldr q10, literal_v10
|
||||
ldr q11, literal_v11
|
||||
ldr q12, literal_v12
|
||||
ldr q13, literal_v13
|
||||
ldr q14, literal_v14
|
||||
ldr q15, literal_v15
|
||||
ldp x0, x11, [x30, -96] // literal_x0
|
||||
ldp x21, x22, [x30, -80] // literal_x21
|
||||
ldp x23, x24, [x30, -64] // literal_x23
|
||||
ldp x25, x26, [x30, -48] // literal_x25
|
||||
ldp x27, x28, [x30, -32] // literal_x27
|
||||
ldp x29, x30, [x30, -16] // literal_x29
|
||||
|
||||
DECL(randomx_program_aarch64_main_loop):
|
||||
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
||||
@@ -221,40 +213,31 @@ DECL(randomx_program_aarch64_main_loop):
|
||||
eor x15, x15, x19
|
||||
|
||||
# Load group F registers (spAddr1)
|
||||
ldpsw x20, x19, [x17]
|
||||
ins v16.d[0], x20
|
||||
ins v16.d[1], x19
|
||||
ldpsw x20, x19, [x17, 8]
|
||||
ins v17.d[0], x20
|
||||
ins v17.d[1], x19
|
||||
ldpsw x20, x19, [x17, 16]
|
||||
ins v18.d[0], x20
|
||||
ins v18.d[1], x19
|
||||
ldpsw x20, x19, [x17, 24]
|
||||
ins v19.d[0], x20
|
||||
ins v19.d[1], x19
|
||||
ldr q17, [x17]
|
||||
sxtl v16.2d, v17.2s
|
||||
scvtf v16.2d, v16.2d
|
||||
sxtl2 v17.2d, v17.4s
|
||||
scvtf v17.2d, v17.2d
|
||||
|
||||
ldr q19, [x17, 16]
|
||||
sxtl v18.2d, v19.2s
|
||||
scvtf v18.2d, v18.2d
|
||||
sxtl2 v19.2d, v19.4s
|
||||
scvtf v19.2d, v19.2d
|
||||
|
||||
# Load group E registers (spAddr1)
|
||||
ldpsw x20, x19, [x17, 32]
|
||||
ins v20.d[0], x20
|
||||
ins v20.d[1], x19
|
||||
ldpsw x20, x19, [x17, 40]
|
||||
ins v21.d[0], x20
|
||||
ins v21.d[1], x19
|
||||
ldpsw x20, x19, [x17, 48]
|
||||
ins v22.d[0], x20
|
||||
ins v22.d[1], x19
|
||||
ldpsw x20, x19, [x17, 56]
|
||||
ins v23.d[0], x20
|
||||
ins v23.d[1], x19
|
||||
ldr q21, [x17, 32]
|
||||
sxtl v20.2d, v21.2s
|
||||
scvtf v20.2d, v20.2d
|
||||
sxtl2 v21.2d, v21.4s
|
||||
scvtf v21.2d, v21.2d
|
||||
|
||||
ldr q23, [x17, 48]
|
||||
sxtl v22.2d, v23.2s
|
||||
scvtf v22.2d, v22.2d
|
||||
sxtl2 v23.2d, v23.4s
|
||||
scvtf v23.2d, v23.2d
|
||||
|
||||
and v20.16b, v20.16b, v29.16b
|
||||
and v21.16b, v21.16b, v29.16b
|
||||
and v22.16b, v22.16b, v29.16b
|
||||
@@ -267,8 +250,8 @@ DECL(randomx_program_aarch64_main_loop):
|
||||
# Execute VM instructions
|
||||
DECL(randomx_program_aarch64_vm_instructions):
|
||||
|
||||
# 16 KB buffer for generated instructions
|
||||
.fill 4096,4,0
|
||||
# 24 KB buffer for generated instructions
|
||||
.fill 6144,4,0
|
||||
|
||||
literal_x0: .fill 1,8,0
|
||||
literal_x11: .fill 1,8,0
|
||||
@@ -311,6 +294,10 @@ DECL(randomx_program_aarch64_vm_instructions_end):
|
||||
|
||||
# Calculate dataset pointer for dataset prefetch
|
||||
mov w20, w9
|
||||
|
||||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x20, x20, 1
|
||||
@@ -319,9 +306,6 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||
# Prefetch dataset data
|
||||
prfm pldl2strm, [x20]
|
||||
|
||||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x10, x10, 1
|
||||
@@ -352,12 +336,93 @@ DECL(randomx_program_aarch64_update_spMix1):
|
||||
stp x12, x13, [x17, 32]
|
||||
stp x14, x15, [x17, 48]
|
||||
|
||||
# xor group F and group E registers
|
||||
# RandomX v2 AES tweak (mix group F and group E registers using AES)
|
||||
// Hardware-AES variant of the group F / group E mix.
// v16-v19 hold f0-f3 and v20-v23 hold e0-e3. The first instruction below is a
// patch slot: as assembled it branches to the v1 (plain XOR) mix, and the notes
// that follow describe what the JIT compiler writes there instead.
// Every aese/aesd uses v28 as its key operand; the E register is applied
// afterwards by an explicit eor, once per E register (four rounds in total).
DECL(randomx_program_aarch64_v2_FE_mix):
|
||||
|
||||
# Jump to v1 FE mix code if we're running RandomX v1
|
||||
# JIT compiler will write a "movi v28.4s, 0" (set v28 to all 0) here if we're running RandomX v2
|
||||
# Or, JIT compiler will write a "b randomx_program_aarch64_v2_FE_mix_soft_aes" if we're running RandomX v2 with soft AES
|
||||
b DECL(randomx_program_aarch64_v1_FE_mix)
|
||||
|
||||
# f0 = aesenc(f0, e0), f1 = aesdec(f1, e0), f2 = aesenc(f2, e0), f3 = aesdec(f3, e0)
|
||||
|
||||
// Round 1: f0/f2 take the encrypt path (aese + aesmc), f1/f3 the decrypt path (aesd + aesimc)
aese v16.16b, v28.16b
|
||||
aesd v17.16b, v28.16b
|
||||
aese v18.16b, v28.16b
|
||||
aesd v19.16b, v28.16b
|
||||
|
||||
aesmc v16.16b, v16.16b
|
||||
aesimc v17.16b, v17.16b
|
||||
aesmc v18.16b, v18.16b
|
||||
aesimc v19.16b, v19.16b
|
||||
|
||||
// XOR all four F registers with e0 (v20)
eor v16.16b, v16.16b, v20.16b
|
||||
eor v17.16b, v17.16b, v20.16b
|
||||
eor v18.16b, v18.16b, v20.16b
|
||||
eor v19.16b, v19.16b, v20.16b
|
||||
|
||||
# f0 = aesenc(f0, e1), f1 = aesdec(f1, e1), f2 = aesenc(f2, e1), f3 = aesdec(f3, e1)
|
||||
|
||||
// Round 2: same pattern, key is e1 (v21)
aese v16.16b, v28.16b
|
||||
aesd v17.16b, v28.16b
|
||||
aese v18.16b, v28.16b
|
||||
aesd v19.16b, v28.16b
|
||||
|
||||
aesmc v16.16b, v16.16b
|
||||
aesimc v17.16b, v17.16b
|
||||
aesmc v18.16b, v18.16b
|
||||
aesimc v19.16b, v19.16b
|
||||
|
||||
eor v16.16b, v16.16b, v21.16b
|
||||
eor v17.16b, v17.16b, v21.16b
|
||||
eor v18.16b, v18.16b, v21.16b
|
||||
eor v19.16b, v19.16b, v21.16b
|
||||
|
||||
# f0 = aesenc(f0, e2), f1 = aesdec(f1, e2), f2 = aesenc(f2, e2), f3 = aesdec(f3, e2)
|
||||
|
||||
// Round 3: same pattern, key is e2 (v22)
aese v16.16b, v28.16b
|
||||
aesd v17.16b, v28.16b
|
||||
aese v18.16b, v28.16b
|
||||
aesd v19.16b, v28.16b
|
||||
|
||||
aesmc v16.16b, v16.16b
|
||||
aesimc v17.16b, v17.16b
|
||||
aesmc v18.16b, v18.16b
|
||||
aesimc v19.16b, v19.16b
|
||||
|
||||
eor v16.16b, v16.16b, v22.16b
|
||||
eor v17.16b, v17.16b, v22.16b
|
||||
eor v18.16b, v18.16b, v22.16b
|
||||
eor v19.16b, v19.16b, v22.16b
|
||||
|
||||
# f0 = aesenc(f0, e3), f1 = aesdec(f1, e3), f2 = aesenc(f2, e3), f3 = aesdec(f3, e3)
|
||||
|
||||
// Round 4: same pattern, key is e3 (v23)
aese v16.16b, v28.16b
|
||||
aesd v17.16b, v28.16b
|
||||
aese v18.16b, v28.16b
|
||||
aesd v19.16b, v28.16b
|
||||
|
||||
aesmc v16.16b, v16.16b
|
||||
aesimc v17.16b, v17.16b
|
||||
aesmc v18.16b, v18.16b
|
||||
aesimc v19.16b, v19.16b
|
||||
|
||||
eor v16.16b, v16.16b, v23.16b
|
||||
eor v17.16b, v17.16b, v23.16b
|
||||
eor v18.16b, v18.16b, v23.16b
|
||||
eor v19.16b, v19.16b, v23.16b
|
||||
|
||||
# Skip v1 FE mix code because we already did v2 FE mix
|
||||
b randomx_program_aarch64_FE_store
|
||||
|
||||
// RandomX v1 F/E mix: XOR each group F register (v16-v19) with the group E
// register of the same index (v20-v23), then fall through to the store code.
DECL(randomx_program_aarch64_v1_FE_mix):
|
||||
eor v16.16b, v16.16b, v20.16b
|
||||
eor v17.16b, v17.16b, v21.16b
|
||||
eor v18.16b, v18.16b, v22.16b
|
||||
eor v19.16b, v19.16b, v23.16b
|
||||
|
||||
randomx_program_aarch64_FE_store:
|
||||
|
||||
# Store FP registers to scratchpad (spAddr0)
|
||||
stp q16, q17, [x16, 0]
|
||||
stp q18, q19, [x16, 32]
|
||||
@@ -402,6 +467,13 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
|
||||
stp x0, x1, [sp, 64]
|
||||
stp x2, x30, [sp, 80]
|
||||
|
||||
lsr x2, x9, 32
|
||||
|
||||
DECL(randomx_program_aarch64_light_cacheline_align_mask):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and w2, w2, 1
|
||||
|
||||
DECL(randomx_program_aarch64_vm_instructions_end_light_tweak):
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x20
|
||||
|
||||
@@ -414,10 +486,6 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
|
||||
# x1 -> pointer to output
|
||||
mov x1, sp
|
||||
|
||||
DECL(randomx_program_aarch64_light_cacheline_align_mask):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and w2, w9, 1
|
||||
|
||||
# x2 -> item number
|
||||
lsr x2, x2, 6
|
||||
|
||||
@@ -435,6 +503,500 @@ DECL(randomx_program_aarch64_light_dataset_offset):
|
||||
|
||||
b DECL(randomx_program_aarch64_xor_with_dataset_line)
|
||||
|
||||
// NOTE(review): this label is exported via .global and is placed after an
// unconditional branch, so it looks like a code template that the JIT copies
// into the program rather than code executed in place - confirm in the JIT compiler.
// v1 ordering: XOR first, then swap the 32-bit halves of x9.
DECL(randomx_program_aarch64_vm_instructions_end_v1):
|
||||
// x10 = upper 32 bits of x9
lsr x10, x9, 32
|
||||
// x9 ^= x20
eor x9, x9, x20
|
||||
// x20 = low 32 bits of the updated x9
mov w20, w9
|
||||
// swap the upper and lower 32-bit halves of x9
ror x9, x9, 32
|
||||
|
||||
// NOTE(review): presumably the RandomX v2 counterpart of the _end_v1 template;
// confirm how the JIT compiler selects between them.
// v2 ordering: swap the 32-bit halves of x9 first, then XOR with x20.
DECL(randomx_program_aarch64_vm_instructions_end_v2):
|
||||
// x10 = upper 32 bits of x9
lsr x10, x9, 32
|
||||
// swap the upper and lower 32-bit halves of x9
ror x9, x9, 32
|
||||
// x9 ^= x20
eor x9, x9, x20
|
||||
// x20 = low 32 bits of the updated x9
mov w20, w9
|
||||
|
||||
// NOTE(review): presumably the light-mode template for RandomX v1 - confirm in the JIT compiler.
// XOR x9 with x20, then swap the 32-bit halves of x9.
DECL(randomx_program_aarch64_vm_instructions_end_light_v1):
|
||||
eor x9, x9, x20
|
||||
ror x9, x9, 32
|
||||
|
||||
// NOTE(review): presumably the light-mode template for RandomX v2 - confirm in the JIT compiler.
// Swap the 32-bit halves of x9, then XOR with x20 (reverse order of the v1 template).
DECL(randomx_program_aarch64_vm_instructions_end_light_v2):
|
||||
ror x9, x9, 32
|
||||
eor x9, x9, x20
|
||||
|
||||
// Software (table-lookup) variant of the v2 F/E mix, for use without the AES instructions.
// Saves x0-x16, x30, q0 and q1 in a 176-byte stack frame, runs four table-based
// AES rounds on each of f0-f3 (v16-v19) keyed by e0-e3 (v20-v23), restores the
// saved registers and branches to the common store code.
// The helpers take the state in v0 and the round key in v1 and return the result in v0.
// x19, x20 and v28 are overwritten here and are not restored.
DECL(randomx_program_aarch64_v2_FE_mix_soft_aes):
|
||||
sub sp, sp, 176
|
||||
|
||||
// The helpers use w0-w16 as scratch, bl overwrites x30, and v0/v1 carry the arguments
stp x0, x1, [sp]
|
||||
stp x2, x3, [sp, 16]
|
||||
stp x4, x5, [sp, 32]
|
||||
stp x6, x7, [sp, 48]
|
||||
stp x8, x9, [sp, 64]
|
||||
stp x10, x11, [sp, 80]
|
||||
stp x12, x13, [sp, 96]
|
||||
stp x14, x15, [sp, 112]
|
||||
stp x16, x30, [sp, 128]
|
||||
stp q0, q1, [sp, 144]
|
||||
|
||||
// Table base addresses: x19 is read by randomx_soft_aesenc, x20 by randomx_soft_aesdec
adr x19, randomx_aes_lut_enc
|
||||
adr x20, randomx_aes_lut_dec
|
||||
|
||||
# f0 = aesenc(f0, e0), f0 = aesenc(f0, e1), f0 = aesenc(f0, e2), f0 = aesenc(f0, e3)
|
||||
// v0 stays live across the four calls; only the key in v1 changes
mov v0.16b, v16.16b
|
||||
mov v1.16b, v20.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v1.16b, v21.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v1.16b, v22.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v1.16b, v23.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v16.16b, v0.16b
|
||||
|
||||
# f1 = aesdec(f1, e0), f1 = aesdec(f1, e1), f1 = aesdec(f1, e2), f1 = aesdec(f1, e3)
|
||||
mov v0.16b, v17.16b
|
||||
mov v1.16b, v20.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v1.16b, v21.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v1.16b, v22.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v1.16b, v23.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v17.16b, v0.16b
|
||||
|
||||
# f2 = aesenc(f2, e0), f2 = aesenc(f2, e1), f2 = aesenc(f2, e2), f2 = aesenc(f2, e3)
|
||||
mov v0.16b, v18.16b
|
||||
mov v1.16b, v20.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v1.16b, v21.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v1.16b, v22.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v1.16b, v23.16b
|
||||
bl randomx_soft_aesenc
|
||||
mov v18.16b, v0.16b
|
||||
|
||||
# f3 = aesdec(f3, e0), f3 = aesdec(f3, e1), f3 = aesdec(f3, e2), f3 = aesdec(f3, e3)
|
||||
mov v0.16b, v19.16b
|
||||
mov v1.16b, v20.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v1.16b, v21.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v1.16b, v22.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v1.16b, v23.16b
|
||||
bl randomx_soft_aesdec
|
||||
mov v19.16b, v0.16b
|
||||
|
||||
// Restore everything saved on entry, using the same frame layout
ldp x0, x1, [sp]
|
||||
ldp x2, x3, [sp, 16]
|
||||
ldp x4, x5, [sp, 32]
|
||||
ldp x6, x7, [sp, 48]
|
||||
ldp x8, x9, [sp, 64]
|
||||
ldp x10, x11, [sp, 80]
|
||||
ldp x12, x13, [sp, 96]
|
||||
ldp x14, x15, [sp, 112]
|
||||
ldp x16, x30, [sp, 128]
|
||||
ldp q0, q1, [sp, 144]
|
||||
|
||||
add sp, sp, 176
|
||||
|
||||
// Continue at the store code shared with the other F/E mix variants
b randomx_program_aarch64_FE_store
|
||||
|
||||
|
||||
// One table-driven AES encryption round.
//   in:  v0  = 16-byte state (bytes b0..b15), v1 = round key,
//        x19 = base of the lookup table (set by the caller to randomx_aes_lut_enc)
//   out: v0  = round(state) ^ v1
//   clobbers: w0-w16, v28
// The table is indexed as four consecutive 256-word sub-tables (word offsets
// 0, 256, 512, 768; "lsl 2" scales the index to bytes), called T0..T3 below.
// Each 32-bit output column is the XOR of one lookup from each sub-table.
// Loads and address arithmetic for different columns are interleaved, and
// several registers (e.g. x4, x5) are reused for a second byte index later on.
randomx_soft_aesenc:
|
||||
umov w4, v0.b[5]
|
||||
umov w1, v0.b[10]
|
||||
umov w12, v0.b[15]
|
||||
umov w9, v0.b[9]
|
||||
umov w2, v0.b[14]
|
||||
umov w11, v0.b[3]
|
||||
umov w5, v0.b[0]
|
||||
umov w16, v0.b[4]
|
||||
add x4, x4, 256
|
||||
add x1, x1, 512
|
||||
add x12, x12, 768
|
||||
umov w3, v0.b[13]
|
||||
umov w8, v0.b[2]
|
||||
umov w7, v0.b[7]
|
||||
add x9, x9, 256
|
||||
add x2, x2, 512
|
||||
add x11, x11, 768
|
||||
ldr w10, [x19, x4, lsl 2]
|
||||
ldr w15, [x19, x5, lsl 2]
|
||||
umov w13, v0.b[8]
|
||||
ldr w14, [x19, x12, lsl 2]
|
||||
umov w6, v0.b[1]
|
||||
ldr w1, [x19, x1, lsl 2]
|
||||
eor w10, w10, w15
|
||||
ldr w2, [x19, x2, lsl 2]
|
||||
umov w5, v0.b[6]
|
||||
ldr w9, [x19, x9, lsl 2]
|
||||
umov w4, v0.b[11]
|
||||
ldr w12, [x19, x16, lsl 2]
|
||||
eor w1, w1, w14
|
||||
ldr w11, [x19, x11, lsl 2]
|
||||
eor w1, w1, w10
|
||||
add x8, x8, 512
|
||||
add x3, x3, 256
|
||||
add x7, x7, 768
|
||||
eor w9, w9, w12
|
||||
// column 0 = T0[b0] ^ T1[b5] ^ T2[b10] ^ T3[b15]
fmov s28, w1
|
||||
eor w1, w2, w11
|
||||
umov w10, v0.b[12]
|
||||
eor w1, w1, w9
|
||||
ldr w3, [x19, x3, lsl 2]
|
||||
add x6, x6, 256
|
||||
ldr w9, [x19, x13, lsl 2]
|
||||
// column 1 = T0[b4] ^ T1[b9] ^ T2[b14] ^ T3[b3]
ins v28.s[1], w1
|
||||
ldr w2, [x19, x8, lsl 2]
|
||||
add x5, x5, 512
|
||||
ldr w7, [x19, x7, lsl 2]
|
||||
add x4, x4, 768
|
||||
eor w1, w3, w9
|
||||
ldr w3, [x19, x6, lsl 2]
|
||||
eor w2, w2, w7
|
||||
ldr w6, [x19, x10, lsl 2]
|
||||
eor w2, w2, w1
|
||||
ldr w1, [x19, x5, lsl 2]
|
||||
ldr w0, [x19, x4, lsl 2]
|
||||
eor w3, w3, w6
|
||||
// column 2 = T0[b8] ^ T1[b13] ^ T2[b2] ^ T3[b7]
ins v28.s[2], w2
|
||||
eor w0, w1, w0
|
||||
eor w0, w0, w3
|
||||
// column 3 = T0[b12] ^ T1[b1] ^ T2[b6] ^ T3[b11]
ins v28.s[3], w0
|
||||
// apply the round key and return the new state in v0
eor v0.16b, v1.16b, v28.16b
|
||||
ret
|
||||
|
||||
// One table-driven AES decryption round.
//   in:  v0  = 16-byte state (bytes b0..b15), v1 = round key,
//        x20 = base of the lookup table (set by the caller to randomx_aes_lut_dec)
//   out: v0  = round(state) ^ v1
//   clobbers: w0-w16, v28
// The table is indexed as four consecutive 256-word sub-tables (word offsets
// 0, 256, 512, 768; "lsl 2" scales the index to bytes), called T0..T3 below.
// The byte selection per column differs from randomx_soft_aesenc (the T1 and T3
// source bytes are exchanged), otherwise the structure is the same.
randomx_soft_aesdec:
|
||||
umov w1, v0.b[10]
|
||||
umov w3, v0.b[7]
|
||||
umov w12, v0.b[13]
|
||||
umov w2, v0.b[14]
|
||||
umov w9, v0.b[11]
|
||||
umov w11, v0.b[1]
|
||||
umov w4, v0.b[0]
|
||||
umov w16, v0.b[4]
|
||||
add x3, x3, 768
|
||||
add x1, x1, 512
|
||||
add x12, x12, 256
|
||||
umov w8, v0.b[5]
|
||||
umov w6, v0.b[2]
|
||||
umov w7, v0.b[15]
|
||||
add x9, x9, 768
|
||||
add x2, x2, 512
|
||||
add x11, x11, 256
|
||||
ldr w15, [x20, x3, lsl 2]
|
||||
ldr w10, [x20, x4, lsl 2]
|
||||
umov w13, v0.b[8]
|
||||
ldr w14, [x20, x12, lsl 2]
|
||||
umov w5, v0.b[9]
|
||||
ldr w1, [x20, x1, lsl 2]
|
||||
umov w3, v0.b[6]
|
||||
ldr w12, [x20, x9, lsl 2]
|
||||
umov w4, v0.b[3]
|
||||
ldr w9, [x20, x16, lsl 2]
|
||||
eor w1, w1, w15
|
||||
ldr w2, [x20, x2, lsl 2]
|
||||
eor w10, w10, w14
|
||||
ldr w11, [x20, x11, lsl 2]
|
||||
eor w1, w1, w10
|
||||
add x8, x8, 256
|
||||
add x6, x6, 512
|
||||
add x7, x7, 768
|
||||
eor w2, w2, w12
|
||||
// column 0 = T0[b0] ^ T1[b13] ^ T2[b10] ^ T3[b7]
fmov s28, w1
|
||||
eor w1, w9, w11
|
||||
eor w1, w2, w1
|
||||
umov w9, v0.b[12]
|
||||
ldr w2, [x20, x13, lsl 2]
|
||||
add x5, x5, 256
|
||||
ldr w8, [x20, x8, lsl 2]
|
||||
// column 1 = T0[b4] ^ T1[b1] ^ T2[b14] ^ T3[b11]
ins v28.s[1], w1
|
||||
ldr w6, [x20, x6, lsl 2]
|
||||
add x3, x3, 512
|
||||
ldr w7, [x20, x7, lsl 2]
|
||||
add x4, x4, 768
|
||||
eor w2, w2, w8
|
||||
ldr w1, [x20, x9, lsl 2]
|
||||
eor w6, w6, w7
|
||||
ldr w3, [x20, x3, lsl 2]
|
||||
eor w2, w2, w6
|
||||
ldr w4, [x20, x4, lsl 2]
|
||||
ldr w5, [x20, x5, lsl 2]
|
||||
// column 2 = T0[b8] ^ T1[b5] ^ T2[b2] ^ T3[b15]
ins v28.s[2], w2
|
||||
eor w0, w1, w5
|
||||
eor w1, w3, w4
|
||||
eor w0, w0, w1
|
||||
// column 3 = T0[b12] ^ T1[b9] ^ T2[b6] ^ T3[b3]
ins v28.s[3], w0
|
||||
// apply the round key and return the new state in v0
eor v0.16b, v1.16b, v28.16b
|
||||
ret
|
||||
|
||||
randomx_aes_lut_enc:
|
||||
.word 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591
|
||||
.word 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec
|
||||
.word 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb
|
||||
.word 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b
|
||||
.word 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83
|
||||
.word 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a
|
||||
.word 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f
|
||||
.word 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea
|
||||
.word 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b
|
||||
.word 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413
|
||||
.word 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6
|
||||
.word 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85
|
||||
.word 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511
|
||||
.word 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b
|
||||
.word 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1
|
||||
.word 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf
|
||||
.word 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e
|
||||
.word 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6
|
||||
.word 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b
|
||||
.word 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad
|
||||
.word 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8
|
||||
.word 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2
|
||||
.word 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949
|
||||
.word 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810
|
||||
.word 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697
|
||||
.word 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f
|
||||
.word 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c
|
||||
.word 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27
|
||||
.word 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433
|
||||
.word 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5
|
||||
.word 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0
|
||||
.word 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
|
||||
|
||||
.word 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154
|
||||
.word 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a
|
||||
.word 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b
|
||||
.word 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b
|
||||
.word 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f
|
||||
.word 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f
|
||||
.word 0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5
|
||||
.word 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f
|
||||
.word 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb
|
||||
.word 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397
|
||||
.word 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed
|
||||
.word 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a
|
||||
.word 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194
|
||||
.word 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3
|
||||
.word 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104
|
||||
.word 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d
|
||||
.word 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39
|
||||
.word 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695
|
||||
.word 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83
|
||||
.word 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76
|
||||
.word 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4
|
||||
.word 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b
|
||||
.word 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0
|
||||
.word 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018
|
||||
.word 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751
|
||||
.word 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85
|
||||
.word 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12
|
||||
.word 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9
|
||||
.word 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7
|
||||
.word 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a
|
||||
.word 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8
|
||||
.word 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a
|
||||
|
||||
.word 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5
|
||||
.word 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76
|
||||
.word 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0
|
||||
.word 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0
|
||||
.word 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc
|
||||
.word 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15
|
||||
.word 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a
|
||||
.word 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75
|
||||
.word 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0
|
||||
.word 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784
|
||||
.word 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b
|
||||
.word 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf
|
||||
.word 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485
|
||||
.word 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8
|
||||
.word 0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5
|
||||
.word 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2
|
||||
.word 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917
|
||||
.word 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573
|
||||
.word 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388
|
||||
.word 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db
|
||||
.word 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c
|
||||
.word 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79
|
||||
.word 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9
|
||||
.word 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808
|
||||
.word 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6
|
||||
.word 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a
|
||||
.word 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e
|
||||
.word 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e
|
||||
.word 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794
|
||||
.word 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf
|
||||
.word 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868
|
||||
.word 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16
|
||||
|
||||
.word 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5
|
||||
.word 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676
|
||||
.word 0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0
|
||||
.word 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0
|
||||
.word 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc
|
||||
.word 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515
|
||||
.word 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a
|
||||
.word 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575
|
||||
.word 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0
|
||||
.word 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484
|
||||
.word 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b
|
||||
.word 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf
|
||||
.word 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585
|
||||
.word 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8
|
||||
.word 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5
|
||||
.word 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2
|
||||
.word 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717
|
||||
.word 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373
|
||||
.word 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888
|
||||
.word 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb
|
||||
.word 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c
|
||||
.word 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979
|
||||
.word 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9
|
||||
.word 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808
|
||||
.word 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6
|
||||
.word 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a
|
||||
.word 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e
|
||||
.word 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e
|
||||
.word 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494
|
||||
.word 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf
|
||||
.word 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868
|
||||
.word 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616
|
||||
|
||||
randomx_aes_lut_dec:
|
||||
.word 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b
|
||||
.word 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5
|
||||
.word 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b
|
||||
.word 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e
|
||||
.word 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d
|
||||
.word 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9
|
||||
.word 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566
|
||||
.word 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed
|
||||
.word 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4
|
||||
.word 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd
|
||||
.word 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060
|
||||
.word 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879
|
||||
.word 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c
|
||||
.word 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624
|
||||
.word 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c
|
||||
.word 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14
|
||||
.word 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b
|
||||
.word 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684
|
||||
.word 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177
|
||||
.word 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322
|
||||
.word 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f
|
||||
.word 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382
|
||||
.word 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb
|
||||
.word 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef
|
||||
.word 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235
|
||||
.word 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117
|
||||
.word 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546
|
||||
.word 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d
|
||||
.word 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a
|
||||
.word 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478
|
||||
.word 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff
|
||||
.word 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0
|
||||
|
||||
.word 0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93
|
||||
.word 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f
|
||||
.word 0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6
|
||||
.word 0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44
|
||||
.word 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4
|
||||
.word 0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994
|
||||
.word 0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a
|
||||
.word 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c
|
||||
.word 0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a
|
||||
.word 0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51
|
||||
.word 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff
|
||||
.word 0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db
|
||||
.word 0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e
|
||||
.word 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a
|
||||
.word 0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16
|
||||
.word 0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8
|
||||
.word 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34
|
||||
.word 0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420
|
||||
.word 0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0
|
||||
.word 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef
|
||||
.word 0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4
|
||||
.word 0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5
|
||||
.word 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b
|
||||
.word 0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6
|
||||
.word 0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0
|
||||
.word 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f
|
||||
.word 0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f
|
||||
.word 0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13
|
||||
.word 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c
|
||||
.word 0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886
|
||||
.word 0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41
|
||||
.word 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042
|
||||
|
||||
.word 0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303
|
||||
.word 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3
|
||||
.word 0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9
|
||||
.word 0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8
|
||||
.word 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a
|
||||
.word 0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b
|
||||
.word 0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab
|
||||
.word 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82
|
||||
.word 0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe
|
||||
.word 0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110
|
||||
.word 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15
|
||||
.word 0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee
|
||||
.word 0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72
|
||||
.word 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e
|
||||
.word 0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a
|
||||
.word 0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9
|
||||
.word 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e
|
||||
.word 0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011
|
||||
.word 0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3
|
||||
.word 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90
|
||||
.word 0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf
|
||||
.word 0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af
|
||||
.word 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb
|
||||
.word 0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8
|
||||
.word 0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066
|
||||
.word 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6
|
||||
.word 0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51
|
||||
.word 0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347
|
||||
.word 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1
|
||||
.word 0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db
|
||||
.word 0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195
|
||||
.word 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257
|
||||
|
||||
.word 0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3
|
||||
.word 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362
|
||||
.word 0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3
|
||||
.word 0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9
|
||||
.word 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace
|
||||
.word 0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08
|
||||
.word 0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55
|
||||
.word 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216
|
||||
.word 0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6
|
||||
.word 0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e
|
||||
.word 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550
|
||||
.word 0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8
|
||||
.word 0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a
|
||||
.word 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36
|
||||
.word 0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12
|
||||
.word 0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e
|
||||
.word 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb
|
||||
.word 0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6
|
||||
.word 0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1
|
||||
.word 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033
|
||||
.word 0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad
|
||||
.word 0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3
|
||||
.word 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b
|
||||
.word 0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815
|
||||
.word 0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2
|
||||
.word 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691
|
||||
.word 0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165
|
||||
.word 0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6
|
||||
.word 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147
|
||||
.word 0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44
|
||||
.word 0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d
|
||||
.word 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8
|
||||
|
||||
|
||||
# Input parameters
|
||||
@@ -491,42 +1053,39 @@ DECL(randomx_calc_dataset_item_aarch64):
|
||||
stp x10, x11, [sp, 80]
|
||||
stp x12, x13, [sp, 96]
|
||||
|
||||
ldr x12, superscalarMul0
|
||||
adr x7, superscalarMul0
|
||||
# superscalarMul0, superscalarAdd1
|
||||
ldp x12, x13, [x7]
|
||||
|
||||
mov x8, x0
|
||||
mov x9, x1
|
||||
ldp x8, x9, [sp]
|
||||
mov x10, x2
|
||||
|
||||
# rl[0] = (itemNumber + 1) * superscalarMul0;
|
||||
madd x0, x2, x12, x12
|
||||
|
||||
# rl[1] = rl[0] ^ superscalarAdd1;
|
||||
ldr x12, superscalarAdd1
|
||||
eor x1, x0, x12
|
||||
eor x1, x0, x13
|
||||
|
||||
# rl[2] = rl[0] ^ superscalarAdd2;
|
||||
ldr x12, superscalarAdd2
|
||||
ldp x12, x13, [x7, 16]
|
||||
eor x2, x0, x12
|
||||
|
||||
# rl[3] = rl[0] ^ superscalarAdd3;
|
||||
ldr x12, superscalarAdd3
|
||||
eor x3, x0, x12
|
||||
eor x3, x0, x13
|
||||
|
||||
# rl[4] = rl[0] ^ superscalarAdd4;
|
||||
ldr x12, superscalarAdd4
|
||||
ldp x12, x13, [x7, 32]
|
||||
eor x4, x0, x12
|
||||
|
||||
# rl[5] = rl[0] ^ superscalarAdd5;
|
||||
ldr x12, superscalarAdd5
|
||||
eor x5, x0, x12
|
||||
eor x5, x0, x13
|
||||
|
||||
# rl[6] = rl[0] ^ superscalarAdd6;
|
||||
ldr x12, superscalarAdd6
|
||||
ldp x12, x13, [x7, 48]
|
||||
eor x6, x0, x12
|
||||
|
||||
# rl[7] = rl[0] ^ superscalarAdd7;
|
||||
ldr x12, superscalarAdd7
|
||||
eor x7, x0, x12
|
||||
eor x7, x0, x13
|
||||
|
||||
b DECL(randomx_calc_dataset_item_aarch64_prefetch)
|
||||
|
||||
|
||||
@@ -38,9 +38,17 @@ extern "C" {
|
||||
void randomx_program_aarch64_cacheline_align_mask1();
|
||||
void randomx_program_aarch64_cacheline_align_mask2();
|
||||
void randomx_program_aarch64_update_spMix1();
|
||||
void randomx_program_aarch64_v2_FE_mix();
|
||||
void randomx_program_aarch64_v1_FE_mix();
|
||||
void randomx_program_aarch64_v2_FE_mix_soft_aes();
|
||||
void randomx_program_aarch64_vm_instructions_end_light();
|
||||
void randomx_program_aarch64_vm_instructions_end_light_tweak();
|
||||
void randomx_program_aarch64_light_cacheline_align_mask();
|
||||
void randomx_program_aarch64_light_dataset_offset();
|
||||
void randomx_program_aarch64_vm_instructions_end_v1();
|
||||
void randomx_program_aarch64_vm_instructions_end_v2();
|
||||
void randomx_program_aarch64_vm_instructions_end_light_v1();
|
||||
void randomx_program_aarch64_vm_instructions_end_light_v2();
|
||||
void randomx_init_dataset_aarch64();
|
||||
void randomx_init_dataset_aarch64_end();
|
||||
void randomx_calc_dataset_item_aarch64();
|
||||
|
||||
1204
src/crypto/randomx/jit_compiler_rv64.cpp
Normal file
1204
src/crypto/randomx/jit_compiler_rv64.cpp
Normal file
File diff suppressed because it is too large
Load Diff
151
src/crypto/randomx/jit_compiler_rv64.hpp
Normal file
151
src/crypto/randomx/jit_compiler_rv64.hpp
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
Copyright (c) 2023 tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
struct CodeBuffer {
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
int32_t rcpCount;
|
||||
|
||||
void emit(const uint8_t* src, int32_t len) {
|
||||
memcpy(&code[codePos], src, len);
|
||||
codePos += len;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void emit(T src) {
|
||||
memcpy(&code[codePos], &src, sizeof(src));
|
||||
codePos += sizeof(src);
|
||||
}
|
||||
|
||||
void emitAt(int32_t codePos, const uint8_t* src, int32_t len) {
|
||||
memcpy(&code[codePos], src, len);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void emitAt(int32_t codePos, T src) {
|
||||
memcpy(&code[codePos], &src, sizeof(src));
|
||||
}
|
||||
};
|
||||
|
||||
struct CompilerState : public CodeBuffer {
|
||||
int32_t instructionOffsets[RANDOMX_PROGRAM_MAX_SIZE];
|
||||
int registerUsage[RegistersCount];
|
||||
};
|
||||
|
||||
class Program;
|
||||
struct ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
class Instruction;
|
||||
|
||||
#define HANDLER_ARGS randomx::CompilerState& state, randomx::Instruction isn, int i
|
||||
typedef void(*InstructionGeneratorRV64)(HANDLER_ARGS);
|
||||
|
||||
class JitCompilerRV64 {
|
||||
public:
|
||||
JitCompilerRV64(bool hugePagesEnable, bool optimizedInitDatasetEnable);
|
||||
~JitCompilerRV64();
|
||||
|
||||
void prepare() {}
|
||||
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||
|
||||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram(&programs)[N]);
|
||||
|
||||
void generateDatasetInitCode() {}
|
||||
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return (ProgramFunc*)(vectorCode ? entryProgramVector : entryProgram);
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
return (DatasetInitFunc*)(vectorCode ? entryDataInitVector : entryDataInit);
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return state.code;
|
||||
}
|
||||
size_t getCodeSize();
|
||||
|
||||
void enableWriting() const;
|
||||
void enableExecution() const;
|
||||
|
||||
static InstructionGeneratorRV64 engine[256];
|
||||
static uint8_t inst_map[256];
|
||||
private:
|
||||
CompilerState state;
|
||||
|
||||
uint8_t* vectorCode = nullptr;
|
||||
size_t vectorCodeSize = 0;
|
||||
|
||||
void* entryDataInit = nullptr;
|
||||
void* entryDataInitVector = nullptr;
|
||||
void* entryProgram = nullptr;
|
||||
void* entryProgramVector = nullptr;
|
||||
|
||||
public:
|
||||
static void v1_IADD_RS(HANDLER_ARGS);
|
||||
static void v1_IADD_M(HANDLER_ARGS);
|
||||
static void v1_ISUB_R(HANDLER_ARGS);
|
||||
static void v1_ISUB_M(HANDLER_ARGS);
|
||||
static void v1_IMUL_R(HANDLER_ARGS);
|
||||
static void v1_IMUL_M(HANDLER_ARGS);
|
||||
static void v1_IMULH_R(HANDLER_ARGS);
|
||||
static void v1_IMULH_M(HANDLER_ARGS);
|
||||
static void v1_ISMULH_R(HANDLER_ARGS);
|
||||
static void v1_ISMULH_M(HANDLER_ARGS);
|
||||
static void v1_IMUL_RCP(HANDLER_ARGS);
|
||||
static void v1_INEG_R(HANDLER_ARGS);
|
||||
static void v1_IXOR_R(HANDLER_ARGS);
|
||||
static void v1_IXOR_M(HANDLER_ARGS);
|
||||
static void v1_IROR_R(HANDLER_ARGS);
|
||||
static void v1_IROL_R(HANDLER_ARGS);
|
||||
static void v1_ISWAP_R(HANDLER_ARGS);
|
||||
static void v1_FSWAP_R(HANDLER_ARGS);
|
||||
static void v1_FADD_R(HANDLER_ARGS);
|
||||
static void v1_FADD_M(HANDLER_ARGS);
|
||||
static void v1_FSUB_R(HANDLER_ARGS);
|
||||
static void v1_FSUB_M(HANDLER_ARGS);
|
||||
static void v1_FSCAL_R(HANDLER_ARGS);
|
||||
static void v1_FMUL_R(HANDLER_ARGS);
|
||||
static void v1_FDIV_M(HANDLER_ARGS);
|
||||
static void v1_FSQRT_R(HANDLER_ARGS);
|
||||
static void v1_CBRANCH(HANDLER_ARGS);
|
||||
static void v1_CFROUND(HANDLER_ARGS);
|
||||
static void v1_ISTORE(HANDLER_ARGS);
|
||||
static void v1_NOP(HANDLER_ARGS);
|
||||
};
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user