mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-06 15:42:38 -05:00
Compare commits
59 Commits
3ff41f7c94
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
856813c1ae | ||
|
|
23da1a90f5 | ||
|
|
7981e4a76a | ||
|
|
7ef5142a52 | ||
|
|
db5c6d9190 | ||
|
|
e88009d575 | ||
|
|
5115597e7f | ||
|
|
4cdc35f966 | ||
|
|
b02519b9f5 | ||
|
|
a44b21cef3 | ||
|
|
ea832899f2 | ||
|
|
3ecacf0ac2 | ||
|
|
27c8e60919 | ||
|
|
985fe06e8d | ||
|
|
75b63ddde9 | ||
|
|
643b65f2c0 | ||
|
|
116ba1828f | ||
|
|
da5a5674b4 | ||
|
|
6cc4819cec | ||
|
|
a659397c41 | ||
|
|
20acfd0d79 | ||
|
|
da683d8c3e | ||
|
|
255565b533 | ||
|
|
878e83bf59 | ||
|
|
7abf17cb59 | ||
|
|
eeec5ecd10 | ||
|
|
93f5067999 | ||
|
|
dd6671bc59 | ||
|
|
a1ee2fd9d2 | ||
|
|
2619131176 | ||
|
|
1161f230c5 | ||
|
|
d2363ba28b | ||
|
|
1676da1fe9 | ||
|
|
6e4a5a6d94 | ||
|
|
273133aa63 | ||
|
|
c69e30c9a0 | ||
|
|
6a690ba1e9 | ||
|
|
545aef0937 | ||
|
|
9fa66d3242 | ||
|
|
ec286c7fef | ||
|
|
e28d663d80 | ||
|
|
aba1ad8cfc | ||
|
|
bf44ed52e9 | ||
|
|
762c435fa8 | ||
|
|
48faf0a11b | ||
|
|
d125d22d27 | ||
|
|
9f3591ae0d | ||
|
|
6bbbcc71f1 | ||
|
|
e5a7a69cc0 | ||
|
|
f354b85a7b | ||
|
|
5ed8d79574 | ||
|
|
fc395a5800 | ||
|
|
9138690126 | ||
|
|
d58061c903 | ||
|
|
3b863cf88f | ||
|
|
9c7468df64 | ||
|
|
a18fa269a6 | ||
|
|
bcc5581535 | ||
|
|
dba336aa04 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -4,3 +4,4 @@ scripts/deps
|
|||||||
/CMakeLists.txt.user
|
/CMakeLists.txt.user
|
||||||
/.idea
|
/.idea
|
||||||
/src/backend/opencl/cl/cn/cryptonight_gen.cl
|
/src/backend/opencl/cl/cn/cryptonight_gen.cl
|
||||||
|
.vscode
|
||||||
|
|||||||
10
CHANGELOG.md
10
CHANGELOG.md
@@ -1,3 +1,13 @@
|
|||||||
|
# v6.24.0
|
||||||
|
- [#3671](https://github.com/xmrig/xmrig/pull/3671) Fixed detection of L2 cache size for some complex NUMA topologies.
|
||||||
|
- [#3674](https://github.com/xmrig/xmrig/pull/3674) Fixed ARMv7 build.
|
||||||
|
- [#3677](https://github.com/xmrig/xmrig/pull/3677) Fixed auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
|
||||||
|
- [#3678](https://github.com/xmrig/xmrig/pull/3678) Improved IPv6 support: the new default settings use IPv6 equally with IPv4.
|
||||||
|
|
||||||
|
# v6.23.0
|
||||||
|
- [#3668](https://github.com/xmrig/xmrig/issues/3668) Added support for Windows ARM64.
|
||||||
|
- [#3665](https://github.com/xmrig/xmrig/pull/3665) Tweaked auto-config for AMD CPUs with < 2 MB L3 cache per thread.
|
||||||
|
|
||||||
# v6.22.3
|
# v6.22.3
|
||||||
- [#3605](https://github.com/xmrig/xmrig/pull/3605) CUDA backend: added missing RandomX dataset update.
|
- [#3605](https://github.com/xmrig/xmrig/pull/3605) CUDA backend: added missing RandomX dataset update.
|
||||||
- [#3646](https://github.com/xmrig/xmrig/pull/3646) Optimized auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
|
- [#3646](https://github.com/xmrig/xmrig/pull/3646) Optimized auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ set(HEADERS_CRYPTO
|
|||||||
src/crypto/common/VirtualMemory.h
|
src/crypto/common/VirtualMemory.h
|
||||||
)
|
)
|
||||||
|
|
||||||
if (XMRIG_ARM)
|
if (XMRIG_ARM OR XMRIG_RISCV)
|
||||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_arm.h)
|
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_arm.h)
|
||||||
else()
|
else()
|
||||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86.h)
|
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86.h)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
||||||
|
|
||||||
## Mining backends
|
## Mining backends
|
||||||
- **CPU** (x86/x64/ARMv7/ARMv8)
|
- **CPU** (x86/x64/ARMv7/ARMv8/RISC-V)
|
||||||
- **OpenCL** for AMD GPUs.
|
- **OpenCL** for AMD GPUs.
|
||||||
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
|
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
if (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
||||||
|
|
||||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||||
|
|||||||
@@ -21,6 +21,19 @@ if (NOT VAES_SUPPORTED)
|
|||||||
set(WITH_VAES OFF)
|
set(WITH_VAES OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Detect RISC-V architecture early (before it's used below)
|
||||||
|
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv64|riscv|rv64)$")
|
||||||
|
set(RISCV_TARGET 64)
|
||||||
|
set(XMRIG_RISCV ON)
|
||||||
|
add_definitions(-DXMRIG_RISCV)
|
||||||
|
message(STATUS "Detected RISC-V 64-bit architecture (${CMAKE_SYSTEM_PROCESSOR})")
|
||||||
|
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv32|rv32)$")
|
||||||
|
set(RISCV_TARGET 32)
|
||||||
|
set(XMRIG_RISCV ON)
|
||||||
|
add_definitions(-DXMRIG_RISCV)
|
||||||
|
message(STATUS "Detected RISC-V 32-bit architecture (${CMAKE_SYSTEM_PROCESSOR})")
|
||||||
|
endif()
|
||||||
|
|
||||||
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||||
add_definitions(-DRAPIDJSON_SSE2)
|
add_definitions(-DRAPIDJSON_SSE2)
|
||||||
else()
|
else()
|
||||||
@@ -29,6 +42,57 @@ else()
|
|||||||
set(WITH_VAES OFF)
|
set(WITH_VAES OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Disable x86-specific features for RISC-V
|
||||||
|
if (XMRIG_RISCV)
|
||||||
|
set(WITH_SSE4_1 OFF)
|
||||||
|
set(WITH_AVX2 OFF)
|
||||||
|
set(WITH_VAES OFF)
|
||||||
|
|
||||||
|
# default build uses the RV64GC baseline
|
||||||
|
set(RVARCH "rv64gc")
|
||||||
|
|
||||||
|
# for native builds, probe the CPU and enable the vector (V + Zicbop), Zba and Zbb extensions when they are supported
|
||||||
|
if(ARCH STREQUAL "native")
|
||||||
|
enable_language(ASM)
|
||||||
|
|
||||||
|
try_run(RANDOMX_VECTOR_RUN_FAIL
|
||||||
|
RANDOMX_VECTOR_COMPILE_OK
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_vector.s
|
||||||
|
COMPILE_DEFINITIONS "-march=rv64gcv_zicbop")
|
||||||
|
|
||||||
|
if (RANDOMX_VECTOR_COMPILE_OK AND NOT RANDOMX_VECTOR_RUN_FAIL)
|
||||||
|
set(RVARCH "${RVARCH}v_zicbop")
|
||||||
|
add_definitions(-DXMRIG_RVV_ENABLED)
|
||||||
|
message(STATUS "RISC-V vector extension detected")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
try_run(RANDOMX_ZBA_RUN_FAIL
|
||||||
|
RANDOMX_ZBA_COMPILE_OK
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zba.s
|
||||||
|
COMPILE_DEFINITIONS "-march=rv64gc_zba")
|
||||||
|
|
||||||
|
if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
|
||||||
|
set(RVARCH "${RVARCH}_zba")
|
||||||
|
message(STATUS "RISC-V zba extension detected")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
try_run(RANDOMX_ZBB_RUN_FAIL
|
||||||
|
RANDOMX_ZBB_COMPILE_OK
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zbb.s
|
||||||
|
COMPILE_DEFINITIONS "-march=rv64gc_zbb")
|
||||||
|
|
||||||
|
if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
|
||||||
|
set(RVARCH "${RVARCH}_zbb")
|
||||||
|
message(STATUS "RISC-V zbb extension detected")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "Using -march=${RVARCH}")
|
||||||
|
endif()
|
||||||
|
|
||||||
add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag
|
add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag
|
||||||
|
|
||||||
if (ARM_V8)
|
if (ARM_V8)
|
||||||
@@ -40,7 +104,7 @@ endif()
|
|||||||
if (NOT ARM_TARGET)
|
if (NOT ARM_TARGET)
|
||||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$")
|
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$")
|
||||||
set(ARM_TARGET 8)
|
set(ARM_TARGET 8)
|
||||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$")
|
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve|armv8l)$")
|
||||||
set(ARM_TARGET 7)
|
set(ARM_TARGET 7)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -26,8 +26,13 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
|||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
|
||||||
elseif (ARM_TARGET EQUAL 7)
|
elseif (ARM_TARGET EQUAL 7)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
|
||||||
|
elseif (XMRIG_RISCV)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}")
|
||||||
|
|
||||||
|
add_definitions(-DHAVE_ROTR)
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||||
@@ -41,6 +46,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
|||||||
else()
|
else()
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware")
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware")
|
||||||
endif()
|
endif()
|
||||||
|
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
||||||
endif()
|
endif()
|
||||||
@@ -74,6 +81,11 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
|||||||
elseif (ARM_TARGET EQUAL 7)
|
elseif (ARM_TARGET EQUAL 7)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
|
elseif (XMRIG_RISCV)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}")
|
||||||
|
|
||||||
|
add_definitions(-DHAVE_ROTR)
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||||
|
|||||||
@@ -17,6 +17,10 @@ else()
|
|||||||
set(XMRIG_OS_LINUX ON)
|
set(XMRIG_OS_LINUX ON)
|
||||||
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly)
|
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly)
|
||||||
set(XMRIG_OS_FREEBSD ON)
|
set(XMRIG_OS_FREEBSD ON)
|
||||||
|
elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD)
|
||||||
|
set(XMRIG_OS_OPENBSD ON)
|
||||||
|
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
|
||||||
|
set(XMRIG_OS_HAIKU ON)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@@ -43,6 +47,10 @@ elseif(XMRIG_OS_UNIX)
|
|||||||
add_definitions(-DXMRIG_OS_LINUX)
|
add_definitions(-DXMRIG_OS_LINUX)
|
||||||
elseif (XMRIG_OS_FREEBSD)
|
elseif (XMRIG_OS_FREEBSD)
|
||||||
add_definitions(-DXMRIG_OS_FREEBSD)
|
add_definitions(-DXMRIG_OS_FREEBSD)
|
||||||
|
elseif (XMRIG_OS_OPENBSD)
|
||||||
|
add_definitions(-DXMRIG_OS_OPENBSD)
|
||||||
|
elseif (XMRIG_OS_HAIKU)
|
||||||
|
add_definitions(-DXMRIG_OS_HAIKU)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,18 @@
|
|||||||
if (WITH_RANDOMX)
|
if (WITH_RANDOMX)
|
||||||
|
include(CheckSymbolExists)
|
||||||
|
|
||||||
|
if (WIN32)
|
||||||
|
check_symbol_exists(_aligned_malloc "stdlib.h" HAVE_ALIGNED_MALLOC)
|
||||||
|
if (HAVE_ALIGNED_MALLOC)
|
||||||
|
add_compile_definitions(HAVE_ALIGNED_MALLOC)
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
check_symbol_exists(posix_memalign "stdlib.h" HAVE_POSIX_MEMALIGN)
|
||||||
|
if (HAVE_POSIX_MEMALIGN)
|
||||||
|
add_compile_definitions(HAVE_POSIX_MEMALIGN)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
add_definitions(/DXMRIG_ALGO_RANDOMX)
|
add_definitions(/DXMRIG_ALGO_RANDOMX)
|
||||||
set(WITH_ARGON2 ON)
|
set(WITH_ARGON2 ON)
|
||||||
|
|
||||||
@@ -48,7 +62,7 @@ if (WITH_RANDOMX)
|
|||||||
src/crypto/randomx/jit_compiler_x86_static.asm
|
src/crypto/randomx/jit_compiler_x86_static.asm
|
||||||
src/crypto/randomx/jit_compiler_x86.cpp
|
src/crypto/randomx/jit_compiler_x86.cpp
|
||||||
)
|
)
|
||||||
elseif (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
elseif (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
list(APPEND SOURCES_CRYPTO
|
list(APPEND SOURCES_CRYPTO
|
||||||
src/crypto/randomx/jit_compiler_x86_static.S
|
src/crypto/randomx/jit_compiler_x86_static.S
|
||||||
src/crypto/randomx/jit_compiler_x86.cpp
|
src/crypto/randomx/jit_compiler_x86.cpp
|
||||||
@@ -66,6 +80,16 @@ if (WITH_RANDOMX)
|
|||||||
else()
|
else()
|
||||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
||||||
endif()
|
endif()
|
||||||
|
elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
|
list(APPEND SOURCES_CRYPTO
|
||||||
|
src/crypto/randomx/jit_compiler_rv64_static.S
|
||||||
|
src/crypto/randomx/jit_compiler_rv64_vector_static.S
|
||||||
|
src/crypto/randomx/jit_compiler_rv64.cpp
|
||||||
|
src/crypto/randomx/jit_compiler_rv64_vector.cpp
|
||||||
|
)
|
||||||
|
# cheat because cmake and ccache hate each other
|
||||||
|
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
|
||||||
|
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)
|
||||||
else()
|
else()
|
||||||
list(APPEND SOURCES_CRYPTO
|
list(APPEND SOURCES_CRYPTO
|
||||||
src/crypto/randomx/jit_compiler_fallback.cpp
|
src/crypto/randomx/jit_compiler_fallback.cpp
|
||||||
@@ -102,7 +126,7 @@ if (WITH_RANDOMX)
|
|||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
|
if (WITH_MSR AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
|
||||||
add_definitions(/DXMRIG_FEATURE_MSR)
|
add_definitions(/DXMRIG_FEATURE_MSR)
|
||||||
add_definitions(/DXMRIG_FIX_RYZEN)
|
add_definitions(/DXMRIG_FIX_RYZEN)
|
||||||
message("-- WITH_MSR=ON")
|
message("-- WITH_MSR=ON")
|
||||||
|
|||||||
365
doc/RISCV_PERF_TUNING.md
Normal file
365
doc/RISCV_PERF_TUNING.md
Normal file
@@ -0,0 +1,365 @@
|
|||||||
|
# RISC-V Performance Optimization Guide
|
||||||
|
|
||||||
|
This guide provides comprehensive instructions for optimizing XMRig on RISC-V architectures.
|
||||||
|
|
||||||
|
## Build Optimizations
|
||||||
|
|
||||||
|
### Compiler Flags Applied Automatically
|
||||||
|
|
||||||
|
The CMake build now applies aggressive RISC-V-specific optimizations:
|
||||||
|
|
||||||
|
```cmake
|
||||||
|
# RISC-V ISA with extensions
|
||||||
|
-march=rv64gcv_zba_zbb_zbc_zbs
|
||||||
|
|
||||||
|
# Aggressive compiler optimizations
|
||||||
|
-funroll-loops # Unroll loops for ILP (instruction-level parallelism)
|
||||||
|
-fomit-frame-pointer # Free up the frame pointer register (s0/fp) for use as a general-purpose register
|
||||||
|
-fno-common # Better code generation for global variables
|
||||||
|
-finline-functions # Inline more functions for better cache locality
|
||||||
|
-ffast-math # Relaxed FP semantics (safe for mining)
|
||||||
|
-flto # Link-time optimization for cross-module inlining
|
||||||
|
|
||||||
|
# Release build additions
|
||||||
|
-minline-atomics # Inline atomic operations for faster synchronization
|
||||||
|
```
|
||||||
|
|
||||||
|
### Optimal Build Command
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||||
|
make -j$(nproc)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected build time**: 5-15 minutes depending on CPU
|
||||||
|
|
||||||
|
## Runtime Optimizations
|
||||||
|
|
||||||
|
### 1. Memory Configuration (Most Important)
|
||||||
|
|
||||||
|
Enable huge pages to reduce TLB misses and fragmentation:
|
||||||
|
|
||||||
|
#### Enable 2MB Huge Pages
|
||||||
|
```bash
|
||||||
|
# Calculate required huge pages (1 page = 2MB)
|
||||||
|
# For 2 GB dataset: 1024 pages
|
||||||
|
# For cache + dataset: 1536 pages minimum
|
||||||
|
sudo sysctl -w vm.nr_hugepages=2048
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify:
|
||||||
|
```bash
|
||||||
|
grep HugePages /proc/meminfo
|
||||||
|
# Expected: HugePages_Free should be close to nr_hugepages
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Enable 1GB Huge Pages (Optional but Recommended)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run provided helper script
|
||||||
|
sudo ./scripts/enable_1gb_pages.sh
|
||||||
|
|
||||||
|
# Verify 1GB pages are available
|
||||||
|
cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
|
||||||
|
# Should be: >= 1 (one 1GB page)
|
||||||
|
```
|
||||||
|
|
||||||
|
Update config.json:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"cpu": {
|
||||||
|
"huge-pages": true
|
||||||
|
},
|
||||||
|
"randomx": {
|
||||||
|
"1gb-pages": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. RandomX Mode Selection
|
||||||
|
|
||||||
|
| Mode | Memory | Init Time | Throughput | Recommendation |
|
||||||
|
|------|--------|-----------|-----------|-----------------|
|
||||||
|
| **light** | 256 MB | 10 sec | Low | Testing, resource-constrained |
|
||||||
|
| **fast** | 2 GB | 2-5 min* | High | Production (with huge pages) |
|
||||||
|
| **auto** | 2 GB | Varies | High | Default (uses fast if possible) |
|
||||||
|
|
||||||
|
*With optimizations; can be 30+ minutes without huge pages
|
||||||
|
|
||||||
|
**For RISC-V, use fast mode with huge pages enabled.**
|
||||||
|
|
||||||
|
### 3. Dataset Initialization Threads
|
||||||
|
|
||||||
|
Optimal thread count = 60-75% of CPU cores (leaves headroom for OS/other tasks)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"randomx": {
|
||||||
|
"init": 4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Or let XMRig auto-detect the number of init threads by setting `init` to `-1` (the auto-detection was rewritten for RISC-V):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"randomx": {
|
||||||
|
"init": -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. CPU Affinity (Optional)
|
||||||
|
|
||||||
|
Pin threads to specific cores for better cache locality:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"cpu": {
|
||||||
|
"rx/0": [
|
||||||
|
{ "threads": 1, "affinity": 0 },
|
||||||
|
{ "threads": 1, "affinity": 1 },
|
||||||
|
{ "threads": 1, "affinity": 2 },
|
||||||
|
{ "threads": 1, "affinity": 3 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. CPU Governor (Linux)
|
||||||
|
|
||||||
|
Set to performance mode for maximum throughput:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check current governor
|
||||||
|
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
|
||||||
|
|
||||||
|
# Set to performance (requires root)
|
||||||
|
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
|
||||||
|
# Should output: performance
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Examples
|
||||||
|
|
||||||
|
### Minimum (Testing)
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"randomx": {
|
||||||
|
"mode": "light"
|
||||||
|
},
|
||||||
|
"cpu": {
|
||||||
|
"huge-pages": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Recommended (Balanced)
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"randomx": {
|
||||||
|
"mode": "auto",
|
||||||
|
"init": 4,
|
||||||
|
"1gb-pages": true
|
||||||
|
},
|
||||||
|
"cpu": {
|
||||||
|
"huge-pages": true,
|
||||||
|
"priority": 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Maximum Performance (Production)
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"randomx": {
|
||||||
|
"mode": "fast",
|
||||||
|
"init": -1,
|
||||||
|
"1gb-pages": true,
|
||||||
|
"scratchpad_prefetch_mode": 1
|
||||||
|
},
|
||||||
|
"cpu": {
|
||||||
|
"huge-pages": true,
|
||||||
|
"priority": 3,
|
||||||
|
"yield": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## CLI Equivalents
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Light mode
|
||||||
|
./xmrig --randomx-mode=light
|
||||||
|
|
||||||
|
# Fast mode with 4 init threads
|
||||||
|
./xmrig --randomx-mode=fast --randomx-init=4
|
||||||
|
|
||||||
|
# Benchmark
|
||||||
|
./xmrig --bench=1M --algo=rx/0
|
||||||
|
|
||||||
|
# Benchmark Wownero variant (1 MB scratchpad)
|
||||||
|
./xmrig --bench=1M --algo=rx/wow
|
||||||
|
|
||||||
|
# Mine to pool
|
||||||
|
./xmrig -o pool.example.com:3333 -u YOUR_WALLET -p x
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Diagnostics
|
||||||
|
|
||||||
|
### Check if Vector Extensions are Detected
|
||||||
|
|
||||||
|
Look for the `FEATURES:` line in the startup output:
|
||||||
|
```
|
||||||
|
* CPU: ky,x60 (uarch ky,x1)
|
||||||
|
* FEATURES: rv64imafdcv zba zbb zbc zbs
|
||||||
|
```
|
||||||
|
|
||||||
|
- `v`: Vector extension (RVV) ✓
|
||||||
|
- `zba`, `zbb`, `zbc`, `zbs`: Bit manipulation ✓
|
||||||
|
- If any of these are missing, make sure the build used `-march=rv64gcv_zba_zbb_zbc_zbs`
|
||||||
|
|
||||||
|
### Verify Huge Pages at Runtime
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run xmrig with --bench=1M and check output
|
||||||
|
./xmrig --bench=1M
|
||||||
|
|
||||||
|
# Look for line like:
|
||||||
|
# HUGE PAGES 100% 1 / 1 (1024 MB)
|
||||||
|
```
|
||||||
|
|
||||||
|
- Should show 100% for dataset AND threads
|
||||||
|
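- If less, increase `vm.nr_hugepages`. If the kernel still cannot allocate enough pages because memory is fragmented, persist the value in `/etc/sysctl.conf` and reboot (a value set only with `sysctl -w` is lost on reboot)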
|
||||||
|
|
||||||
|
### Monitor Performance
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run benchmark multiple times to find stable hashrate
|
||||||
|
./xmrig --bench=1M --algo=rx/0
|
||||||
|
./xmrig --bench=10M --algo=rx/0
|
||||||
|
./xmrig --bench=100M --algo=rx/0
|
||||||
|
|
||||||
|
# Check system load and memory during mining
|
||||||
|
while true; do free -h; grep HugePages /proc/meminfo; sleep 2; done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Expected Performance
|
||||||
|
|
||||||
|
### Hardware: Orange Pi RV2 (Ky X1, 8 cores @ ~1.5 GHz)
|
||||||
|
|
||||||
|
| Config | Mode | Hashrate | Init Time |
|
||||||
|
|--------|------|----------|-----------|
|
||||||
|
| Scalar (baseline) | fast | 30 H/s | 10 min |
|
||||||
|
| Scalar + huge pages | fast | 33 H/s | 2 min |
|
||||||
|
| RVV (if enabled) | fast | 70-100 H/s | 3 min |
|
||||||
|
|
||||||
|
*Actual results depend on CPU frequency, memory speed, and load*
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Long Initialization Times (30+ minutes)
|
||||||
|
|
||||||
|
**Cause**: Huge pages not enabled, system using swap
|
||||||
|
**Solution**:
|
||||||
|
1. Enable huge pages: `sudo sysctl -w vm.nr_hugepages=2048`
|
||||||
|
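2. If the pages cannot be allocated (fragmented memory), add `vm.nr_hugepages=2048` to `/etc/sysctl.conf` and then reboot: `sudo reboot` (a value set only with `sysctl -w` does not survive a reboot)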
|
||||||
|
3. Reduce mining threads to free memory
|
||||||
|
4. Check available memory: `free -h`
|
||||||
|
|
||||||
|
### Low Hashrate (50% of expected)
|
||||||
|
|
||||||
|
**Cause**: CPU governor set to power-save, no huge pages, high contention
|
||||||
|
**Solution**:
|
||||||
|
1. Set governor to performance: `echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor`
|
||||||
|
2. Enable huge pages
|
||||||
|
3. Reduce number of mining threads
|
||||||
|
4. Check system load: `top` or `htop`
|
||||||
|
|
||||||
|
### Dataset Init Crashes or Hangs
|
||||||
|
|
||||||
|
**Cause**: Insufficient memory, corrupted huge pages
|
||||||
|
**Solution**:
|
||||||
|
1. Disable huge pages temporarily: set `huge-pages: false` in config
|
||||||
|
2. Reduce mining threads
|
||||||
|
3. Reboot and re-enable huge pages
|
||||||
|
4. Try light mode: `--randomx-mode=light`
|
||||||
|
|
||||||
|
### Out of Memory During Benchmark
|
||||||
|
|
||||||
|
**Cause**: Not enough RAM for dataset + cache + threads
|
||||||
|
**Solution**:
|
||||||
|
1. Use light mode: `--randomx-mode=light`
|
||||||
|
2. Reduce mining threads: `--threads=1`
|
||||||
|
3. Increase available memory (kill other processes)
|
||||||
|
4. Check: `free -h` before mining
|
||||||
|
|
||||||
|
## Advanced Tuning
|
||||||
|
|
||||||
|
### Vector Length (VLEN) Detection
|
||||||
|
|
||||||
|
The vector register length (VLEN) implemented by the CPU's RISC-V vector extension affects performance:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check VLEN on your CPU
|
||||||
|
cat /proc/cpuinfo | grep vlen
|
||||||
|
|
||||||
|
# Expected values:
|
||||||
|
# - 128 bits (16 bytes) = minimum
|
||||||
|
# - 256 bits (32 bytes) = common
|
||||||
|
# - 512 bits (64 bytes) = high performance
|
||||||
|
```
|
||||||
|
|
||||||
|
Larger VLEN generally means better performance for vectorized operations.
|
||||||
|
|
||||||
|
### Prefetch Optimization
|
||||||
|
|
||||||
|
The code automatically optimizes memory prefetching for RISC-V:
|
||||||
|
|
||||||
|
```
|
||||||
|
scratchpad_prefetch_mode: 0 = disabled (slowest)
|
||||||
|
scratchpad_prefetch_mode: 1 = prefetch.r (default, recommended)
|
||||||
|
scratchpad_prefetch_mode: 2 = prefetch.w (experimental)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Memory Bandwidth Saturation
|
||||||
|
|
||||||
|
If experiencing memory bandwidth saturation (high latency):
|
||||||
|
|
||||||
|
1. Reduce mining threads
|
||||||
|
2. Increase the L2/L3 cache available to each mining thread by running fewer threads per core
|
||||||
|
3. Enable cache QoS (AMD Ryzen): `cache_qos: true`
|
||||||
|
|
||||||
|
## Building with Custom Flags
|
||||||
|
|
||||||
|
To build with custom RISC-V flags:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake -DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_C_FLAGS="-march=rv64gcv_zba_zbb_zbc_zbs -O3 -funroll-loops -fomit-frame-pointer" \
|
||||||
|
..
|
||||||
|
make -j$(nproc)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Future Optimizations
|
||||||
|
|
||||||
|
- [ ] Zbk* (crypto) support detection and usage
|
||||||
|
- [ ] Optimal VLEN-aware algorithm selection
|
||||||
|
- [ ] Per-core memory affinity (NUMA support)
|
||||||
|
- [ ] Dynamic thread count adjustment based on thermals
|
||||||
|
- [ ] Cross-compile optimizations for various RISC-V cores
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [RISC-V Vector Extension Spec](https://github.com/riscv/riscv-v-spec)
|
||||||
|
- [RISC-V Bit Manipulation Spec](https://github.com/riscv/riscv-bitmanip)
|
||||||
|
- [RISC-V Crypto Spec](https://github.com/riscv/riscv-crypto)
|
||||||
|
- [XMRig Documentation](https://xmrig.com/docs)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
For further optimization, enable RVV intrinsics by replacing `sse2rvv.h` with `sse2rvv_optimized.h` in the build.
|
||||||
@@ -12,7 +12,7 @@ if grep -E 'AMD Ryzen|AMD EPYC|AuthenticAMD' /proc/cpuinfo > /dev/null;
|
|||||||
then
|
then
|
||||||
if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
|
if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
|
||||||
then
|
then
|
||||||
if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null;
|
if grep "model[[:space:]]\{1,\}:[[:space:]]\(97\|117\)" /proc/cpuinfo > /dev/null;
|
||||||
then
|
then
|
||||||
echo "Detected Zen4 CPU"
|
echo "Detected Zen4 CPU"
|
||||||
wrmsr -a 0xc0011020 0x4400000000000
|
wrmsr -a 0xc0011020 0x4400000000000
|
||||||
|
|||||||
2
src/3rdparty/argon2/CMakeLists.txt
vendored
2
src/3rdparty/argon2/CMakeLists.txt
vendored
@@ -35,7 +35,7 @@ if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
|||||||
add_feature_impl(xop "" HAVE_XOP)
|
add_feature_impl(xop "" HAVE_XOP)
|
||||||
add_feature_impl(avx2 "/arch:AVX2" HAVE_AVX2)
|
add_feature_impl(avx2 "/arch:AVX2" HAVE_AVX2)
|
||||||
add_feature_impl(avx512f "/arch:AVX512F" HAVE_AVX512F)
|
add_feature_impl(avx512f "/arch:AVX512F" HAVE_AVX512F)
|
||||||
elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
elseif (NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
function(add_feature_impl FEATURE GCC_FLAG DEF)
|
function(add_feature_impl FEATURE GCC_FLAG DEF)
|
||||||
add_library(argon2-${FEATURE} STATIC arch/x86_64/lib/argon2-${FEATURE}.c)
|
add_library(argon2-${FEATURE} STATIC arch/x86_64/lib/argon2-${FEATURE}.c)
|
||||||
target_include_directories(argon2-${FEATURE} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)
|
target_include_directories(argon2-${FEATURE} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)
|
||||||
|
|||||||
48
src/3rdparty/hwloc/NEWS
vendored
48
src/3rdparty/hwloc/NEWS
vendored
@@ -1,5 +1,5 @@
|
|||||||
Copyright © 2009 CNRS
|
Copyright © 2009 CNRS
|
||||||
Copyright © 2009-2024 Inria. All rights reserved.
|
Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
Copyright © 2009-2013 Université Bordeaux
|
Copyright © 2009-2013 Université Bordeaux
|
||||||
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
|
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
|
||||||
@@ -17,6 +17,52 @@ bug fixes (and other actions) for each version of hwloc since version
|
|||||||
0.9.
|
0.9.
|
||||||
|
|
||||||
|
|
||||||
|
Version 2.12.1
|
||||||
|
--------------
|
||||||
|
* Add hwloc-calc's --default-nodes option to hwloc-bind and hwloc-info.
|
||||||
|
* Improve the --best-memattr "default" fallback, try to use "default"
|
||||||
|
memory nodes, and add verbose messages and warnings if some
|
||||||
|
performance info are incomplete or missing.
|
||||||
|
Thanks to Antoine Morvan for the report.
|
||||||
|
* Fix CPU and memory binding on different locations,
|
||||||
|
thanks to Antoine Morvan for the report.
|
||||||
|
* Add HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY and enable it by
|
||||||
|
default in hwloc-calc --local-memory for finding local NUMA nodes
|
||||||
|
that do not exactly match input locations.
|
||||||
|
Thanks to Antoine Morvan for the report.
|
||||||
|
* Fix a possible crash in the x86 backend when Qemu is configured to
|
||||||
|
expose multicore/thread CPUs that are actually single-core/thread.
|
||||||
|
Thanks to Georg Pfuetzenreuter.
|
||||||
|
|
||||||
|
|
||||||
|
Version 2.12.0
|
||||||
|
--------------
|
||||||
|
* Add hwloc_topology_get_default_nodeset() for the set of default
|
||||||
|
NUMA nodes.
|
||||||
|
- hwloc-calc now has --default-nodes option.
|
||||||
|
* Rework oneAPI LevelZero support to use zesInit() and avoid the need
|
||||||
|
to set ZES_ENABLE_SYSMAN=1 in the environment.
|
||||||
|
- zesDriverGetDeviceByUuidExp() is now required in the L0 runtime.
|
||||||
|
- ZES/Sysman variants were added in hwloc/levelzero.h to specifically
|
||||||
|
handle ZES/Sysman device handles.
|
||||||
|
* Fix the locality of AMD GPU partitions, thanks to Edgar Leon for
|
||||||
|
reporting and debugging the issue.
|
||||||
|
* Better detect Cray Slingshot NICs, thanks to Edgar Leon.
|
||||||
|
* Add support for Die objects and Module groups on Windows.
|
||||||
|
* Only filter-out Dies that are identical to their Packages
|
||||||
|
when it applies to all Dies.
|
||||||
|
* Improve hwloc-calc to handle CPU-less NUMA nodes or platforms with
|
||||||
|
heterogeneous memory without requiring --nodeset-output.
|
||||||
|
* hwloc-calc now accepts counting/listing cpukinds and memory tiers
|
||||||
|
with -N and -I cpukind/memorytier.
|
||||||
|
* The systemd-dbus-api output of hwloc-calc has changed, and
|
||||||
|
--nodeset-output-format was added, to support NUMA node outputs.
|
||||||
|
Thanks to Pierre Neyron.
|
||||||
|
* Update NVLink bandwidth and CUDA capabilities up to NVIDIA Blackwell.
|
||||||
|
* Fix some NUMA syscalls on Linux for platforms with old libc headers.
|
||||||
|
* Some minor fixes in distances.
|
||||||
|
|
||||||
|
|
||||||
Version 2.11.2
|
Version 2.11.2
|
||||||
--------------
|
--------------
|
||||||
* Add missing CPU info attrs on aarch64 on Linux.
|
* Add missing CPU info attrs on aarch64 on Linux.
|
||||||
|
|||||||
8
src/3rdparty/hwloc/VERSION
vendored
8
src/3rdparty/hwloc/VERSION
vendored
@@ -8,8 +8,8 @@
|
|||||||
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
||||||
|
|
||||||
major=2
|
major=2
|
||||||
minor=11
|
minor=12
|
||||||
release=2
|
release=1
|
||||||
|
|
||||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||||
# it will be appended to the version number. It does not have to be
|
# it will be appended to the version number. It does not have to be
|
||||||
@@ -22,7 +22,7 @@ greek=
|
|||||||
|
|
||||||
# The date when this release was created
|
# The date when this release was created
|
||||||
|
|
||||||
date="Sep 26, 2024"
|
date="May 12, 2025"
|
||||||
|
|
||||||
# If snapshot=1, then use the value from snapshot_version as the
|
# If snapshot=1, then use the value from snapshot_version as the
|
||||||
# entire hwloc version (i.e., ignore major, minor, release, and
|
# entire hwloc version (i.e., ignore major, minor, release, and
|
||||||
@@ -41,6 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
|
|||||||
# 2. Version numbers are described in the Libtool current:revision:age
|
# 2. Version numbers are described in the Libtool current:revision:age
|
||||||
# format.
|
# format.
|
||||||
|
|
||||||
libhwloc_so_version=23:1:8
|
libhwloc_so_version=25:0:10
|
||||||
|
|
||||||
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
||||||
|
|||||||
15
src/3rdparty/hwloc/include/hwloc.h
vendored
15
src/3rdparty/hwloc/include/hwloc.h
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
* Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012 Université Bordeaux
|
* Copyright © 2009-2012 Université Bordeaux
|
||||||
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -112,7 +112,7 @@ extern "C" {
|
|||||||
* Two stable releases of the same series usually have the same ::HWLOC_API_VERSION
|
* Two stable releases of the same series usually have the same ::HWLOC_API_VERSION
|
||||||
* even if their HWLOC_VERSION are different.
|
* even if their HWLOC_VERSION are different.
|
||||||
*/
|
*/
|
||||||
#define HWLOC_API_VERSION 0x00020b00
|
#define HWLOC_API_VERSION 0x00020c00
|
||||||
|
|
||||||
/** \brief Indicate at runtime which hwloc API version was used at build time.
|
/** \brief Indicate at runtime which hwloc API version was used at build time.
|
||||||
*
|
*
|
||||||
@@ -346,9 +346,10 @@ typedef enum {
|
|||||||
*
|
*
|
||||||
* Some operating systems (e.g. Linux) may expose a single die per package
|
* Some operating systems (e.g. Linux) may expose a single die per package
|
||||||
* even if the hardware does not support dies at all. To avoid showing
|
* even if the hardware does not support dies at all. To avoid showing
|
||||||
* such non-existing dies, the corresponding hwloc backend may filter them out.
|
* such non-existing dies, hwloc will filter them out if all of them are
|
||||||
|
* identical to packages.
|
||||||
* This is functionally equivalent to ::HWLOC_TYPE_FILTER_KEEP_STRUCTURE
|
* This is functionally equivalent to ::HWLOC_TYPE_FILTER_KEEP_STRUCTURE
|
||||||
* being enforced.
|
* being enforced for Dies versus Packages.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
|
HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
|
||||||
@@ -1047,7 +1048,7 @@ HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwlo
|
|||||||
* If \p size is 0, \p string may safely be \c NULL.
|
* If \p size is 0, \p string may safely be \c NULL.
|
||||||
*
|
*
|
||||||
* \return the number of characters that were actually written if not truncating,
|
* \return the number of characters that were actually written if not truncating,
|
||||||
* or that would have been written (not including the ending \\0).
|
* or that would have been written (not including the ending \c \0).
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size,
|
HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size,
|
||||||
hwloc_obj_t obj,
|
hwloc_obj_t obj,
|
||||||
@@ -1062,7 +1063,7 @@ HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_
|
|||||||
* If \p size is 0, \p string may safely be \c NULL.
|
* If \p size is 0, \p string may safely be \c NULL.
|
||||||
*
|
*
|
||||||
* \return the number of characters that were actually written if not truncating,
|
* \return the number of characters that were actually written if not truncating,
|
||||||
* or that would have been written (not including the ending \\0).
|
* or that would have been written (not including the ending \c \0).
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size,
|
HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size,
|
||||||
hwloc_obj_t obj, const char * __hwloc_restrict separator,
|
hwloc_obj_t obj, const char * __hwloc_restrict separator,
|
||||||
@@ -2002,7 +2003,7 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo
|
|||||||
* a file, as with hwloc_topology_set_xml()).
|
* a file, as with hwloc_topology_set_xml()).
|
||||||
*
|
*
|
||||||
* Gather topology information from the XML memory buffer given at
|
* Gather topology information from the XML memory buffer given at
|
||||||
* \p buffer and of length \p size (including an ending \0).
|
* \p buffer and of length \p size (including an ending \c \0).
|
||||||
* This buffer may have been filled earlier with
|
* This buffer may have been filled earlier with
|
||||||
* hwloc_topology_export_xmlbuffer() in hwloc/export.h.
|
* hwloc_topology_export_xmlbuffer() in hwloc/export.h.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
* Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012 Université Bordeaux
|
* Copyright © 2009-2012 Université Bordeaux
|
||||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -11,10 +11,10 @@
|
|||||||
#ifndef HWLOC_CONFIG_H
|
#ifndef HWLOC_CONFIG_H
|
||||||
#define HWLOC_CONFIG_H
|
#define HWLOC_CONFIG_H
|
||||||
|
|
||||||
#define HWLOC_VERSION "2.11.2"
|
#define HWLOC_VERSION "2.12.1"
|
||||||
#define HWLOC_VERSION_MAJOR 2
|
#define HWLOC_VERSION_MAJOR 2
|
||||||
#define HWLOC_VERSION_MINOR 11
|
#define HWLOC_VERSION_MINOR 12
|
||||||
#define HWLOC_VERSION_RELEASE 2
|
#define HWLOC_VERSION_RELEASE 1
|
||||||
#define HWLOC_VERSION_GREEK ""
|
#define HWLOC_VERSION_GREEK ""
|
||||||
|
|
||||||
#define __hwloc_restrict
|
#define __hwloc_restrict
|
||||||
|
|||||||
79
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
79
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012 Université Bordeaux
|
* Copyright © 2009-2012 Université Bordeaux
|
||||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -113,51 +113,88 @@ HWLOC_DECLSPEC int hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t sr
|
|||||||
* Bitmap/String Conversion
|
* Bitmap/String Conversion
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \brief Stringify a bitmap.
|
/** \brief Stringify a bitmap in the default hwloc format.
|
||||||
|
*
|
||||||
|
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
|
||||||
|
*
|
||||||
|
* Print the bits set inside a bitmap as a comma-separated list of hexadecimal 32-bit blocks.
|
||||||
|
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff,0x00000006,0x00000002"</tt>.
|
||||||
*
|
*
|
||||||
* Up to \p buflen characters may be written in buffer \p buf.
|
* Up to \p buflen characters may be written in buffer \p buf.
|
||||||
*
|
*
|
||||||
* If \p buflen is 0, \p buf may safely be \c NULL.
|
* If \p buflen is 0, \p buf may safely be \c NULL.
|
||||||
*
|
*
|
||||||
* \return the number of characters that were actually written if not truncating,
|
* \return the number of characters that were actually written if not truncating,
|
||||||
* or that would have been written (not including the ending \\0).
|
* or that would have been written (not including the ending \c \0).
|
||||||
|
* \return -1 on error.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
||||||
|
|
||||||
/** \brief Stringify a bitmap into a newly allocated string.
|
/** \brief Stringify a bitmap into a newly allocated string in the default hwloc format.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
|
||||||
|
*
|
||||||
|
* Print the bits set inside a bitmap as a comma-separated list of hexadecimal 32-bit blocks.
|
||||||
|
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff,0x00000006,0x00000002"</tt>.
|
||||||
|
*
|
||||||
|
* \return the number of characters that were written (not including the ending \c \0).
|
||||||
|
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||||
|
|
||||||
/** \brief Parse a bitmap string and stores it in bitmap \p bitmap.
|
/** \brief Parse a bitmap string as the default hwloc format and stores it in bitmap \p bitmap.
|
||||||
|
*
|
||||||
|
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
|
||||||
|
*
|
||||||
|
* The input string should be a comma-separated list of hexadecimal 32-bit blocks.
|
||||||
|
* String <tt>"0xffffffff,0x6,0x2"</tt> is parsed as a bitmap containing all bits between 64 and 95,
|
||||||
|
* and bits 33, 34 and 1.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* \return 0 on success, -1 on error.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||||
|
|
||||||
/** \brief Stringify a bitmap in the list format.
|
/** \brief Stringify a bitmap in the list format.
|
||||||
|
*
|
||||||
|
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
|
||||||
*
|
*
|
||||||
* Lists are comma-separated indexes or ranges.
|
* Lists are comma-separated indexes or ranges.
|
||||||
* Ranges are dash separated indexes.
|
* Ranges are dash separated indexes.
|
||||||
* The last range may not have an ending indexes if the bitmap is infinitely set.
|
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"1,33-34,64-95"</tt>.
|
||||||
|
* The last range may not have an ending index if the bitmap is infinitely set.
|
||||||
*
|
*
|
||||||
* Up to \p buflen characters may be written in buffer \p buf.
|
* Up to \p buflen characters may be written in buffer \p buf.
|
||||||
*
|
*
|
||||||
* If \p buflen is 0, \p buf may safely be \c NULL.
|
* If \p buflen is 0, \p buf may safely be \c NULL.
|
||||||
*
|
*
|
||||||
* \return the number of characters that were actually written if not truncating,
|
* \return the number of characters that were actually written if not truncating,
|
||||||
* or that would have been written (not including the ending \\0).
|
* or that would have been written (not including the ending \c \0).
|
||||||
|
* \return -1 on error.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
||||||
|
|
||||||
/** \brief Stringify a bitmap into a newly allocated list string.
|
/** \brief Stringify a bitmap into a newly allocated list string.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
|
||||||
|
*
|
||||||
|
* Lists are comma-separated indexes or ranges.
|
||||||
|
* Ranges are dash separated indexes.
|
||||||
|
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"1,33-34,64-95"</tt>.
|
||||||
|
* The last range may not have an ending index if the bitmap is infinitely set.
|
||||||
|
*
|
||||||
|
* \return the number of characters that were written (not including the ending \c \0).
|
||||||
|
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||||
|
|
||||||
/** \brief Parse a list string and stores it in bitmap \p bitmap.
|
/** \brief Parse a list string and stores it in bitmap \p bitmap.
|
||||||
|
*
|
||||||
|
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
|
||||||
|
*
|
||||||
|
* Lists are comma-separated indexes or ranges.
|
||||||
|
* Ranges are dash separated indexes.
|
||||||
|
* String <tt>"1,33-34,64-95"</tt> is parsed as a bitmap containing bits 1, 33, 34, and all from 64 to 95.
|
||||||
|
* The last range may not have an ending index if the bitmap is infinitely set.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* \return 0 on success, -1 on error.
|
||||||
*/
|
*/
|
||||||
@@ -165,25 +202,43 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char *
|
|||||||
|
|
||||||
/** \brief Stringify a bitmap in the taskset-specific format.
|
/** \brief Stringify a bitmap in the taskset-specific format.
|
||||||
*
|
*
|
||||||
* The taskset command manipulates bitmap strings that contain a single
|
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
|
||||||
|
*
|
||||||
|
* The taskset program manipulates bitmap strings that contain a single
|
||||||
* (possibly very long) hexadecimal number starting with 0x.
|
* (possibly very long) hexadecimal number starting with 0x.
|
||||||
|
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff0000000600000002"</tt>.
|
||||||
*
|
*
|
||||||
* Up to \p buflen characters may be written in buffer \p buf.
|
* Up to \p buflen characters may be written in buffer \p buf.
|
||||||
*
|
*
|
||||||
* If \p buflen is 0, \p buf may safely be \c NULL.
|
* If \p buflen is 0, \p buf may safely be \c NULL.
|
||||||
*
|
*
|
||||||
* \return the number of characters that were actually written if not truncating,
|
* \return the number of characters that were actually written if not truncating,
|
||||||
* or that would have been written (not including the ending \\0).
|
* or that would have been written (not including the ending \c \0).
|
||||||
|
* \return -1 on error.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
||||||
|
|
||||||
/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
|
/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
|
||||||
|
*
|
||||||
|
* The taskset program manipulates bitmap strings that contain a single
|
||||||
|
* (possibly very long) hexadecimal number starting with 0x.
|
||||||
|
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff0000000600000002"</tt>.
|
||||||
|
*
|
||||||
|
* \return the number of characters that were written (not including the ending \c \0).
|
||||||
|
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||||
|
|
||||||
/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
|
/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
|
||||||
|
*
|
||||||
|
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
|
||||||
|
*
|
||||||
|
* The taskset program manipulates bitmap strings that contain a single
|
||||||
|
* (possibly very long) hexadecimal number starting with 0x.
|
||||||
|
* String <tt>"0xffffffff0000000600000002"</tt> is parsed as a bitmap containing all bits between 64 and 95,
|
||||||
|
* and bits 33, 34 and 1.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* \return 0 on success, -1 on error.
|
||||||
*/
|
*/
|
||||||
|
|||||||
6
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
6
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
* Copyright © 2013-2024 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -258,7 +258,7 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co
|
|||||||
/** \brief Load a list of topology differences from a XML buffer.
|
/** \brief Load a list of topology differences from a XML buffer.
|
||||||
*
|
*
|
||||||
* Build a list of differences from the XML memory buffer given
|
* Build a list of differences from the XML memory buffer given
|
||||||
* at \p xmlbuffer and of length \p buflen (including an ending \0).
|
* at \p xmlbuffer and of length \p buflen (including an ending \c \0).
|
||||||
* This buffer may have been filled earlier with
|
* This buffer may have been filled earlier with
|
||||||
* hwloc_topology_diff_export_xmlbuffer().
|
* hwloc_topology_diff_export_xmlbuffer().
|
||||||
*
|
*
|
||||||
@@ -284,7 +284,7 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int
|
|||||||
* that contains the reference topology.
|
* that contains the reference topology.
|
||||||
* This attribute is given back when reading the diff from XML.
|
* This attribute is given back when reading the diff from XML.
|
||||||
*
|
*
|
||||||
* The returned buffer ends with a \0 that is included in the returned
|
* The returned buffer ends with a \c \0 that is included in the returned
|
||||||
* length.
|
* length.
|
||||||
*
|
*
|
||||||
* \return 0 on success, -1 on error.
|
* \return 0 on success, -1 on error.
|
||||||
|
|||||||
21
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
21
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
* Copyright © 2010-2025 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -227,17 +227,24 @@ enum hwloc_distances_transform_e {
|
|||||||
HWLOC_DISTANCES_TRANSFORM_LINKS = 1,
|
HWLOC_DISTANCES_TRANSFORM_LINKS = 1,
|
||||||
|
|
||||||
/** \brief Merge switches with multiple ports into a single object.
|
/** \brief Merge switches with multiple ports into a single object.
|
||||||
* This currently only applies to NVSwitches where GPUs seem connected to different
|
*
|
||||||
* separate switch ports in the NVLinkBandwidth matrix. This transformation will
|
* This currently only applies to NVSwitches where GPUs seem connected
|
||||||
* replace all of them with the same port connected to all GPUs.
|
* to different switch ports. Switch ports must be objects with subtype
|
||||||
* Other ports are removed by applying ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
|
* "NVSwitch" as in the NVLinkBandwidth matrix.
|
||||||
|
*
|
||||||
|
* This transformation will replace all ports with only the first one,
|
||||||
|
* now connected to all GPUs. Other ports are removed by applying
|
||||||
|
* ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
|
||||||
* \hideinitializer
|
* \hideinitializer
|
||||||
*/
|
*/
|
||||||
HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,
|
HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,
|
||||||
|
|
||||||
/** \brief Apply a transitive closure to the matrix to connect objects across switches.
|
/** \brief Apply a transitive closure to the matrix to connect objects across switches.
|
||||||
* This currently only applies to GPUs and NVSwitches in the NVLinkBandwidth matrix.
|
*
|
||||||
* All pairs of GPUs will be reported as directly connected.
|
* All pairs of GPUs will be reported as directly connected instead of GPUs being
|
||||||
|
* only connected to switches.
|
||||||
|
*
|
||||||
|
* Switch ports must be objects with subtype "NVSwitch" as in the NVLinkBandwidth matrix.
|
||||||
* \hideinitializer
|
* \hideinitializer
|
||||||
*/
|
*/
|
||||||
HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3
|
HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3
|
||||||
|
|||||||
163
src/3rdparty/hwloc/include/hwloc/levelzero.h
vendored
163
src/3rdparty/hwloc/include/hwloc/levelzero.h
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2021-2023 Inria. All rights reserved.
|
* Copyright © 2021-2024 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -32,7 +32,8 @@ extern "C" {
|
|||||||
/** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface.
|
/** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface.
|
||||||
*
|
*
|
||||||
* This interface offers ways to retrieve topology information about
|
* This interface offers ways to retrieve topology information about
|
||||||
* devices managed by the Level Zero API.
|
* devices managed by the Level Zero API, both for main Core devices (ZE API)
|
||||||
|
* and the Sysman devices (ZES API).
|
||||||
*
|
*
|
||||||
* @{
|
* @{
|
||||||
*/
|
*/
|
||||||
@@ -44,9 +45,68 @@ extern "C" {
|
|||||||
* the Level Zero device \p device.
|
* the Level Zero device \p device.
|
||||||
*
|
*
|
||||||
* Topology \p topology and device \p device must match the local machine.
|
* Topology \p topology and device \p device must match the local machine.
|
||||||
|
* The Level Zero library must have been initialized with zeInit().
|
||||||
|
* I/O devices detection and the Level Zero component are not needed in the
|
||||||
|
* topology.
|
||||||
|
*
|
||||||
|
* The function only returns the locality of the device.
|
||||||
|
* If more information about the device is needed, OS objects should
|
||||||
|
* be used instead, see hwloc_levelzero_get_device_osdev().
|
||||||
|
*
|
||||||
|
* This function is currently only implemented in a meaningful way for
|
||||||
|
* Linux; other systems will simply get a full cpuset.
|
||||||
|
*
|
||||||
|
* \return 0 on success.
|
||||||
|
* \return -1 on error, for instance if device information could not be found.
|
||||||
|
*
|
||||||
|
* \note zeDevicePciGetPropertiesExt() must be supported, or the entire machine
|
||||||
|
* locality will be returned.
|
||||||
|
*/
|
||||||
|
static __hwloc_inline int
|
||||||
|
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||||
|
ze_device_handle_t device, hwloc_cpuset_t set)
|
||||||
|
{
|
||||||
|
#ifdef HWLOC_LINUX_SYS
|
||||||
|
/* If we're on Linux, use the sysfs mechanism to get the local cpus */
|
||||||
|
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
|
||||||
|
char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
|
||||||
|
ze_pci_ext_properties_t pci;
|
||||||
|
ze_result_t res;
|
||||||
|
|
||||||
|
if (!hwloc_topology_is_thissystem(topology)) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pci.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
|
||||||
|
pci.pNext = NULL;
|
||||||
|
res = zeDevicePciGetPropertiesExt(device, &pci);
|
||||||
|
if (res != ZE_RESULT_SUCCESS) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus",
|
||||||
|
pci.address.domain, pci.address.bus, pci.address.device, pci.address.function);
|
||||||
|
if (hwloc_linux_read_path_as_cpumask(path, set) < 0
|
||||||
|
|| hwloc_bitmap_iszero(set))
|
||||||
|
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
|
||||||
|
#else
|
||||||
|
/* Non-Linux systems simply get a full cpuset */
|
||||||
|
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
|
||||||
|
#endif
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Get the CPU set of logical processors that are physically
|
||||||
|
* close to the Level Zero Sysman device \p device
|
||||||
|
*
|
||||||
|
* Store in \p set the CPU-set describing the locality of
|
||||||
|
* the Level Zero device \p device.
|
||||||
|
*
|
||||||
|
* Topology \p topology and device \p device must match the local machine.
|
||||||
* The Level Zero library must have been initialized with Sysman enabled
|
* The Level Zero library must have been initialized with Sysman enabled
|
||||||
* (by calling zesInit(0) if supported,
|
* with zesInit().
|
||||||
* or by setting ZES_ENABLE_SYSMAN=1 in the environment).
|
|
||||||
* I/O devices detection and the Level Zero component are not needed in the
|
* I/O devices detection and the Level Zero component are not needed in the
|
||||||
* topology.
|
* topology.
|
||||||
*
|
*
|
||||||
@@ -61,15 +121,14 @@ extern "C" {
|
|||||||
* \return -1 on error, for instance if device information could not be found.
|
* \return -1 on error, for instance if device information could not be found.
|
||||||
*/
|
*/
|
||||||
static __hwloc_inline int
|
static __hwloc_inline int
|
||||||
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
hwloc_levelzero_get_sysman_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||||
ze_device_handle_t device, hwloc_cpuset_t set)
|
zes_device_handle_t device, hwloc_cpuset_t set)
|
||||||
{
|
{
|
||||||
#ifdef HWLOC_LINUX_SYS
|
#ifdef HWLOC_LINUX_SYS
|
||||||
/* If we're on Linux, use the sysfs mechanism to get the local cpus */
|
/* If we're on Linux, use the sysfs mechanism to get the local cpus */
|
||||||
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
|
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
|
||||||
char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
|
char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
|
||||||
zes_pci_properties_t pci;
|
zes_pci_properties_t pci;
|
||||||
zes_device_handle_t sdevice = device;
|
|
||||||
ze_result_t res;
|
ze_result_t res;
|
||||||
|
|
||||||
if (!hwloc_topology_is_thissystem(topology)) {
|
if (!hwloc_topology_is_thissystem(topology)) {
|
||||||
@@ -77,7 +136,7 @@ hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_un
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
res = zesDevicePciGetProperties(sdevice, &pci);
|
res = zesDevicePciGetProperties(device, &pci);
|
||||||
if (res != ZE_RESULT_SUCCESS) {
|
if (res != ZE_RESULT_SUCCESS) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
@@ -102,17 +161,90 @@ hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_un
|
|||||||
* \return \c NULL if none could be found.
|
* \return \c NULL if none could be found.
|
||||||
*
|
*
|
||||||
* Topology \p topology and device \p device must match the local machine.
|
* Topology \p topology and device \p device must match the local machine.
|
||||||
|
* The Level Zero library must have been initialized with zeInit().
|
||||||
* I/O devices detection and the Level Zero component must be enabled in the
|
* I/O devices detection and the Level Zero component must be enabled in the
|
||||||
* topology. If not, the locality of the object may still be found using
|
* topology. If not, the locality of the object may still be found using
|
||||||
* hwloc_levelzero_get_device_cpuset().
|
* hwloc_levelzero_get_device_cpuset().
|
||||||
*
|
*
|
||||||
|
* \note If the input ZE device is actually a subdevice, then its parent
|
||||||
|
* (root device) is actually translated, i.e. the main hwloc OS device
|
||||||
|
* is returned instead of one of its children.
|
||||||
|
*
|
||||||
|
* \note The corresponding hwloc PCI device may be found by looking
|
||||||
|
* at the result parent pointer (unless PCI devices are filtered out).
|
||||||
|
*
|
||||||
|
* \note zeDevicePciGetPropertiesExt() must be supported.
|
||||||
|
*/
|
||||||
|
static __hwloc_inline hwloc_obj_t
|
||||||
|
hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device)
|
||||||
|
{
|
||||||
|
ze_pci_ext_properties_t pci;
|
||||||
|
ze_result_t res;
|
||||||
|
hwloc_obj_t osdev;
|
||||||
|
|
||||||
|
if (!hwloc_topology_is_thissystem(topology)) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pci.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
|
||||||
|
pci.pNext = NULL;
|
||||||
|
res = zeDevicePciGetPropertiesExt(device, &pci);
|
||||||
|
if (res != ZE_RESULT_SUCCESS) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
osdev = NULL;
|
||||||
|
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
|
||||||
|
hwloc_obj_t pcidev;
|
||||||
|
|
||||||
|
if (strncmp(osdev->name, "ze", 2))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
pcidev = osdev;
|
||||||
|
while (pcidev && pcidev->type != HWLOC_OBJ_PCI_DEVICE)
|
||||||
|
pcidev = pcidev->parent;
|
||||||
|
if (!pcidev)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (pcidev
|
||||||
|
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE
|
||||||
|
&& pcidev->attr->pcidev.domain == pci.address.domain
|
||||||
|
&& pcidev->attr->pcidev.bus == pci.address.bus
|
||||||
|
&& pcidev->attr->pcidev.dev == pci.address.device
|
||||||
|
&& pcidev->attr->pcidev.func == pci.address.function)
|
||||||
|
return osdev;
|
||||||
|
|
||||||
|
/* FIXME: when we'll have serialnumber, try it in case PCI is filtered-out */
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Get the hwloc OS device object corresponding to Level Zero Sysman device
|
||||||
|
* \p device.
|
||||||
|
*
|
||||||
|
* \return The hwloc OS device object that describes the given Level Zero device \p device.
|
||||||
|
* \return \c NULL if none could be found.
|
||||||
|
*
|
||||||
|
* Topology \p topology and device \p device must match the local machine.
|
||||||
|
* The Level Zero library must have been initialized with Sysman enabled
|
||||||
|
* with zesInit().
|
||||||
|
* I/O devices detection and the Level Zero component must be enabled in the
|
||||||
|
* topology. If not, the locality of the object may still be found using
|
||||||
|
* hwloc_levelzero_get_sysman_device_cpuset().
|
||||||
|
*
|
||||||
|
* \note If the input ZES device is actually a subdevice, then its parent
|
||||||
|
* (root device) is actually translated, i.e. the main hwloc OS device
|
||||||
|
* is returned instead of one of its children.
|
||||||
|
*
|
||||||
* \note The corresponding hwloc PCI device may be found by looking
|
* \note The corresponding hwloc PCI device may be found by looking
|
||||||
* at the result parent pointer (unless PCI devices are filtered out).
|
* at the result parent pointer (unless PCI devices are filtered out).
|
||||||
*/
|
*/
|
||||||
static __hwloc_inline hwloc_obj_t
|
static __hwloc_inline hwloc_obj_t
|
||||||
hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device)
|
hwloc_levelzero_get_sysman_device_osdev(hwloc_topology_t topology, zes_device_handle_t device)
|
||||||
{
|
{
|
||||||
zes_device_handle_t sdevice = device;
|
|
||||||
zes_pci_properties_t pci;
|
zes_pci_properties_t pci;
|
||||||
ze_result_t res;
|
ze_result_t res;
|
||||||
hwloc_obj_t osdev;
|
hwloc_obj_t osdev;
|
||||||
@@ -122,20 +254,25 @@ hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t d
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
res = zesDevicePciGetProperties(sdevice, &pci);
|
res = zesDevicePciGetProperties(device, &pci);
|
||||||
if (res != ZE_RESULT_SUCCESS) {
|
if (res != ZE_RESULT_SUCCESS) {
|
||||||
/* L0 was likely initialized without sysman, don't bother */
|
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
osdev = NULL;
|
osdev = NULL;
|
||||||
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
|
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
|
||||||
hwloc_obj_t pcidev = osdev->parent;
|
hwloc_obj_t pcidev;
|
||||||
|
|
||||||
if (strncmp(osdev->name, "ze", 2))
|
if (strncmp(osdev->name, "ze", 2))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
pcidev = osdev;
|
||||||
|
while (pcidev && pcidev->type != HWLOC_OBJ_PCI_DEVICE)
|
||||||
|
pcidev = pcidev->parent;
|
||||||
|
if (!pcidev)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (pcidev
|
if (pcidev
|
||||||
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE
|
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE
|
||||||
&& pcidev->attr->pcidev.domain == pci.address.domain
|
&& pcidev->attr->pcidev.domain == pci.address.domain
|
||||||
|
|||||||
69
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
69
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2019-2024 Inria. All rights reserved.
|
* Copyright © 2019-2025 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -58,6 +58,11 @@ extern "C" {
|
|||||||
* an easy way to distinguish NUMA nodes of different kinds, as explained
|
* an easy way to distinguish NUMA nodes of different kinds, as explained
|
||||||
* in \ref heteromem.
|
* in \ref heteromem.
|
||||||
*
|
*
|
||||||
|
* Besides tiers, hwloc defines a set of "default" nodes where normal memory
|
||||||
|
* allocations should be made from (see hwloc_topology_get_default_nodeset()).
|
||||||
|
* This is also useful for dividing the machine into a set of non-overlapping
|
||||||
|
* NUMA domains, for instance for binding tasks per domain.
|
||||||
|
*
|
||||||
* \sa An example is available in doc/examples/memory-attributes.c in the source tree.
|
* \sa An example is available in doc/examples/memory-attributes.c in the source tree.
|
||||||
*
|
*
|
||||||
* \note The API also supports specific objects as initiator,
|
* \note The API also supports specific objects as initiator,
|
||||||
@@ -245,6 +250,16 @@ enum hwloc_local_numanode_flag_e {
|
|||||||
*/
|
*/
|
||||||
HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1),
|
HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1),
|
||||||
|
|
||||||
|
/** \brief Select NUMA nodes whose locality intersects the given cpuset.
|
||||||
|
* This includes larger and smaller localities as well as localities
|
||||||
|
* that are partially included.
|
||||||
|
* For instance, if the locality is one core of both packages, a NUMA node
|
||||||
|
* local to one package is neither larger nor smaller than this locality,
|
||||||
|
* but it intersects it.
|
||||||
|
* \hideinitializer
|
||||||
|
*/
|
||||||
|
HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY = (1UL<<3),
|
||||||
|
|
||||||
/** \brief Select all NUMA nodes in the topology.
|
/** \brief Select all NUMA nodes in the topology.
|
||||||
* The initiator \p initiator is ignored.
|
* The initiator \p initiator is ignored.
|
||||||
* \hideinitializer
|
* \hideinitializer
|
||||||
@@ -290,7 +305,57 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
|||||||
hwloc_obj_t *nodes,
|
hwloc_obj_t *nodes,
|
||||||
unsigned long flags);
|
unsigned long flags);
|
||||||
|
|
||||||
|
/** \brief Return the set of default NUMA nodes
|
||||||
|
*
|
||||||
|
* In machines with heterogeneous memory, some NUMA nodes are considered
|
||||||
|
* the default ones, i.e. where basic allocations should be made from.
|
||||||
|
* These are usually DRAM nodes.
|
||||||
|
*
|
||||||
|
* Other nodes may be reserved for specific use (I/O device memory, e.g. GPU memory),
|
||||||
|
* small but high performance (HBM), large but slow memory (NVM), etc.
|
||||||
|
* Buffers should usually not be allocated from there unless explicitly required.
|
||||||
|
*
|
||||||
|
* This function fills \p nodeset with the bits of NUMA nodes considered default.
|
||||||
|
*
|
||||||
|
* It is guaranteed that these nodes have non-intersecting CPU sets,
|
||||||
|
* i.e. cores may not have multiple local NUMA nodes anymore.
|
||||||
|
* Hence this may be used to iterate over the platform divided into separate
|
||||||
|
* NUMA localities, for instance for binding one task per NUMA domain.
|
||||||
|
*
|
||||||
|
* Any core that had some local NUMA node(s) in the initial topology should
|
||||||
|
* still have one in the default nodeset. Corner cases where this would be
|
||||||
|
* wrong consist in asymmetric platforms with missing DRAM nodes, or topologies
|
||||||
|
* that were already restricted to less NUMA nodes.
|
||||||
|
*
|
||||||
|
* The returned nodeset may be passed to hwloc_topology_restrict() with
|
||||||
|
* ::HWLOC_RESTRICT_FLAG_BYNODESET to remove all non-default nodes from
|
||||||
|
* the topology. The resulting topology will be easier to use when iterating
|
||||||
|
* over (now homogeneous) NUMA nodes.
|
||||||
|
*
|
||||||
|
* The heuristics for finding default nodes relies on memory tiers and subtypes
|
||||||
|
* (see \ref heteromem) as well as the assumption that hardware vendors list
|
||||||
|
* default nodes first in hardware tables.
|
||||||
|
*
|
||||||
|
* \p flags must be \c 0 for now.
|
||||||
|
*
|
||||||
|
* \return 0 on success.
|
||||||
|
* \return -1 on error.
|
||||||
|
*
|
||||||
|
* \note The returned nodeset usually contains all nodes from a single memory
|
||||||
|
* tier, likely the DRAM one.
|
||||||
|
*
|
||||||
|
* \note The returned nodeset is included in the list of available nodes
|
||||||
|
* returned by hwloc_topology_get_topology_nodeset(). It is strictly smaller
|
||||||
|
* if the machine has heterogeneous memory.
|
||||||
|
*
|
||||||
|
* \note The heuristics may return a suboptimal set of nodes if hwloc could
|
||||||
|
* not guess memory types and/or if some default nodes were removed earlier
|
||||||
|
* from the topology (e.g. with hwloc_topology_restrict()).
|
||||||
|
*/
|
||||||
|
HWLOC_DECLSPEC int
|
||||||
|
hwloc_topology_get_default_nodeset(hwloc_topology_t topology,
|
||||||
|
hwloc_nodeset_t nodeset,
|
||||||
|
unsigned long flags);
|
||||||
|
|
||||||
/** \brief Return an attribute value for a specific target NUMA node.
|
/** \brief Return an attribute value for a specific target NUMA node.
|
||||||
*
|
*
|
||||||
|
|||||||
130
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
130
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
@@ -26,7 +26,7 @@ struct hwloc_backend;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components
|
/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components and backends
|
||||||
*
|
*
|
||||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||||
*
|
*
|
||||||
@@ -90,18 +90,6 @@ struct hwloc_disc_component {
|
|||||||
struct hwloc_disc_component * next;
|
struct hwloc_disc_component * next;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** @} */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
|
|
||||||
*
|
|
||||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
|
||||||
*
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** \brief Discovery phase */
|
/** \brief Discovery phase */
|
||||||
typedef enum hwloc_disc_phase_e {
|
typedef enum hwloc_disc_phase_e {
|
||||||
/** \brief xml or synthetic, platform-specific components such as bgq.
|
/** \brief xml or synthetic, platform-specific components such as bgq.
|
||||||
@@ -313,6 +301,64 @@ struct hwloc_component {
|
|||||||
void * data;
|
void * data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** \brief Make sure that plugins can lookup core symbols.
|
||||||
|
*
|
||||||
|
* This is a sanity check to avoid lazy-lookup failures when libhwloc
|
||||||
|
* is loaded within a plugin, and later tries to load its own plugins.
|
||||||
|
* This may fail (and abort the program) if libhwloc symbols are in a
|
||||||
|
* private namespace.
|
||||||
|
*
|
||||||
|
* \return 0 on success.
|
||||||
|
* \return -1 if the plugin cannot be successfully loaded. The caller
|
||||||
|
* plugin init() callback should return a negative error code as well.
|
||||||
|
*
|
||||||
|
* Plugins should call this function in their init() callback to avoid
|
||||||
|
* later crashes if lazy symbol resolution is used by the upper layer that
|
||||||
|
* loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
|
||||||
|
*
|
||||||
|
* \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
|
||||||
|
* building the caller as a plugin.
|
||||||
|
*
|
||||||
|
* \note This function should remain inline so plugins can call it even
|
||||||
|
* when they cannot find libhwloc symbols.
|
||||||
|
*/
|
||||||
|
static __hwloc_inline int
|
||||||
|
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
|
||||||
|
{
|
||||||
|
#ifdef HWLOC_INSIDE_PLUGIN
|
||||||
|
void *sym;
|
||||||
|
#ifdef HWLOC_HAVE_LTDL
|
||||||
|
lt_dlhandle handle = lt_dlopen(NULL);
|
||||||
|
#else
|
||||||
|
void *handle = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
|
||||||
|
#endif
|
||||||
|
if (!handle)
|
||||||
|
/* cannot check, assume things will work */
|
||||||
|
return 0;
|
||||||
|
#ifdef HWLOC_HAVE_LTDL
|
||||||
|
sym = lt_dlsym(handle, symbol);
|
||||||
|
lt_dlclose(handle);
|
||||||
|
#else
|
||||||
|
sym = dlsym(handle, symbol);
|
||||||
|
dlclose(handle);
|
||||||
|
#endif
|
||||||
|
if (!sym) {
|
||||||
|
static int verboseenv_checked = 0;
|
||||||
|
static int verboseenv_value = 0;
|
||||||
|
if (!verboseenv_checked) {
|
||||||
|
const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
|
||||||
|
verboseenv_value = verboseenv ? atoi(verboseenv) : 0;
|
||||||
|
verboseenv_checked = 1;
|
||||||
|
}
|
||||||
|
if (verboseenv_value)
|
||||||
|
fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
|
||||||
|
pluginname, symbol);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#endif /* HWLOC_INSIDE_PLUGIN */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
|
||||||
@@ -422,64 +468,6 @@ HWLOC_DECLSPEC int hwloc_obj_add_children_sets(hwloc_obj_t obj);
|
|||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused);
|
HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused);
|
||||||
|
|
||||||
/** \brief Make sure that plugins can lookup core symbols.
|
|
||||||
*
|
|
||||||
* This is a sanity check to avoid lazy-lookup failures when libhwloc
|
|
||||||
* is loaded within a plugin, and later tries to load its own plugins.
|
|
||||||
* This may fail (and abort the program) if libhwloc symbols are in a
|
|
||||||
* private namespace.
|
|
||||||
*
|
|
||||||
* \return 0 on success.
|
|
||||||
* \return -1 if the plugin cannot be successfully loaded. The caller
|
|
||||||
* plugin init() callback should return a negative error code as well.
|
|
||||||
*
|
|
||||||
* Plugins should call this function in their init() callback to avoid
|
|
||||||
* later crashes if lazy symbol resolution is used by the upper layer that
|
|
||||||
* loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
|
|
||||||
*
|
|
||||||
* \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
|
|
||||||
* building the caller as a plugin.
|
|
||||||
*
|
|
||||||
* \note This function should remain inline so plugins can call it even
|
|
||||||
* when they cannot find libhwloc symbols.
|
|
||||||
*/
|
|
||||||
static __hwloc_inline int
|
|
||||||
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
|
|
||||||
{
|
|
||||||
#ifdef HWLOC_INSIDE_PLUGIN
|
|
||||||
void *sym;
|
|
||||||
#ifdef HWLOC_HAVE_LTDL
|
|
||||||
lt_dlhandle handle = lt_dlopen(NULL);
|
|
||||||
#else
|
|
||||||
void *handle = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
|
|
||||||
#endif
|
|
||||||
if (!handle)
|
|
||||||
/* cannot check, assume things will work */
|
|
||||||
return 0;
|
|
||||||
#ifdef HWLOC_HAVE_LTDL
|
|
||||||
sym = lt_dlsym(handle, symbol);
|
|
||||||
lt_dlclose(handle);
|
|
||||||
#else
|
|
||||||
sym = dlsym(handle, symbol);
|
|
||||||
dlclose(handle);
|
|
||||||
#endif
|
|
||||||
if (!sym) {
|
|
||||||
static int verboseenv_checked = 0;
|
|
||||||
static int verboseenv_value = 0;
|
|
||||||
if (!verboseenv_checked) {
|
|
||||||
const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
|
|
||||||
verboseenv_value = verboseenv ? atoi(verboseenv) : 0;
|
|
||||||
verboseenv_checked = 1;
|
|
||||||
}
|
|
||||||
if (verboseenv_value)
|
|
||||||
fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
|
|
||||||
pluginname, symbol);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#endif /* HWLOC_INSIDE_PLUGIN */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
8
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
8
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
* Copyright © 2010-2025 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -409,8 +409,10 @@ extern "C" {
|
|||||||
#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e)
|
#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e)
|
||||||
#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
|
#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
|
||||||
#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
|
#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
|
||||||
|
#define HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY)
|
||||||
#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL)
|
#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL)
|
||||||
#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs)
|
#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs)
|
||||||
|
#define hwloc_topology_get_default_nodeset HWLOC_NAME(topology_get_default_nodeset)
|
||||||
|
|
||||||
#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name)
|
#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name)
|
||||||
#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags)
|
#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags)
|
||||||
@@ -599,7 +601,9 @@ extern "C" {
|
|||||||
/* levelzero.h */
|
/* levelzero.h */
|
||||||
|
|
||||||
#define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset)
|
#define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset)
|
||||||
|
#define hwloc_levelzero_get_sysman_device_cpuset HWLOC_NAME(levelzero_get_sysman_device_cpuset)
|
||||||
#define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev)
|
#define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev)
|
||||||
|
#define hwloc_levelzero_get_sysman_device_osdev HWLOC_NAME(levelzero_get_sysman_device_osdev)
|
||||||
|
|
||||||
/* gl.h */
|
/* gl.h */
|
||||||
|
|
||||||
@@ -813,6 +817,8 @@ extern "C" {
|
|||||||
#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
|
#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
|
||||||
#define hwloc_topology_clear HWLOC_NAME(topology_clear)
|
#define hwloc_topology_clear HWLOC_NAME(topology_clear)
|
||||||
|
|
||||||
|
#define hwloc__reconnect HWLOC_NAME(_reconnect)
|
||||||
|
|
||||||
#define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object)
|
#define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object)
|
||||||
|
|
||||||
#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index)
|
#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
|
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
|
||||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||||
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
|
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
|
||||||
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@@ -17,10 +17,6 @@
|
|||||||
|
|
||||||
#define HWLOC_HAVE_MSVC_CPUIDEX 1
|
#define HWLOC_HAVE_MSVC_CPUIDEX 1
|
||||||
|
|
||||||
/* #undef HAVE_MKSTEMP */
|
|
||||||
|
|
||||||
#define HWLOC_HAVE_X86_CPUID 1
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */
|
/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */
|
||||||
#define HAVE_CACHE_DESCRIPTOR 0
|
#define HAVE_CACHE_DESCRIPTOR 0
|
||||||
|
|
||||||
@@ -132,7 +128,8 @@
|
|||||||
#define HAVE_DECL__SC_PAGE_SIZE 0
|
#define HAVE_DECL__SC_PAGE_SIZE 0
|
||||||
|
|
||||||
/* Define to 1 if you have the <dirent.h> header file. */
|
/* Define to 1 if you have the <dirent.h> header file. */
|
||||||
/* #undef HAVE_DIRENT_H */
|
/* #define HAVE_DIRENT_H 1 */
|
||||||
|
#undef HAVE_DIRENT_H
|
||||||
|
|
||||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
/* #undef HAVE_DLFCN_H */
|
/* #undef HAVE_DLFCN_H */
|
||||||
@@ -285,7 +282,7 @@
|
|||||||
#define HAVE_STRING_H 1
|
#define HAVE_STRING_H 1
|
||||||
|
|
||||||
/* Define to 1 if you have the `strncasecmp' function. */
|
/* Define to 1 if you have the `strncasecmp' function. */
|
||||||
/* #undef HAVE_STRNCASECMP */
|
#define HAVE_STRNCASECMP 1
|
||||||
|
|
||||||
/* Define to '1' if sysctl is present and usable */
|
/* Define to '1' if sysctl is present and usable */
|
||||||
/* #undef HAVE_SYSCTL */
|
/* #undef HAVE_SYSCTL */
|
||||||
@@ -326,7 +323,8 @@
|
|||||||
/* #undef HAVE_UNAME */
|
/* #undef HAVE_UNAME */
|
||||||
|
|
||||||
/* Define to 1 if you have the <unistd.h> header file. */
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
/* #undef HAVE_UNISTD_H */
|
/* #define HAVE_UNISTD_H 1 */
|
||||||
|
#undef HAVE_UNISTD_H
|
||||||
|
|
||||||
/* Define to 1 if you have the `uselocale' function. */
|
/* Define to 1 if you have the `uselocale' function. */
|
||||||
/* #undef HAVE_USELOCALE */
|
/* #undef HAVE_USELOCALE */
|
||||||
@@ -661,7 +659,7 @@
|
|||||||
#define hwloc_pid_t HANDLE
|
#define hwloc_pid_t HANDLE
|
||||||
|
|
||||||
/* Define this to either strncasecmp or strncmp */
|
/* Define this to either strncasecmp or strncmp */
|
||||||
/* #undef hwloc_strncasecmp */
|
#define hwloc_strncasecmp strncasecmp
|
||||||
|
|
||||||
/* Define this to the thread ID type */
|
/* Define this to the thread ID type */
|
||||||
#define hwloc_thread_t HANDLE
|
#define hwloc_thread_t HANDLE
|
||||||
|
|||||||
2
src/3rdparty/hwloc/include/private/misc.h
vendored
2
src/3rdparty/hwloc/include/private/misc.h
vendored
@@ -186,7 +186,7 @@ hwloc_ffsl_from_ffs32(unsigned long x)
|
|||||||
/**
|
/**
|
||||||
* flsl helpers.
|
* flsl helpers.
|
||||||
*/
|
*/
|
||||||
#ifdef __GNUC_____
|
#ifdef __GNUC__
|
||||||
|
|
||||||
# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
|
# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
|
||||||
# define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0)
|
# define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0)
|
||||||
|
|||||||
5
src/3rdparty/hwloc/include/private/private.h
vendored
5
src/3rdparty/hwloc/include/private/private.h
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
* Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
@@ -302,6 +302,9 @@ extern void hwloc__reorder_children(hwloc_obj_t parent);
|
|||||||
extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
|
extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
|
||||||
extern void hwloc_topology_clear(struct hwloc_topology *topology);
|
extern void hwloc_topology_clear(struct hwloc_topology *topology);
|
||||||
|
|
||||||
|
#define _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE (1UL<<0)
|
||||||
|
extern int hwloc__reconnect(struct hwloc_topology *topology, unsigned long flags);
|
||||||
|
|
||||||
/* insert memory object as memory child of normal parent */
|
/* insert memory object as memory child of normal parent */
|
||||||
extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
|
extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
|
||||||
hwloc_obj_t obj, const char *reason);
|
hwloc_obj_t obj, const char *reason);
|
||||||
|
|||||||
32
src/3rdparty/hwloc/src/distances.c
vendored
32
src/3rdparty/hwloc/src/distances.c
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
* Copyright © 2010-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2011-2012 Université Bordeaux
|
* Copyright © 2011-2012 Université Bordeaux
|
||||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -699,7 +699,7 @@ hwloc_distances_add_commit(hwloc_topology_t topology,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* in case we added some groups, see if we need to reconnect */
|
/* in case we added some groups, see if we need to reconnect */
|
||||||
hwloc_topology_reconnect(topology, 0);
|
hwloc__reconnect(topology, 0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@@ -1387,19 +1387,12 @@ static __hwloc_inline int is_nvswitch(hwloc_obj_t obj)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
hwloc__distances_transform_merge_switch_ports(hwloc_topology_t topology,
|
hwloc__distances_transform_merge_switch_ports(struct hwloc_distances_s *distances)
|
||||||
struct hwloc_distances_s *distances)
|
|
||||||
{
|
{
|
||||||
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
|
|
||||||
hwloc_obj_t *objs = distances->objs;
|
hwloc_obj_t *objs = distances->objs;
|
||||||
hwloc_uint64_t *values = distances->values;
|
hwloc_uint64_t *values = distances->values;
|
||||||
unsigned first, i, j, nbobjs = distances->nbobjs;
|
unsigned first, i, j, nbobjs = distances->nbobjs;
|
||||||
|
|
||||||
if (strcmp(dist->name, "NVLinkBandwidth")) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* find the first port */
|
/* find the first port */
|
||||||
first = (unsigned) -1;
|
first = (unsigned) -1;
|
||||||
for(i=0; i<nbobjs; i++)
|
for(i=0; i<nbobjs; i++)
|
||||||
@@ -1435,20 +1428,13 @@ hwloc__distances_transform_merge_switch_ports(hwloc_topology_t topology,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
hwloc__distances_transform_transitive_closure(hwloc_topology_t topology,
|
hwloc__distances_transform_transitive_closure(struct hwloc_distances_s *distances)
|
||||||
struct hwloc_distances_s *distances)
|
|
||||||
{
|
{
|
||||||
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
|
|
||||||
hwloc_obj_t *objs = distances->objs;
|
hwloc_obj_t *objs = distances->objs;
|
||||||
hwloc_uint64_t *values = distances->values;
|
hwloc_uint64_t *values = distances->values;
|
||||||
unsigned nbobjs = distances->nbobjs;
|
unsigned nbobjs = distances->nbobjs;
|
||||||
unsigned i, j, k;
|
unsigned i, j, k;
|
||||||
|
|
||||||
if (strcmp(dist->name, "NVLinkBandwidth")) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i=0; i<nbobjs; i++) {
|
for(i=0; i<nbobjs; i++) {
|
||||||
hwloc_uint64_t bw_i2sw = 0;
|
hwloc_uint64_t bw_i2sw = 0;
|
||||||
if (is_nvswitch(objs[i]))
|
if (is_nvswitch(objs[i]))
|
||||||
@@ -1467,8 +1453,8 @@ hwloc__distances_transform_transitive_closure(hwloc_topology_t topology,
|
|||||||
if (is_nvswitch(objs[k]))
|
if (is_nvswitch(objs[k]))
|
||||||
bw_sw2j += values[k*nbobjs+j];
|
bw_sw2j += values[k*nbobjs+j];
|
||||||
|
|
||||||
/* bandwidth from i to j is now min(i2sw,sw2j) */
|
/* bandwidth from i to j now gets indirect bandwidth too, min(i2sw,sw2j) */
|
||||||
values[i*nbobjs+j] = bw_i2sw > bw_sw2j ? bw_sw2j : bw_i2sw;
|
values[i*nbobjs+j] += bw_i2sw > bw_sw2j ? bw_sw2j : bw_i2sw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1476,7 +1462,7 @@ hwloc__distances_transform_transitive_closure(hwloc_topology_t topology,
|
|||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
hwloc_distances_transform(hwloc_topology_t topology,
|
hwloc_distances_transform(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||||
struct hwloc_distances_s *distances,
|
struct hwloc_distances_s *distances,
|
||||||
enum hwloc_distances_transform_e transform,
|
enum hwloc_distances_transform_e transform,
|
||||||
void *transform_attr,
|
void *transform_attr,
|
||||||
@@ -1495,13 +1481,13 @@ hwloc_distances_transform(hwloc_topology_t topology,
|
|||||||
case HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS:
|
case HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS:
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
err = hwloc__distances_transform_merge_switch_ports(topology, distances);
|
err = hwloc__distances_transform_merge_switch_ports(distances);
|
||||||
if (!err)
|
if (!err)
|
||||||
err = hwloc__distances_transform_remove_null(distances);
|
err = hwloc__distances_transform_remove_null(distances);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
case HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE:
|
case HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE:
|
||||||
return hwloc__distances_transform_transitive_closure(topology, distances);
|
return hwloc__distances_transform_transitive_closure(distances);
|
||||||
default:
|
default:
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
|
|||||||
105
src/3rdparty/hwloc/src/memattrs.c
vendored
105
src/3rdparty/hwloc/src/memattrs.c
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2020-2024 Inria. All rights reserved.
|
* Copyright © 2020-2025 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1158,6 +1158,8 @@ match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long fl
|
|||||||
{
|
{
|
||||||
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)
|
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)
|
||||||
return 1;
|
return 1;
|
||||||
|
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY)
|
||||||
|
return hwloc_bitmap_intersects(node->cpuset, cpuset);
|
||||||
if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
|
if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
|
||||||
&& hwloc_bitmap_isincluded(cpuset, node->cpuset))
|
&& hwloc_bitmap_isincluded(cpuset, node->cpuset))
|
||||||
return 1;
|
return 1;
|
||||||
@@ -1180,6 +1182,7 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
|||||||
|
|
||||||
if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY
|
if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY
|
||||||
|HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
|
|HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
|
||||||
|
|HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY
|
||||||
| HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
|
| HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
@@ -1226,6 +1229,93 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int compare_nodes_by_os_index(const void *_a, const void *_b)
|
||||||
|
{
|
||||||
|
const hwloc_obj_t * a = _a, * b = _b;
|
||||||
|
return (*a)->os_index - (*b)->os_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
hwloc_topology_get_default_nodeset(hwloc_topology_t topology,
|
||||||
|
hwloc_nodeset_t nodeset,
|
||||||
|
unsigned long flags)
|
||||||
|
{
|
||||||
|
hwloc_obj_t *nodes;
|
||||||
|
hwloc_bitmap_t remainingcpuset;
|
||||||
|
unsigned nrnodes, i;
|
||||||
|
const char *first_subtype;
|
||||||
|
|
||||||
|
if (flags) {
|
||||||
|
errno = EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
remainingcpuset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset);
|
||||||
|
if (!remainingcpuset)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
nrnodes = topology->slevels[HWLOC_SLEVEL_NUMANODE].nbobjs;
|
||||||
|
nodes = malloc(nrnodes * sizeof(*nodes));
|
||||||
|
if (!nodes)
|
||||||
|
goto out_with_remainingcpuset;
|
||||||
|
|
||||||
|
memcpy(nodes, topology->slevels[HWLOC_SLEVEL_NUMANODE].objs, nrnodes * sizeof(*nodes));
|
||||||
|
qsort(nodes, nrnodes, sizeof(*nodes), compare_nodes_by_os_index);
|
||||||
|
|
||||||
|
hwloc_bitmap_zero(nodeset);
|
||||||
|
|
||||||
|
/* always take the first node (FIXME: except if unexpected subtype?) */
|
||||||
|
first_subtype = nodes[0]->subtype;
|
||||||
|
hwloc_bitmap_set(nodeset, nodes[0]->os_index);
|
||||||
|
hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[0]->cpuset);
|
||||||
|
|
||||||
|
/* use all non-intersecting nodes with same subtype */
|
||||||
|
for(i=1; i<nrnodes; i++) {
|
||||||
|
/* check same or no subtype */
|
||||||
|
if (first_subtype) {
|
||||||
|
if (!nodes[i]->subtype || strcmp(first_subtype, nodes[i]->subtype))
|
||||||
|
continue;
|
||||||
|
} else if (nodes[i]->subtype) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* take non-overlapping nodes */
|
||||||
|
if (hwloc_bitmap_isincluded(nodes[i]->cpuset, remainingcpuset) /* can be empty */) {
|
||||||
|
hwloc_bitmap_set(nodeset, nodes[i]->os_index);
|
||||||
|
hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[i]->cpuset);
|
||||||
|
}
|
||||||
|
/* more needed? */
|
||||||
|
if (hwloc_bitmap_iszero(remainingcpuset))
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find more nodes to cover the entire topology cpuset.
|
||||||
|
* only take what's necessary: first nodes, non-empty */
|
||||||
|
for(i=1; i<nrnodes; i++) {
|
||||||
|
/* already taken? */
|
||||||
|
if (hwloc_bitmap_isset(nodeset, i))
|
||||||
|
continue;
|
||||||
|
/* take non-overlapping nodes, except empty */
|
||||||
|
if (hwloc_bitmap_isincluded(nodes[i]->cpuset, remainingcpuset)
|
||||||
|
&& !hwloc_bitmap_iszero(nodes[i]->cpuset)) {
|
||||||
|
hwloc_bitmap_set(nodeset, nodes[i]->os_index);
|
||||||
|
hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[i]->cpuset);
|
||||||
|
}
|
||||||
|
/* more needed? */
|
||||||
|
if (hwloc_bitmap_iszero(remainingcpuset))
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
free(nodes);
|
||||||
|
hwloc_bitmap_free(remainingcpuset);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
out_with_remainingcpuset:
|
||||||
|
hwloc_bitmap_free(remainingcpuset);
|
||||||
|
out:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**************************************
|
/**************************************
|
||||||
* Using memattrs to identify HBM/DRAM
|
* Using memattrs to identify HBM/DRAM
|
||||||
@@ -1433,10 +1523,15 @@ hwloc__group_memory_tiers(hwloc_topology_t topology,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sort nodes.
|
/* Sort nodes by tier type and bandwidth.
|
||||||
* We could also sort by the existing subtype.
|
*
|
||||||
* KNL is the only case where subtypes are set in backends, but we set memattrs as well there.
|
* We could also use the existing subtype but it's not clear it'd be better.
|
||||||
* Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes.
|
* For NVIDIA GPU, "GPUMemory" is set in the Linux backend, and used above to set tier type anyway.
|
||||||
|
* For KNL, the Linux backend sets subtypes and memattrs, sorting by memattrs already works fine.
|
||||||
|
* Existing subtypes could have been imported from XML, usually mostly OK except maybe SPM (fallback for I don't know)?
|
||||||
|
* An envvar (or HWLOC_MEMTIERS_REFRESH special value?) could be passed to ignore existing subtypes,
|
||||||
|
* but "GPUMemory" wouldn't be available anymore, we'd have to use something else like "PCIBusId",
|
||||||
|
* but that one might not always be specific to GPU-backed NUMA nodes?
|
||||||
*/
|
*/
|
||||||
hwloc_debug("Sorting memory node infos...\n");
|
hwloc_debug("Sorting memory node infos...\n");
|
||||||
qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
|
qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
|
||||||
|
|||||||
38
src/3rdparty/hwloc/src/topology-windows.c
vendored
38
src/3rdparty/hwloc/src/topology-windows.c
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
* Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -56,6 +56,9 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
|
|||||||
RelationCache,
|
RelationCache,
|
||||||
RelationProcessorPackage,
|
RelationProcessorPackage,
|
||||||
RelationGroup,
|
RelationGroup,
|
||||||
|
RelationProcessorDie,
|
||||||
|
RelationNumaNodeEx, /* only used to *request* extended numa info only, but included in RelationAll, never returned on output */
|
||||||
|
RelationProcessorModule,
|
||||||
RelationAll = 0xffff
|
RelationAll = 0xffff
|
||||||
} LOGICAL_PROCESSOR_RELATIONSHIP;
|
} LOGICAL_PROCESSOR_RELATIONSHIP;
|
||||||
#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
||||||
@@ -64,6 +67,11 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
|
|||||||
# define RelationGroup 4
|
# define RelationGroup 4
|
||||||
# define RelationAll 0xffff
|
# define RelationAll 0xffff
|
||||||
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
|
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
|
||||||
|
# ifndef HAVE_RELATIONPROCESSORDIE
|
||||||
|
# define RelationProcessorDie 5
|
||||||
|
# define RelationNumaNodeEx 6
|
||||||
|
# define RelationProcessorModule 7
|
||||||
|
# endif
|
||||||
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
||||||
|
|
||||||
#ifndef HAVE_GROUP_AFFINITY
|
#ifndef HAVE_GROUP_AFFINITY
|
||||||
@@ -366,7 +374,7 @@ hwloc_win_get_processor_groups(void)
|
|||||||
hwloc_debug("found %lu windows processor groups\n", nr_processor_groups);
|
hwloc_debug("found %lu windows processor groups\n", nr_processor_groups);
|
||||||
|
|
||||||
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
|
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
|
||||||
if (HWLOC_SHOW_ALL_ERRORS())
|
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||||
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
|
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1068,6 +1076,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||||||
|
|
||||||
id = HWLOC_UNKNOWN_INDEX;
|
id = HWLOC_UNKNOWN_INDEX;
|
||||||
switch (procInfo->Relationship) {
|
switch (procInfo->Relationship) {
|
||||||
|
case RelationNumaNodeEx: /* only used on input anyway */
|
||||||
case RelationNumaNode:
|
case RelationNumaNode:
|
||||||
type = HWLOC_OBJ_NUMANODE;
|
type = HWLOC_OBJ_NUMANODE;
|
||||||
/* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1
|
/* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1
|
||||||
@@ -1087,9 +1096,19 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||||||
break;
|
break;
|
||||||
case RelationProcessorPackage:
|
case RelationProcessorPackage:
|
||||||
type = HWLOC_OBJ_PACKAGE;
|
type = HWLOC_OBJ_PACKAGE;
|
||||||
|
num = procInfo->Processor.GroupCount;
|
||||||
|
GroupMask = procInfo->Processor.GroupMask;
|
||||||
|
break;
|
||||||
|
case RelationProcessorDie:
|
||||||
|
type = HWLOC_OBJ_DIE;
|
||||||
num = procInfo->Processor.GroupCount;
|
num = procInfo->Processor.GroupCount;
|
||||||
GroupMask = procInfo->Processor.GroupMask;
|
GroupMask = procInfo->Processor.GroupMask;
|
||||||
break;
|
break;
|
||||||
|
case RelationProcessorModule:
|
||||||
|
type = HWLOC_OBJ_GROUP;
|
||||||
|
num = procInfo->Processor.GroupCount;
|
||||||
|
GroupMask = procInfo->Processor.GroupMask;
|
||||||
|
break;
|
||||||
case RelationCache:
|
case RelationCache:
|
||||||
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
|
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
|
||||||
/* GroupCount added approximately with NumaNode.GroupCount above */
|
/* GroupCount added approximately with NumaNode.GroupCount above */
|
||||||
@@ -1211,6 +1230,19 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case HWLOC_OBJ_GROUP:
|
||||||
|
switch (procInfo->Relationship) {
|
||||||
|
case RelationGroup:
|
||||||
|
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
|
||||||
|
break;
|
||||||
|
case RelationProcessorModule:
|
||||||
|
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
|
||||||
|
obj->subtype = strdup("Module");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
12
src/3rdparty/hwloc/src/topology-x86.c
vendored
12
src/3rdparty/hwloc/src/topology-x86.c
vendored
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
* Copyright © 2010-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2010-2013 Université Bordeaux
|
* Copyright © 2010-2013 Université Bordeaux
|
||||||
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -653,7 +653,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||||
infos->apicid = ebx >> 24;
|
infos->apicid = ebx >> 24;
|
||||||
if (edx & (1 << 28)) {
|
if (edx & (1 << 28)) {
|
||||||
legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
|
unsigned ebx_16_23 = (ebx >> 16) & 0xff;
|
||||||
|
if (ebx_16_23) {
|
||||||
|
legacy_max_log_proc = 1 << hwloc_flsl(ebx_16_23 - 1);
|
||||||
|
} else {
|
||||||
|
hwloc_debug("HTT bit set in CPUID 0x01.edx, but legacy_max_proc = 0 in ebx, assuming legacy_max_log_proc = 1\n");
|
||||||
|
legacy_max_log_proc = 1;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n");
|
hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n");
|
||||||
legacy_max_log_proc = 1;
|
legacy_max_log_proc = 1;
|
||||||
@@ -1742,7 +1748,7 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||||||
|
|
||||||
if (topology->levels[0][0]->cpuset) {
|
if (topology->levels[0][0]->cpuset) {
|
||||||
/* somebody else discovered things, reconnect levels so that we can look at them */
|
/* somebody else discovered things, reconnect levels so that we can look at them */
|
||||||
hwloc_topology_reconnect(topology, 0);
|
hwloc__reconnect(topology, 0);
|
||||||
if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
|
if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
|
||||||
/* only PUs were discovered, as much as we would, complete the topology with everything else */
|
/* only PUs were discovered, as much as we would, complete the topology with everything else */
|
||||||
alreadypus = 1;
|
alreadypus = 1;
|
||||||
|
|||||||
37
src/3rdparty/hwloc/src/topology-xml.c
vendored
37
src/3rdparty/hwloc/src/topology-xml.c
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
* Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2011, 2020 Université Bordeaux
|
* Copyright © 2009-2011, 2020 Université Bordeaux
|
||||||
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
@@ -415,6 +415,20 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if (!strcmp(name, "numanode_type")) {
|
||||||
|
switch (obj->type) {
|
||||||
|
case HWLOC_OBJ_NUMANODE: {
|
||||||
|
/* ignored for now, here for possible forward compat */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
if (hwloc__xml_verbose())
|
||||||
|
fprintf(stderr, "%s: ignoring numanode_type attribute for non-NUMA object\n",
|
||||||
|
state->global->msgprefix);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
else if (data->version_major < 2) {
|
else if (data->version_major < 2) {
|
||||||
/************************
|
/************************
|
||||||
* deprecated from 1.x
|
* deprecated from 1.x
|
||||||
@@ -876,14 +890,19 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
|||||||
/* deal with possible future type */
|
/* deal with possible future type */
|
||||||
obj->type = HWLOC_OBJ_GROUP;
|
obj->type = HWLOC_OBJ_GROUP;
|
||||||
obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER;
|
obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER;
|
||||||
} else if (!strcasecmp(attrvalue, "MemCache")) {
|
}
|
||||||
|
#if 0
|
||||||
|
/* reenable if there's ever a future type that should be ignored without being an error */
|
||||||
|
else if (!strcasecmp(attrvalue, "MemCache")) {
|
||||||
/* ignore possible future type */
|
/* ignore possible future type */
|
||||||
obj->type = _HWLOC_OBJ_FUTURE;
|
obj->type = _HWLOC_OBJ_FUTURE;
|
||||||
ignored = 1;
|
ignored = 1;
|
||||||
if (hwloc__xml_verbose())
|
if (hwloc__xml_verbose())
|
||||||
fprintf(stderr, "%s: %s object not-supported, will be ignored\n",
|
fprintf(stderr, "%s: %s object not-supported, will be ignored\n",
|
||||||
state->global->msgprefix, attrvalue);
|
state->global->msgprefix, attrvalue);
|
||||||
} else {
|
}
|
||||||
|
#endif
|
||||||
|
else {
|
||||||
if (hwloc__xml_verbose())
|
if (hwloc__xml_verbose())
|
||||||
fprintf(stderr, "%s: unrecognized object type string %s\n",
|
fprintf(stderr, "%s: unrecognized object type string %s\n",
|
||||||
state->global->msgprefix, attrvalue);
|
state->global->msgprefix, attrvalue);
|
||||||
@@ -958,22 +977,22 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
|||||||
if (hwloc__obj_type_is_normal(obj->type)) {
|
if (hwloc__obj_type_is_normal(obj->type)) {
|
||||||
if (!hwloc__obj_type_is_normal(parent->type)) {
|
if (!hwloc__obj_type_is_normal(parent->type)) {
|
||||||
if (hwloc__xml_verbose())
|
if (hwloc__xml_verbose())
|
||||||
fprintf(stderr, "normal object %s cannot be child of non-normal parent %s\n",
|
fprintf(stderr, "%s: normal object %s cannot be child of non-normal parent %s\n",
|
||||||
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
|
state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
|
||||||
goto error_with_object;
|
goto error_with_object;
|
||||||
}
|
}
|
||||||
} else if (hwloc__obj_type_is_memory(obj->type)) {
|
} else if (hwloc__obj_type_is_memory(obj->type)) {
|
||||||
if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) {
|
if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) {
|
||||||
if (hwloc__xml_verbose())
|
if (hwloc__xml_verbose())
|
||||||
fprintf(stderr, "Memory object %s cannot be child of non-normal-or-memory parent %s\n",
|
fprintf(stderr, "%s: Memory object %s cannot be child of non-normal-or-memory parent %s\n",
|
||||||
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
|
state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
|
||||||
goto error_with_object;
|
goto error_with_object;
|
||||||
}
|
}
|
||||||
} else if (hwloc__obj_type_is_io(obj->type)) {
|
} else if (hwloc__obj_type_is_io(obj->type)) {
|
||||||
if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) {
|
if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) {
|
||||||
if (hwloc__xml_verbose())
|
if (hwloc__xml_verbose())
|
||||||
fprintf(stderr, "I/O object %s cannot be child of non-normal-or-I/O parent %s\n",
|
fprintf(stderr, "%s: I/O object %s cannot be child of non-normal-or-I/O parent %s\n",
|
||||||
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
|
state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
|
||||||
goto error_with_object;
|
goto error_with_object;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
201
src/3rdparty/hwloc/src/topology.c
vendored
201
src/3rdparty/hwloc/src/topology.c
vendored
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
* Copyright © 2009-2025 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright © 2022 IBM Corporation. All rights reserved.
|
* Copyright © 2022 IBM Corporation. All rights reserved.
|
||||||
@@ -54,56 +54,6 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef HWLOC_HAVE_LEVELZERO
|
|
||||||
/*
|
|
||||||
* Define ZES_ENABLE_SYSMAN=1 early so that the LevelZero backend gets Sysman enabled.
|
|
||||||
*
|
|
||||||
* Only if the levelzero was enabled in this build so that we don't enable sysman
|
|
||||||
* for external levelzero users when hwloc doesn't need it. If somebody ever loads
|
|
||||||
* an external levelzero plugin in a hwloc library built without levelzero (unlikely),
|
|
||||||
* he may have to manually set ZES_ENABLE_SYSMAN=1.
|
|
||||||
*
|
|
||||||
* Use the constructor if supported and/or the Windows DllMain callback.
|
|
||||||
* Do it in the main hwloc library instead of the levelzero component because
|
|
||||||
* the latter could be loaded later as a plugin.
|
|
||||||
*
|
|
||||||
* L0 seems to be using getenv() to check this variable on Windows
|
|
||||||
* (at least in the Intel Compute-Runtime of March 2021),
|
|
||||||
* but setenv() doesn't seem to exist on Windows, hence use putenv() to set the variable.
|
|
||||||
*
|
|
||||||
* For the record, Get/SetEnvironmentVariable() is not exactly the same as getenv/putenv():
|
|
||||||
* - getenv() doesn't see what was set with SetEnvironmentVariable()
|
|
||||||
* - GetEnvironmentVariable() doesn't see putenv() in cygwin (while it does in MSVC and MinGW).
|
|
||||||
* Hence, if L0 ever switches from getenv() to GetEnvironmentVariable(),
|
|
||||||
* it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable().
|
|
||||||
* Hopefully L0 will provide a way to enable Sysman without env vars before it happens.
|
|
||||||
*/
|
|
||||||
#if HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR
|
|
||||||
static void hwloc_constructor(void) __attribute__((constructor));
|
|
||||||
static void hwloc_constructor(void)
|
|
||||||
{
|
|
||||||
if (!getenv("ZES_ENABLE_SYSMAN"))
|
|
||||||
#ifdef HWLOC_WIN_SYS
|
|
||||||
putenv("ZES_ENABLE_SYSMAN=1");
|
|
||||||
#else
|
|
||||||
setenv("ZES_ENABLE_SYSMAN", "1", 1);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifdef HWLOC_WIN_SYS
|
|
||||||
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
|
|
||||||
{
|
|
||||||
if (fdwReason == DLL_PROCESS_ATTACH) {
|
|
||||||
if (!getenv("ZES_ENABLE_SYSMAN"))
|
|
||||||
/* Windows does not have a setenv, so use putenv. */
|
|
||||||
putenv((char *) "ZES_ENABLE_SYSMAN=1");
|
|
||||||
}
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif /* HWLOC_HAVE_LEVELZERO */
|
|
||||||
|
|
||||||
|
|
||||||
unsigned hwloc_get_api_version(void)
|
unsigned hwloc_get_api_version(void)
|
||||||
{
|
{
|
||||||
return HWLOC_API_VERSION;
|
return HWLOC_API_VERSION;
|
||||||
@@ -179,7 +129,7 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms
|
|||||||
report_insert_error_format_obj(oldstr, sizeof(oldstr), old);
|
report_insert_error_format_obj(oldstr, sizeof(oldstr), old);
|
||||||
|
|
||||||
fprintf(stderr, "****************************************************************************\n");
|
fprintf(stderr, "****************************************************************************\n");
|
||||||
fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION);
|
fprintf(stderr, "* hwloc %s received invalid information.\n", HWLOC_VERSION);
|
||||||
fprintf(stderr, "*\n");
|
fprintf(stderr, "*\n");
|
||||||
fprintf(stderr, "* Failed with error: %s\n", msg);
|
fprintf(stderr, "* Failed with error: %s\n", msg);
|
||||||
fprintf(stderr, "* while inserting %s\n", newstr);
|
fprintf(stderr, "* while inserting %s\n", newstr);
|
||||||
@@ -1966,6 +1916,51 @@ static void hwloc_set_group_depth(hwloc_topology_t topology);
|
|||||||
static void hwloc_connect_children(hwloc_obj_t parent);
|
static void hwloc_connect_children(hwloc_obj_t parent);
|
||||||
static int hwloc_connect_levels(hwloc_topology_t topology);
|
static int hwloc_connect_levels(hwloc_topology_t topology);
|
||||||
static int hwloc_connect_special_levels(hwloc_topology_t topology);
|
static int hwloc_connect_special_levels(hwloc_topology_t topology);
|
||||||
|
static int hwloc_filter_levels_keep_structure(hwloc_topology_t topology);
|
||||||
|
|
||||||
|
/* reconnect children and levels,
|
||||||
|
* and optionnally merged identical levels while keeping structure.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
hwloc__reconnect(struct hwloc_topology *topology, unsigned long flags)
|
||||||
|
{
|
||||||
|
int merged_levels = 0;
|
||||||
|
|
||||||
|
if (topology->modified) {
|
||||||
|
hwloc_connect_children(topology->levels[0][0]);
|
||||||
|
|
||||||
|
if (hwloc_connect_levels(topology) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE) {
|
||||||
|
merged_levels = hwloc_filter_levels_keep_structure(topology);
|
||||||
|
/* If > 0, we merged some levels,
|
||||||
|
* some child+parent special children list may have been merged,
|
||||||
|
* hence specials level might need reordering,
|
||||||
|
* So reconnect special levels only here at the end.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (topology->modified || merged_levels) {
|
||||||
|
if (hwloc_connect_special_levels(topology) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
topology->modified = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
||||||
|
{
|
||||||
|
if (flags) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return hwloc__reconnect(topology, 0);
|
||||||
|
}
|
||||||
|
|
||||||
hwloc_obj_t
|
hwloc_obj_t
|
||||||
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||||
@@ -2058,7 +2053,10 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
|||||||
|
|
||||||
/* properly inserted */
|
/* properly inserted */
|
||||||
hwloc_obj_add_children_sets(res);
|
hwloc_obj_add_children_sets(res);
|
||||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
/* reconnect levels.
|
||||||
|
* no need to filter levels keep_structure because groups are either auto-merged
|
||||||
|
* or have the dont_merge attribute */
|
||||||
|
if (hwloc__reconnect(topology, 0) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* Compute group total_memory. */
|
/* Compute group total_memory. */
|
||||||
@@ -2550,26 +2548,13 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return > 0 if any level was removed.
|
/* return > 0 if any level was removed. */
|
||||||
* performs its own reconnect internally if needed
|
|
||||||
*/
|
|
||||||
static int
|
static int
|
||||||
hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
||||||
{
|
{
|
||||||
unsigned i, j;
|
unsigned i, j;
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
if (topology->modified) {
|
|
||||||
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
|
|
||||||
* and at the end of this function:
|
|
||||||
* - we need normal levels before merging.
|
|
||||||
* - and we'll need to update special levels after merging.
|
|
||||||
*/
|
|
||||||
hwloc_connect_children(topology->levels[0][0]);
|
|
||||||
if (hwloc_connect_levels(topology) < 0)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* start from the bottom since we'll remove intermediate levels */
|
/* start from the bottom since we'll remove intermediate levels */
|
||||||
for(i=topology->nb_levels-1; i>0; i--) {
|
for(i=topology->nb_levels-1; i>0; i--) {
|
||||||
int replacechild = 0, replaceparent = 0;
|
int replacechild = 0, replaceparent = 0;
|
||||||
@@ -2591,9 +2576,15 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
|||||||
if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i))
|
if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i))
|
||||||
replacechild = 0;
|
replacechild = 0;
|
||||||
}
|
}
|
||||||
if (!replacechild && !replaceparent)
|
if (!replacechild && !replaceparent) {
|
||||||
|
/* always merge Die into Package when levels are identical */
|
||||||
|
if (type1 == HWLOC_OBJ_PACKAGE && type2 == HWLOC_OBJ_DIE)
|
||||||
|
replacechild = 1;
|
||||||
|
}
|
||||||
|
if (!replacechild && !replaceparent) {
|
||||||
/* no ignoring */
|
/* no ignoring */
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
/* Decide which one to actually replace */
|
/* Decide which one to actually replace */
|
||||||
if (replaceparent && replacechild) {
|
if (replaceparent && replacechild) {
|
||||||
/* If both may be replaced, look at obj_type_priority */
|
/* If both may be replaced, look at obj_type_priority */
|
||||||
@@ -2736,20 +2727,6 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (res > 0 || topology-> modified) {
|
|
||||||
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
|
|
||||||
* and at the beginning of this function.
|
|
||||||
* If we merged some levels, some child+parent special children lisst
|
|
||||||
* may have been merged, hence specials level might need reordering,
|
|
||||||
* So reconnect special levels only here at the end
|
|
||||||
* (it's not needed at the beginning of this function).
|
|
||||||
*/
|
|
||||||
if (hwloc_connect_special_levels(topology) < 0)
|
|
||||||
return -1;
|
|
||||||
topology->modified = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3278,33 +3255,6 @@ hwloc_connect_levels(hwloc_topology_t topology)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
|
||||||
{
|
|
||||||
/* WARNING: when updating this function, the replicated code must
|
|
||||||
* also be updated inside hwloc_filter_levels_keep_structure()
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (flags) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (!topology->modified)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
hwloc_connect_children(topology->levels[0][0]);
|
|
||||||
|
|
||||||
if (hwloc_connect_levels(topology) < 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
if (hwloc_connect_special_levels(topology) < 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
topology->modified = 0;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* for regression testing, make sure the order of io devices
|
/* for regression testing, make sure the order of io devices
|
||||||
* doesn't change with the dentry order in the filesystem
|
* doesn't change with the dentry order in the filesystem
|
||||||
*
|
*
|
||||||
@@ -3561,32 +3511,13 @@ hwloc_discover(struct hwloc_topology *topology,
|
|||||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* see if we should ignore the root now that we know how many children it has */
|
|
||||||
if (!hwloc_filter_check_keep_object(topology, topology->levels[0][0])
|
|
||||||
&& topology->levels[0][0]->first_child && !topology->levels[0][0]->first_child->next_sibling) {
|
|
||||||
hwloc_obj_t oldroot = topology->levels[0][0];
|
|
||||||
hwloc_obj_t newroot = oldroot->first_child;
|
|
||||||
/* switch to the new root */
|
|
||||||
newroot->parent = NULL;
|
|
||||||
topology->levels[0][0] = newroot;
|
|
||||||
/* move oldroot memory/io/misc children before newroot children */
|
|
||||||
if (oldroot->memory_first_child)
|
|
||||||
prepend_siblings_list(&newroot->memory_first_child, oldroot->memory_first_child, newroot);
|
|
||||||
if (oldroot->io_first_child)
|
|
||||||
prepend_siblings_list(&newroot->io_first_child, oldroot->io_first_child, newroot);
|
|
||||||
if (oldroot->misc_first_child)
|
|
||||||
prepend_siblings_list(&newroot->misc_first_child, oldroot->misc_first_child, newroot);
|
|
||||||
/* destroy oldroot and use the new one */
|
|
||||||
hwloc_free_unlinked_object(oldroot);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All object cpusets and nodesets are properly set now.
|
* All object cpusets and nodesets are properly set now.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Now connect handy pointers to make remaining discovery easier. */
|
/* Now connect handy pointers to make remaining discovery easier. */
|
||||||
hwloc_debug("%s", "\nOk, finished tweaking, now connect\n");
|
hwloc_debug("%s", "\nOk, finished tweaking, now connect\n");
|
||||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
if (hwloc__reconnect(topology, 0) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||||
|
|
||||||
@@ -3642,12 +3573,12 @@ hwloc_discover(struct hwloc_topology *topology,
|
|||||||
}
|
}
|
||||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||||
|
|
||||||
|
/* reconnect all (new groups might have appeared, IO added, etc),
|
||||||
|
* and (now that everything was added) remove identical levels while keeping structure
|
||||||
|
*/
|
||||||
hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
|
hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
|
||||||
if (hwloc_filter_levels_keep_structure(topology) < 0)
|
if (hwloc__reconnect(topology, _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
/* takes care of reconnecting children/levels internally,
|
|
||||||
* because it needs normal levels.
|
|
||||||
* and it's often needed below because of Groups inserted for I/Os anyway */
|
|
||||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||||
|
|
||||||
/* accumulate children memory in total_memory fields (only once parent is set) */
|
/* accumulate children memory in total_memory fields (only once parent is set) */
|
||||||
@@ -4494,7 +4425,7 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
|
|||||||
hwloc_bitmap_free(droppedcpuset);
|
hwloc_bitmap_free(droppedcpuset);
|
||||||
hwloc_bitmap_free(droppednodeset);
|
hwloc_bitmap_free(droppednodeset);
|
||||||
|
|
||||||
if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */
|
if (hwloc__reconnect(topology, _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE) < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* some objects may have disappeared and sets were modified,
|
/* some objects may have disappeared and sets were modified,
|
||||||
@@ -5116,6 +5047,8 @@ hwloc_topology_check(struct hwloc_topology *topology)
|
|||||||
unsigned i;
|
unsigned i;
|
||||||
int j, depth;
|
int j, depth;
|
||||||
|
|
||||||
|
assert(!topology->modified);
|
||||||
|
|
||||||
/* make sure we can use ranges to check types */
|
/* make sure we can use ranges to check types */
|
||||||
|
|
||||||
/* hwloc__obj_type_is_{,d,i}cache() want cache types to be ordered like this */
|
/* hwloc__obj_type_is_{,d,i}cache() want cache types to be ordered like this */
|
||||||
|
|||||||
2
src/3rdparty/libethash/endian.h
vendored
2
src/3rdparty/libethash/endian.h
vendored
@@ -31,7 +31,7 @@
|
|||||||
#include <libkern/OSByteOrder.h>
|
#include <libkern/OSByteOrder.h>
|
||||||
#define ethash_swap_u32(input_) OSSwapInt32(input_)
|
#define ethash_swap_u32(input_) OSSwapInt32(input_)
|
||||||
#define ethash_swap_u64(input_) OSSwapInt64(input_)
|
#define ethash_swap_u64(input_) OSSwapInt64(input_)
|
||||||
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
|
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) || defined(__HAIKU__)
|
||||||
#define ethash_swap_u32(input_) bswap32(input_)
|
#define ethash_swap_u32(input_) bswap32(input_)
|
||||||
#define ethash_swap_u64(input_) bswap64(input_)
|
#define ethash_swap_u64(input_) bswap64(input_)
|
||||||
#elif defined(__OpenBSD__)
|
#elif defined(__OpenBSD__)
|
||||||
|
|||||||
@@ -46,7 +46,12 @@ else()
|
|||||||
set(CPUID_LIB "")
|
set(CPUID_LIB "")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (XMRIG_ARM)
|
if (XMRIG_RISCV)
|
||||||
|
list(APPEND SOURCES_BACKEND_CPU
|
||||||
|
src/backend/cpu/platform/lscpu_riscv.cpp
|
||||||
|
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
|
||||||
|
)
|
||||||
|
elseif (XMRIG_ARM)
|
||||||
list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp)
|
list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp)
|
||||||
|
|
||||||
if (XMRIG_OS_WIN)
|
if (XMRIG_OS_WIN)
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ public:
|
|||||||
ICpuInfo() = default;
|
ICpuInfo() = default;
|
||||||
virtual ~ICpuInfo() = default;
|
virtual ~ICpuInfo() = default;
|
||||||
|
|
||||||
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
|
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) || defined(__riscv) && (__riscv_xlen == 64)
|
||||||
inline constexpr static bool is64bit() { return true; }
|
inline constexpr static bool is64bit() { return true; }
|
||||||
# else
|
# else
|
||||||
inline constexpr static bool is64bit() { return false; }
|
inline constexpr static bool is64bit() { return false; }
|
||||||
|
|||||||
@@ -250,7 +250,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x19:
|
case 0x19:
|
||||||
if (m_model == 0x61) {
|
if ((m_model == 0x61) || (m_model == 0x75)) {
|
||||||
m_arch = ARCH_ZEN4;
|
m_arch = ARCH_ZEN4;
|
||||||
m_msrMod = MSR_MOD_RYZEN_19H_ZEN4;
|
m_msrMod = MSR_MOD_RYZEN_19H_ZEN4;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ protected:
|
|||||||
inline Vendor vendor() const override { return m_vendor; }
|
inline Vendor vendor() const override { return m_vendor; }
|
||||||
inline uint32_t model() const override
|
inline uint32_t model() const override
|
||||||
{
|
{
|
||||||
# ifndef XMRIG_ARM
|
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
return m_model;
|
return m_model;
|
||||||
# else
|
# else
|
||||||
return 0;
|
return 0;
|
||||||
@@ -80,7 +80,7 @@ protected:
|
|||||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
# ifndef XMRIG_ARM
|
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
uint32_t m_procInfo = 0;
|
uint32_t m_procInfo = 0;
|
||||||
uint32_t m_family = 0;
|
uint32_t m_family = 0;
|
||||||
uint32_t m_model = 0;
|
uint32_t m_model = 0;
|
||||||
|
|||||||
116
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
Normal file
116
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
|
||||||
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright (c) 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fstream>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
|
||||||
|
#include "backend/cpu/platform/BasicCpuInfo.h"
|
||||||
|
#include "base/tools/String.h"
|
||||||
|
#include "3rdparty/rapidjson/document.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
extern String cpu_name_riscv();
|
||||||
|
extern bool has_riscv_vector();
|
||||||
|
extern bool has_riscv_crypto();
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Builds the basic (non-hwloc) CPU description on RISC-V.
 *
 * The thread count comes from std::thread::hardware_concurrency(), the brand
 * string from cpu_name_riscv(), and the feature flags from has_riscv_crypto()
 * and a sysfs probe.
 */
xmrig::BasicCpuInfo::BasicCpuInfo() :
    m_threads(std::thread::hardware_concurrency())
{
    // One unit per reported hardware thread, numbered 0..m_threads-1.
    m_units.resize(m_threads);
    for (int32_t i = 0; i < static_cast<int32_t>(m_threads); ++i) {
        m_units[i] = i;
    }

    // Generic brand unless cpu_name_riscv() provides a non-null name.
    const char *brand = "RISC-V";

    auto name = cpu_name_riscv();
    if (!name.isNull()) {
        brand = name.data();
    }

    // strncpy() does not terminate the destination when the source is truncated,
    // so terminate explicitly instead of relying on m_brand being zero-filled.
    strncpy(m_brand, brand, sizeof(m_brand) - 1);
    m_brand[sizeof(m_brand) - 1] = '\0';

    // Check for crypto extensions (Zknd/Zkne/Zknh - AES and SHA)
    // NOTE(review): has_riscv_crypto() also reports true for SHA-only (zknh) and
    // SM-suite (zksed/zksh) CPUs - confirm that FLAG_AES is the intended flag for those.
    m_flags.set(FLAG_AES, has_riscv_crypto());

    // 1GB huge pages are flagged when the kernel's sysfs entry for the 1048576kB pool can be opened.
    m_flags.set(FLAG_PDPE1GB, std::ifstream("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages").good());
}
|
||||||
|
|
||||||
|
|
||||||
|
const char *xmrig::BasicCpuInfo::backend() const
|
||||||
|
{
|
||||||
|
return "basic/1";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Produces the default thread set for the given algorithm.
 *
 * Every hardware thread is used. When GhostRider support is compiled in and the
 * algorithm belongs to that family, the threads are created with the extra
 * constructor argument 8. The second parameter (limit) is ignored.
 */
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
{
    const auto count = threads();

#   ifdef XMRIG_ALGO_GHOSTRIDER
    const bool isGhostRider = (algorithm.family() == Algorithm::GHOSTRIDER);
    if (isGhostRider) {
        return CpuThreads(count, 8);
    }
#   endif

    return CpuThreads(count);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Serializes the CPU description into a JSON object allocated from `doc`.
 *
 * x86-only fields are reported with fixed values ("avx2" false, "msr" and
 * "assembly" "none"). "arch" follows is64bit() so it cannot contradict the
 * "64_bit" field on a 32-bit RISC-V build.
 */
rapidjson::Value xmrig::BasicCpuInfo::toJSON(rapidjson::Document &doc) const
{
    using namespace rapidjson;
    auto &allocator = doc.GetAllocator();

    Value out(kObjectType);

    out.AddMember("brand",      StringRef(brand()), allocator);
    out.AddMember("aes",        hasAES(), allocator);
    out.AddMember("avx2",       false, allocator);
    out.AddMember("x64",        is64bit(), allocator); // DEPRECATED will be removed in the next major release.
    out.AddMember("64_bit",     is64bit(), allocator);
    out.AddMember("l2",         static_cast<uint64_t>(L2()), allocator);
    out.AddMember("l3",         static_cast<uint64_t>(L3()), allocator);
    out.AddMember("cores",      static_cast<uint64_t>(cores()), allocator);
    out.AddMember("threads",    static_cast<uint64_t>(threads()), allocator);
    out.AddMember("packages",   static_cast<uint64_t>(packages()), allocator);
    out.AddMember("nodes",      static_cast<uint64_t>(nodes()), allocator);
    out.AddMember("backend",    StringRef(backend()), allocator);
    out.AddMember("msr",        "none", allocator);
    out.AddMember("assembly",   "none", allocator);

    // Both candidates are string literals, so referencing them without a copy is safe.
    out.AddMember("arch",       StringRef(is64bit() ? "riscv64" : "riscv32"), allocator);

    Value flags(kArrayType);

    // AES is the only feature flag this backend reports.
    if (hasAES()) {
        flags.PushBack("aes", allocator);
    }

    out.AddMember("flags", flags, allocator);

    return out;
}
|
||||||
@@ -87,7 +87,7 @@ static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t typ
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_ARM
|
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
static inline std::vector<hwloc_obj_t> findByType(hwloc_obj_t obj, hwloc_obj_type_t type)
|
static inline std::vector<hwloc_obj_t> findByType(hwloc_obj_t obj, hwloc_obj_type_t type)
|
||||||
{
|
{
|
||||||
std::vector<hwloc_obj_t> out;
|
std::vector<hwloc_obj_t> out;
|
||||||
@@ -207,7 +207,7 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset)
|
|||||||
|
|
||||||
xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
|
xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
|
||||||
{
|
{
|
||||||
# ifndef XMRIG_ARM
|
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
if (L2() == 0 && L3() == 0) {
|
if (L2() == 0 && L3() == 0) {
|
||||||
return BasicCpuInfo::threads(algorithm, limit);
|
return BasicCpuInfo::threads(algorithm, limit);
|
||||||
}
|
}
|
||||||
@@ -277,7 +277,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui
|
|||||||
|
|
||||||
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
|
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
|
||||||
{
|
{
|
||||||
# ifndef XMRIG_ARM
|
# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
constexpr size_t oneMiB = 1024U * 1024U;
|
constexpr size_t oneMiB = 1024U * 1024U;
|
||||||
|
|
||||||
size_t PUs = countByType(cache, HWLOC_OBJ_PU);
|
size_t PUs = countByType(cache, HWLOC_OBJ_PU);
|
||||||
@@ -311,17 +311,17 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||||||
uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1;
|
uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1;
|
||||||
|
|
||||||
if (cache->attr->cache.depth == 3) {
|
if (cache->attr->cache.depth == 3) {
|
||||||
for (size_t i = 0; i < cache->arity; ++i) {
|
auto process_L2 = [&L2, &L2_associativity, L3_exclusive, this, &extra, scratchpad](hwloc_obj_t l2) {
|
||||||
hwloc_obj_t l2 = cache->children[i];
|
|
||||||
if (!hwloc_obj_type_is_cache(l2->type) || l2->attr == nullptr) {
|
if (!hwloc_obj_type_is_cache(l2->type) || l2->attr == nullptr) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
L2 += l2->attr->cache.size;
|
L2 += l2->attr->cache.size;
|
||||||
L2_associativity = l2->attr->cache.associativity;
|
L2_associativity = l2->attr->cache.associativity;
|
||||||
|
|
||||||
if (L3_exclusive) {
|
if (L3_exclusive) {
|
||||||
if (vendor() == VENDOR_AMD) {
|
if ((vendor() == VENDOR_AMD) && ((arch() == ARCH_ZEN4) || (arch() == ARCH_ZEN5))) {
|
||||||
|
// Use extra L2 only on newer CPUs because older CPUs (Zen 3 and older) don't benefit from it.
|
||||||
// For some reason, AMD CPUs can use only half of the exclusive L2/L3 cache combo efficiently
|
// For some reason, AMD CPUs can use only half of the exclusive L2/L3 cache combo efficiently
|
||||||
extra += std::min<size_t>(l2->attr->cache.size / 2, scratchpad);
|
extra += std::min<size_t>(l2->attr->cache.size / 2, scratchpad);
|
||||||
}
|
}
|
||||||
@@ -329,6 +329,18 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||||||
extra += scratchpad;
|
extra += scratchpad;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (size_t i = 0; i < cache->arity; ++i) {
|
||||||
|
hwloc_obj_t ch = cache->children[i];
|
||||||
|
if (ch->type == HWLOC_OBJ_GROUP) {
|
||||||
|
for (size_t j = 0; j < ch->arity; ++j) {
|
||||||
|
process_L2(ch->children[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
process_L2(ch);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018 Riku Voipio <riku.voipio@iki.fi>
|
* Copyright (c) 2018 Riku Voipio <riku.voipio@iki.fi>
|
||||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2023 XMRig <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -98,8 +98,11 @@ static const id_part arm_part[] = {
|
|||||||
{ 0xd0e, "Cortex-A76AE" },
|
{ 0xd0e, "Cortex-A76AE" },
|
||||||
{ 0xd13, "Cortex-R52" },
|
{ 0xd13, "Cortex-R52" },
|
||||||
{ 0xd15, "Cortex-R82" },
|
{ 0xd15, "Cortex-R82" },
|
||||||
|
{ 0xd16, "Cortex-R52+" },
|
||||||
{ 0xd20, "Cortex-M23" },
|
{ 0xd20, "Cortex-M23" },
|
||||||
{ 0xd21, "Cortex-M33" },
|
{ 0xd21, "Cortex-M33" },
|
||||||
|
{ 0xd22, "Cortex-M55" },
|
||||||
|
{ 0xd23, "Cortex-M85" },
|
||||||
{ 0xd40, "Neoverse-V1" },
|
{ 0xd40, "Neoverse-V1" },
|
||||||
{ 0xd41, "Cortex-A78" },
|
{ 0xd41, "Cortex-A78" },
|
||||||
{ 0xd42, "Cortex-A78AE" },
|
{ 0xd42, "Cortex-A78AE" },
|
||||||
@@ -115,6 +118,17 @@ static const id_part arm_part[] = {
|
|||||||
{ 0xd4d, "Cortex-A715" },
|
{ 0xd4d, "Cortex-A715" },
|
||||||
{ 0xd4e, "Cortex-X3" },
|
{ 0xd4e, "Cortex-X3" },
|
||||||
{ 0xd4f, "Neoverse-V2" },
|
{ 0xd4f, "Neoverse-V2" },
|
||||||
|
{ 0xd80, "Cortex-A520" },
|
||||||
|
{ 0xd81, "Cortex-A720" },
|
||||||
|
{ 0xd82, "Cortex-X4" },
|
||||||
|
{ 0xd83, "Neoverse-V3AE" },
|
||||||
|
{ 0xd84, "Neoverse-V3" },
|
||||||
|
{ 0xd85, "Cortex-X925" },
|
||||||
|
{ 0xd87, "Cortex-A725" },
|
||||||
|
{ 0xd88, "Cortex-A520AE" },
|
||||||
|
{ 0xd89, "Cortex-A720AE" },
|
||||||
|
{ 0xd8e, "Neoverse-N3" },
|
||||||
|
{ 0xd8f, "Cortex-A320" },
|
||||||
{ -1, nullptr }
|
{ -1, nullptr }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -154,6 +168,7 @@ static const id_part apm_part[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const id_part qcom_part[] = {
|
static const id_part qcom_part[] = {
|
||||||
|
{ 0x001, "Oryon" },
|
||||||
{ 0x00f, "Scorpion" },
|
{ 0x00f, "Scorpion" },
|
||||||
{ 0x02d, "Scorpion" },
|
{ 0x02d, "Scorpion" },
|
||||||
{ 0x04d, "Krait" },
|
{ 0x04d, "Krait" },
|
||||||
@@ -194,6 +209,22 @@ static const id_part marvell_part[] = {
|
|||||||
{ -1, nullptr }
|
{ -1, nullptr }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const id_part apple_part[] = {
|
||||||
|
{ 0x022, "M1" },
|
||||||
|
{ 0x023, "M1" },
|
||||||
|
{ 0x024, "M1-Pro" },
|
||||||
|
{ 0x025, "M1-Pro" },
|
||||||
|
{ 0x028, "M1-Max" },
|
||||||
|
{ 0x029, "M1-Max" },
|
||||||
|
{ 0x032, "M2" },
|
||||||
|
{ 0x033, "M2" },
|
||||||
|
{ 0x034, "M2-Pro" },
|
||||||
|
{ 0x035, "M2-Pro" },
|
||||||
|
{ 0x038, "M2-Max" },
|
||||||
|
{ 0x039, "M2-Max" },
|
||||||
|
{ -1, nullptr }
|
||||||
|
};
|
||||||
|
|
||||||
static const id_part faraday_part[] = {
|
static const id_part faraday_part[] = {
|
||||||
{ 0x526, "FA526" },
|
{ 0x526, "FA526" },
|
||||||
{ 0x626, "FA626" },
|
{ 0x626, "FA626" },
|
||||||
@@ -227,47 +258,40 @@ static const id_part intel_part[] = {
|
|||||||
|
|
||||||
static const struct id_part fujitsu_part[] = {
|
static const struct id_part fujitsu_part[] = {
|
||||||
{ 0x001, "A64FX" },
|
{ 0x001, "A64FX" },
|
||||||
|
{ 0x003, "MONAKA" },
|
||||||
{ -1, nullptr }
|
{ -1, nullptr }
|
||||||
};
|
};
|
||||||
|
|
||||||
static const id_part hisi_part[] = {
|
static const id_part hisi_part[] = {
|
||||||
{ 0xd01, "Kunpeng-920" }, /* aka tsv110 */
|
{ 0xd01, "TaiShan-v110" }, /* used in Kunpeng-920 SoC */
|
||||||
{ 0xd40, "Cortex-A76" }, /* HiSilicon uses this ID though advertises A76 */
|
{ 0xd02, "TaiShan-v120" }, /* used in Kirin 990A and 9000S SoCs */
|
||||||
|
{ 0xd40, "Cortex-A76" }, /* HiSilicon uses this ID though advertises A76 */
|
||||||
|
{ 0xd41, "Cortex-A77" }, /* HiSilicon uses this ID though advertises A77 */
|
||||||
{ -1, nullptr }
|
{ -1, nullptr }
|
||||||
};
|
};
|
||||||
|
|
||||||
static const id_part apple_part[] = {
|
|
||||||
{ 0x022, "M1" },
|
|
||||||
{ 0x023, "M1" },
|
|
||||||
{ 0x024, "M1-Pro" },
|
|
||||||
{ 0x025, "M1-Pro" },
|
|
||||||
{ 0x028, "M1-Max" },
|
|
||||||
{ 0x029, "M1-Max" },
|
|
||||||
{ 0x032, "M2" },
|
|
||||||
{ 0x033, "M2" },
|
|
||||||
{ 0x034, "M2-Pro" },
|
|
||||||
{ 0x035, "M2-Pro" },
|
|
||||||
{ 0x038, "M2-Max" },
|
|
||||||
{ 0x039, "M2-Max" },
|
|
||||||
{ -1, nullptr }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
static const struct id_part ft_part[] = {
|
|
||||||
{ 0x660, "FTC660" },
|
|
||||||
{ 0x661, "FTC661" },
|
|
||||||
{ 0x662, "FTC662" },
|
|
||||||
{ 0x663, "FTC663" },
|
|
||||||
{ -1, nullptr }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
static const struct id_part ampere_part[] = {
|
static const struct id_part ampere_part[] = {
|
||||||
{ 0xac3, "Ampere-1" },
|
{ 0xac3, "Ampere-1" },
|
||||||
{ 0xac4, "Ampere-1a" },
|
{ 0xac4, "Ampere-1a" },
|
||||||
{ -1, nullptr }
|
{ -1, nullptr }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct id_part ft_part[] = {
|
||||||
|
{ 0x303, "FTC310" },
|
||||||
|
{ 0x660, "FTC660" },
|
||||||
|
{ 0x661, "FTC661" },
|
||||||
|
{ 0x662, "FTC662" },
|
||||||
|
{ 0x663, "FTC663" },
|
||||||
|
{ 0x664, "FTC664" },
|
||||||
|
{ 0x862, "FTC862" },
|
||||||
|
{ -1, nullptr }
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct id_part ms_part[] = {
|
||||||
|
{ 0xd49, "Azure-Cobalt-100" },
|
||||||
|
{ -1, nullptr }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static const hw_impl hw_implementer[] = {
|
static const hw_impl hw_implementer[] = {
|
||||||
{ 0x41, arm_part, "ARM" },
|
{ 0x41, arm_part, "ARM" },
|
||||||
@@ -276,7 +300,7 @@ static const hw_impl hw_implementer[] = {
|
|||||||
{ 0x44, dec_part, "DEC" },
|
{ 0x44, dec_part, "DEC" },
|
||||||
{ 0x46, fujitsu_part, "FUJITSU" },
|
{ 0x46, fujitsu_part, "FUJITSU" },
|
||||||
{ 0x48, hisi_part, "HiSilicon" },
|
{ 0x48, hisi_part, "HiSilicon" },
|
||||||
{ 0x4e, nvidia_part, "Nvidia" },
|
{ 0x4e, nvidia_part, "NVIDIA" },
|
||||||
{ 0x50, apm_part, "APM" },
|
{ 0x50, apm_part, "APM" },
|
||||||
{ 0x51, qcom_part, "Qualcomm" },
|
{ 0x51, qcom_part, "Qualcomm" },
|
||||||
{ 0x53, samsung_part, "Samsung" },
|
{ 0x53, samsung_part, "Samsung" },
|
||||||
@@ -284,6 +308,7 @@ static const hw_impl hw_implementer[] = {
|
|||||||
{ 0x61, apple_part, "Apple" },
|
{ 0x61, apple_part, "Apple" },
|
||||||
{ 0x66, faraday_part, "Faraday" },
|
{ 0x66, faraday_part, "Faraday" },
|
||||||
{ 0x69, intel_part, "Intel" },
|
{ 0x69, intel_part, "Intel" },
|
||||||
|
{ 0x6d, ms_part, "Microsoft" },
|
||||||
{ 0x70, ft_part, "Phytium" },
|
{ 0x70, ft_part, "Phytium" },
|
||||||
{ 0xc0, ampere_part, "Ampere" }
|
{ 0xc0, ampere_part, "Ampere" }
|
||||||
};
|
};
|
||||||
|
|||||||
140
src/backend/cpu/platform/lscpu_riscv.cpp
Normal file
140
src/backend/cpu/platform/lscpu_riscv.cpp
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
|
||||||
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "base/tools/String.h"
|
||||||
|
#include "3rdparty/fmt/core.h"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace xmrig {
|
||||||
|
|
||||||
|
struct riscv_cpu_desc
|
||||||
|
{
|
||||||
|
String model;
|
||||||
|
String isa;
|
||||||
|
String uarch;
|
||||||
|
bool has_vector = false;
|
||||||
|
bool has_crypto = false;
|
||||||
|
|
||||||
|
inline bool isReady() const { return !model.isNull(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool lookup_riscv(char *line, const char *pattern, String &value)
|
||||||
|
{
|
||||||
|
char *p = strstr(line, pattern);
|
||||||
|
if (!p) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
p += strlen(pattern);
|
||||||
|
while (isspace(*p)) {
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*p == ':') {
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (isspace(*p)) {
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove trailing newline
|
||||||
|
size_t len = strlen(p);
|
||||||
|
if (len > 0 && p[len - 1] == '\n') {
|
||||||
|
p[len - 1] = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure we call the const char* assignment (which performs a copy)
|
||||||
|
// instead of the char* overload (which would take ownership of the pointer)
|
||||||
|
value = (const char*)p;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns true when the single-letter part of an ISA string such as
 * "rv64imafdcv_zicsr" (everything before the first '_') contains 'v'. */
static bool riscv_isa_base_has_v(const char *isa)
{
    // Skip the "rv<XLEN>" prefix so that the 'v' of "rv" is not counted.
    if (isa[0] == 'r' && isa[1] == 'v') {
        isa += 2;
        while (*isa >= '0' && *isa <= '9') {
            ++isa;
        }
    }

    for (; *isa != '\0' && *isa != '_'; ++isa) {
        if (*isa == 'v') {
            return true;
        }
    }

    return false;
}


/**
 * Fills `desc` from /proc/cpuinfo.
 *
 * Feature flags and strings are stored as they are encountered, whatever the
 * return value is.
 *
 * @return true when a "model name" line was found (desc->isReady()); false if
 *         the file cannot be opened or has no such line.
 */
static bool read_riscv_cpuinfo(riscv_cpu_desc *desc)
{
    auto fp = fopen("/proc/cpuinfo", "r");
    if (!fp) {
        return false;
    }

    char buf[2048]; // Larger buffer for long ISA strings
    while (fgets(buf, sizeof(buf), fp) != nullptr) {
        // A blank line separates processor records. Stop only at such a boundary,
        // so that a "uarch" line following "isa"/"model name" is not skipped.
        if (buf[0] == '\n' || buf[0] == '\r') {
            if (desc->isReady() && !desc->isa.isNull()) {
                break;
            }
            continue;
        }

        lookup_riscv(buf, "model name", desc->model);

        if (lookup_riscv(buf, "isa", desc->isa)) {
            // Inspect only the value part of the line.
            const char *isa = strchr(buf, ':');
            isa = isa ? isa + 1 : buf;
            while (*isa == ' ' || *isa == '\t') {
                ++isa;
            }

            // Check for vector extensions: the full V extension is listed among the
            // single-letter extensions, the embedded profiles are named zve*.
            if (strstr(isa, "zve") || riscv_isa_base_has_v(isa)) {
                desc->has_vector = true;
            }

            // Check for crypto extensions (AES, SHA, etc.)
            // zkn* = NIST crypto suite, zks* = SM crypto suite
            // Note: zba/zbb/zbc/zbs are bit-manipulation, NOT crypto
            if (strstr(isa, "zknd") || strstr(isa, "zkne") || strstr(isa, "zknh") ||
                strstr(isa, "zksed") || strstr(isa, "zksh")) {
                desc->has_crypto = true;
            }
        }

        lookup_riscv(buf, "uarch", desc->uarch);
    }

    fclose(fp);

    return desc->isReady();
}
|
||||||
|
|
||||||
|
/**
 * Returns a display name for the CPU.
 *
 * Result is "<model> (<uarch>)" when both are known, "<model>" when only the
 * model is known, and "RISC-V" when read_riscv_cpuinfo() reports failure.
 */
String cpu_name_riscv()
{
    riscv_cpu_desc info;

    if (!read_riscv_cpuinfo(&info)) {
        return "RISC-V";
    }

    if (info.uarch.isNull()) {
        return info.model;
    }

    return fmt::format("{} ({})", info.model, info.uarch).c_str();
}
|
||||||
|
|
||||||
|
bool has_riscv_vector()
|
||||||
|
{
|
||||||
|
riscv_cpu_desc desc;
|
||||||
|
if (read_riscv_cpuinfo(&desc)) {
|
||||||
|
return desc.has_vector;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_riscv_crypto()
|
||||||
|
{
|
||||||
|
riscv_cpu_desc desc;
|
||||||
|
if (read_riscv_cpuinfo(&desc)) {
|
||||||
|
return desc.has_crypto;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace xmrig
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -71,11 +71,11 @@ char *xmrig::Platform::createUserAgent()
|
|||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_FEATURE_HWLOC
|
#ifndef XMRIG_FEATURE_HWLOC
|
||||||
#ifdef __DragonFly__
|
#if defined(__DragonFly__) || defined(XMRIG_OS_OPENBSD) || defined(XMRIG_OS_HAIKU)
|
||||||
|
|
||||||
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
||||||
{
|
{
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -53,13 +53,21 @@ char *xmrig::Platform::createUserAgent()
|
|||||||
char *buf = new char[max]();
|
char *buf = new char[max]();
|
||||||
int length = snprintf(buf, max, "%s/%s (Windows NT %lu.%lu", APP_NAME, APP_VERSION, osver.dwMajorVersion, osver.dwMinorVersion);
|
int length = snprintf(buf, max, "%s/%s (Windows NT %lu.%lu", APP_NAME, APP_VERSION, osver.dwMajorVersion, osver.dwMinorVersion);
|
||||||
|
|
||||||
# if defined(__x86_64__) || defined(_M_AMD64)
|
# if defined(XMRIG_64_BIT)
|
||||||
length += snprintf(buf + length, max - length, "; Win64; x64) libuv/%s", uv_version_string());
|
length += snprintf(buf + length, max - length, "; Win64; "
|
||||||
|
# if defined(XMRIG_ARM)
|
||||||
|
"arm64"
|
||||||
|
# else
|
||||||
|
"x64"
|
||||||
|
# endif
|
||||||
|
") libuv/%s", uv_version_string());
|
||||||
# else
|
# else
|
||||||
length += snprintf(buf + length, max - length, ") libuv/%s", uv_version_string());
|
length += snprintf(buf + length, max - length, ") libuv/%s", uv_version_string());
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# ifdef __GNUC__
|
# ifdef __clang__
|
||||||
|
snprintf(buf + length, max - length, " clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__);
|
||||||
|
# elif defined(__GNUC__)
|
||||||
snprintf(buf + length, max - length, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
snprintf(buf + length, max - length, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
|
||||||
# elif _MSC_VER
|
# elif _MSC_VER
|
||||||
snprintf(buf + length, max - length, " msvc/%d", MSVC_VERSION);
|
snprintf(buf + length, max - length, " msvc/%d", MSVC_VERSION);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -18,14 +18,12 @@
|
|||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# include "getopt/getopt.h"
|
# include "getopt/getopt.h"
|
||||||
#else
|
#else
|
||||||
# include <getopt.h>
|
# include <getopt.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#include "base/kernel/config/BaseTransform.h"
|
#include "base/kernel/config/BaseTransform.h"
|
||||||
#include "base/io/json/JsonChain.h"
|
#include "base/io/json/JsonChain.h"
|
||||||
#include "base/io/log/Log.h"
|
#include "base/io/log/Log.h"
|
||||||
@@ -37,7 +35,6 @@
|
|||||||
#include "base/net/stratum/Pools.h"
|
#include "base/net/stratum/Pools.h"
|
||||||
#include "core/config/Config_platform.h"
|
#include "core/config/Config_platform.h"
|
||||||
|
|
||||||
|
|
||||||
#ifdef XMRIG_FEATURE_TLS
|
#ifdef XMRIG_FEATURE_TLS
|
||||||
# include "base/net/tls/TlsConfig.h"
|
# include "base/net/tls/TlsConfig.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -47,9 +44,9 @@ void xmrig::BaseTransform::load(JsonChain &chain, Process *process, IConfigTrans
|
|||||||
{
|
{
|
||||||
using namespace rapidjson;
|
using namespace rapidjson;
|
||||||
|
|
||||||
int key = 0;
|
int key = 0;
|
||||||
int argc = process->arguments().argc();
|
const int argc = process->arguments().argc();
|
||||||
char **argv = process->arguments().argv();
|
char **argv = process->arguments().argv();
|
||||||
|
|
||||||
Document doc(kObjectType);
|
Document doc(kObjectType);
|
||||||
|
|
||||||
@@ -262,7 +259,8 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch
|
|||||||
case IConfig::DaemonKey: /* --daemon */
|
case IConfig::DaemonKey: /* --daemon */
|
||||||
case IConfig::SubmitToOriginKey: /* --submit-to-origin */
|
case IConfig::SubmitToOriginKey: /* --submit-to-origin */
|
||||||
case IConfig::VerboseKey: /* --verbose */
|
case IConfig::VerboseKey: /* --verbose */
|
||||||
case IConfig::DnsIPv6Key: /* --dns-ipv6 */
|
case IConfig::DnsIPv4Key: /* --ipv4 */
|
||||||
|
case IConfig::DnsIPv6Key: /* --ipv6 */
|
||||||
return transformBoolean(doc, key, true);
|
return transformBoolean(doc, key, true);
|
||||||
|
|
||||||
case IConfig::ColorKey: /* --no-color */
|
case IConfig::ColorKey: /* --no-color */
|
||||||
@@ -323,8 +321,11 @@ void xmrig::BaseTransform::transformBoolean(rapidjson::Document &doc, int key, b
|
|||||||
case IConfig::NoTitleKey: /* --no-title */
|
case IConfig::NoTitleKey: /* --no-title */
|
||||||
return set(doc, BaseConfig::kTitle, enable);
|
return set(doc, BaseConfig::kTitle, enable);
|
||||||
|
|
||||||
case IConfig::DnsIPv6Key: /* --dns-ipv6 */
|
case IConfig::DnsIPv4Key: /* --ipv4 */
|
||||||
return set(doc, DnsConfig::kField, DnsConfig::kIPv6, enable);
|
return set(doc, DnsConfig::kField, DnsConfig::kIPv, 4);
|
||||||
|
|
||||||
|
case IConfig::DnsIPv6Key: /* --ipv6 */
|
||||||
|
return set(doc, DnsConfig::kField, DnsConfig::kIPv, 6);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,9 +16,7 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_ICONFIG_H
|
#pragma once
|
||||||
#define XMRIG_ICONFIG_H
|
|
||||||
|
|
||||||
|
|
||||||
#include "3rdparty/rapidjson/fwd.h"
|
#include "3rdparty/rapidjson/fwd.h"
|
||||||
|
|
||||||
@@ -82,7 +80,8 @@ public:
|
|||||||
HugePageSizeKey = 1050,
|
HugePageSizeKey = 1050,
|
||||||
PauseOnActiveKey = 1051,
|
PauseOnActiveKey = 1051,
|
||||||
SubmitToOriginKey = 1052,
|
SubmitToOriginKey = 1052,
|
||||||
DnsIPv6Key = 1053,
|
DnsIPv4Key = '4',
|
||||||
|
DnsIPv6Key = '6',
|
||||||
DnsTtlKey = 1054,
|
DnsTtlKey = 1054,
|
||||||
SpendSecretKey = 1055,
|
SpendSecretKey = 1055,
|
||||||
DaemonZMQPortKey = 1056,
|
DaemonZMQPortKey = 1056,
|
||||||
@@ -177,7 +176,4 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif // XMRIG_ICONFIG_H
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,21 +16,16 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_IDNSBACKEND_H
|
#pragma once
|
||||||
#define XMRIG_IDNSBACKEND_H
|
|
||||||
|
|
||||||
|
|
||||||
#include "base/tools/Object.h"
|
#include "base/tools/Object.h"
|
||||||
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
class DnsConfig;
|
||||||
class DnsRecords;
|
class DnsRecords;
|
||||||
class DnsRequest;
|
|
||||||
class IDnsListener;
|
class IDnsListener;
|
||||||
class String;
|
class String;
|
||||||
|
|
||||||
@@ -43,12 +38,8 @@ public:
|
|||||||
IDnsBackend() = default;
|
IDnsBackend() = default;
|
||||||
virtual ~IDnsBackend() = default;
|
virtual ~IDnsBackend() = default;
|
||||||
|
|
||||||
virtual const DnsRecords &records() const = 0;
|
virtual void resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config) = 0;
|
||||||
virtual std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl) = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif // XMRIG_IDNSBACKEND_H
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "base/net/dns/Dns.h"
|
#include "base/net/dns/Dns.h"
|
||||||
|
#include "base/net/dns/DnsRequest.h"
|
||||||
#include "base/net/dns/DnsUvBackend.h"
|
#include "base/net/dns/DnsUvBackend.h"
|
||||||
|
|
||||||
|
|
||||||
@@ -25,17 +26,21 @@ namespace xmrig {
|
|||||||
|
|
||||||
|
|
||||||
DnsConfig Dns::m_config;
|
DnsConfig Dns::m_config;
|
||||||
std::map<String, std::shared_ptr<IDnsBackend> > Dns::m_backends;
|
std::map<String, std::shared_ptr<IDnsBackend>> Dns::m_backends;
|
||||||
|
|
||||||
|
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
std::shared_ptr<xmrig::DnsRequest> xmrig::Dns::resolve(const String &host, IDnsListener *listener, uint64_t ttl)
|
std::shared_ptr<xmrig::DnsRequest> xmrig::Dns::resolve(const String &host, IDnsListener *listener)
|
||||||
{
|
{
|
||||||
|
auto req = std::make_shared<DnsRequest>(listener);
|
||||||
|
|
||||||
if (m_backends.find(host) == m_backends.end()) {
|
if (m_backends.find(host) == m_backends.end()) {
|
||||||
m_backends.insert({ host, std::make_shared<DnsUvBackend>() });
|
m_backends.insert({ host, std::make_shared<DnsUvBackend>() });
|
||||||
}
|
}
|
||||||
|
|
||||||
return m_backends.at(host)->resolve(host, listener, ttl == 0 ? m_config.ttl() : ttl);
|
m_backends.at(host)->resolve(host, req, m_config);
|
||||||
|
|
||||||
|
return req;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -43,7 +43,7 @@ public:
|
|||||||
inline static const DnsConfig &config() { return m_config; }
|
inline static const DnsConfig &config() { return m_config; }
|
||||||
inline static void set(const DnsConfig &config) { m_config = config; }
|
inline static void set(const DnsConfig &config) { m_config = config; }
|
||||||
|
|
||||||
static std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl = 0);
|
static std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static DnsConfig m_config;
|
static DnsConfig m_config;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -20,15 +20,15 @@
|
|||||||
#include "3rdparty/rapidjson/document.h"
|
#include "3rdparty/rapidjson/document.h"
|
||||||
#include "base/io/json/Json.h"
|
#include "base/io/json/Json.h"
|
||||||
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <uv.h>
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
const char *DnsConfig::kField = "dns";
|
const char *DnsConfig::kField = "dns";
|
||||||
const char *DnsConfig::kIPv6 = "ipv6";
|
const char *DnsConfig::kIPv = "ip_version";
|
||||||
const char *DnsConfig::kTTL = "ttl";
|
const char *DnsConfig::kTTL = "ttl";
|
||||||
|
|
||||||
|
|
||||||
@@ -37,8 +37,26 @@ const char *DnsConfig::kTTL = "ttl";
|
|||||||
|
|
||||||
xmrig::DnsConfig::DnsConfig(const rapidjson::Value &value)
|
xmrig::DnsConfig::DnsConfig(const rapidjson::Value &value)
|
||||||
{
|
{
|
||||||
m_ipv6 = Json::getBool(value, kIPv6, m_ipv6);
|
const uint32_t ipv = Json::getUint(value, kIPv, m_ipv);
|
||||||
m_ttl = std::max(Json::getUint(value, kTTL, m_ttl), 1U);
|
if (ipv == 0 || ipv == 4 || ipv == 6) {
|
||||||
|
m_ipv = ipv;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_ttl = std::max(Json::getUint(value, kTTL, m_ttl), 1U);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int xmrig::DnsConfig::ai_family() const
|
||||||
|
{
|
||||||
|
if (m_ipv == 4) {
|
||||||
|
return AF_INET;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_ipv == 6) {
|
||||||
|
return AF_INET6;
|
||||||
|
}
|
||||||
|
|
||||||
|
return AF_UNSPEC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -49,8 +67,8 @@ rapidjson::Value xmrig::DnsConfig::toJSON(rapidjson::Document &doc) const
|
|||||||
auto &allocator = doc.GetAllocator();
|
auto &allocator = doc.GetAllocator();
|
||||||
Value obj(kObjectType);
|
Value obj(kObjectType);
|
||||||
|
|
||||||
obj.AddMember(StringRef(kIPv6), m_ipv6, allocator);
|
obj.AddMember(StringRef(kIPv), m_ipv, allocator);
|
||||||
obj.AddMember(StringRef(kTTL), m_ttl, allocator);
|
obj.AddMember(StringRef(kTTL), m_ttl, allocator);
|
||||||
|
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,9 +16,7 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_DNSCONFIG_H
|
#pragma once
|
||||||
#define XMRIG_DNSCONFIG_H
|
|
||||||
|
|
||||||
|
|
||||||
#include "3rdparty/rapidjson/fwd.h"
|
#include "3rdparty/rapidjson/fwd.h"
|
||||||
|
|
||||||
@@ -30,25 +28,22 @@ class DnsConfig
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static const char *kField;
|
static const char *kField;
|
||||||
static const char *kIPv6;
|
static const char *kIPv;
|
||||||
static const char *kTTL;
|
static const char *kTTL;
|
||||||
|
|
||||||
DnsConfig() = default;
|
DnsConfig() = default;
|
||||||
DnsConfig(const rapidjson::Value &value);
|
DnsConfig(const rapidjson::Value &value);
|
||||||
|
|
||||||
inline bool isIPv6() const { return m_ipv6; }
|
inline uint32_t ipv() const { return m_ipv; }
|
||||||
inline uint32_t ttl() const { return m_ttl * 1000U; }
|
inline uint32_t ttl() const { return m_ttl * 1000U; }
|
||||||
|
|
||||||
|
int ai_family() const;
|
||||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool m_ipv6 = false;
|
uint32_t m_ttl = 30U;
|
||||||
uint32_t m_ttl = 30U;
|
uint32_t m_ipv = 0U;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_DNSCONFIG_H */
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,19 +16,16 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include <uv.h>
|
#include <uv.h>
|
||||||
|
|
||||||
|
|
||||||
#include "base/net/dns/DnsRecord.h"
|
#include "base/net/dns/DnsRecord.h"
|
||||||
|
|
||||||
|
|
||||||
xmrig::DnsRecord::DnsRecord(const addrinfo *addr) :
|
xmrig::DnsRecord::DnsRecord(const addrinfo *addr)
|
||||||
m_type(addr->ai_family == AF_INET6 ? AAAA : (addr->ai_family == AF_INET ? A : Unknown))
|
|
||||||
{
|
{
|
||||||
static_assert(sizeof(m_data) >= sizeof(sockaddr_in6), "Not enough storage for IPv6 address.");
|
static_assert(sizeof(m_data) >= sizeof(sockaddr_in6), "Not enough storage for IPv6 address.");
|
||||||
|
|
||||||
memcpy(m_data, addr->ai_addr, m_type == AAAA ? sizeof(sockaddr_in6) : sizeof(sockaddr_in));
|
memcpy(m_data, addr->ai_addr, addr->ai_family == AF_INET6 ? sizeof(sockaddr_in6) : sizeof(sockaddr_in));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -44,7 +41,7 @@ xmrig::String xmrig::DnsRecord::ip() const
|
|||||||
{
|
{
|
||||||
char *buf = nullptr;
|
char *buf = nullptr;
|
||||||
|
|
||||||
if (m_type == AAAA) {
|
if (reinterpret_cast<const sockaddr &>(m_data).sa_family == AF_INET6) {
|
||||||
buf = new char[45]();
|
buf = new char[45]();
|
||||||
uv_ip6_name(reinterpret_cast<const sockaddr_in6*>(m_data), buf, 45);
|
uv_ip6_name(reinterpret_cast<const sockaddr_in6*>(m_data), buf, 45);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,14 +16,11 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_DNSRECORD_H
|
#pragma once
|
||||||
#define XMRIG_DNSRECORD_H
|
|
||||||
|
|
||||||
|
|
||||||
struct addrinfo;
|
struct addrinfo;
|
||||||
struct sockaddr;
|
struct sockaddr;
|
||||||
|
|
||||||
|
|
||||||
#include "base/tools/String.h"
|
#include "base/tools/String.h"
|
||||||
|
|
||||||
|
|
||||||
@@ -33,28 +30,15 @@ namespace xmrig {
|
|||||||
class DnsRecord
|
class DnsRecord
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
enum Type : uint32_t {
|
|
||||||
Unknown,
|
|
||||||
A,
|
|
||||||
AAAA
|
|
||||||
};
|
|
||||||
|
|
||||||
DnsRecord() {}
|
DnsRecord() {}
|
||||||
DnsRecord(const addrinfo *addr);
|
DnsRecord(const addrinfo *addr);
|
||||||
|
|
||||||
const sockaddr *addr(uint16_t port = 0) const;
|
const sockaddr *addr(uint16_t port = 0) const;
|
||||||
String ip() const;
|
String ip() const;
|
||||||
|
|
||||||
inline bool isValid() const { return m_type != Unknown; }
|
|
||||||
inline Type type() const { return m_type; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
mutable uint8_t m_data[28]{};
|
mutable uint8_t m_data[28]{};
|
||||||
const Type m_type = Unknown;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_DNSRECORD_H */
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -18,90 +18,96 @@
|
|||||||
|
|
||||||
#include <uv.h>
|
#include <uv.h>
|
||||||
|
|
||||||
|
|
||||||
#include "base/net/dns/DnsRecords.h"
|
#include "base/net/dns/DnsRecords.h"
|
||||||
#include "base/net/dns/Dns.h"
|
|
||||||
|
|
||||||
|
|
||||||
const xmrig::DnsRecord &xmrig::DnsRecords::get(DnsRecord::Type prefered) const
|
namespace {
|
||||||
|
|
||||||
|
|
||||||
|
static size_t dns_records_count(const addrinfo *res, int &ai_family)
|
||||||
|
{
|
||||||
|
size_t ipv4 = 0;
|
||||||
|
size_t ipv6 = 0;
|
||||||
|
|
||||||
|
while (res != nullptr) {
|
||||||
|
if (res->ai_family == AF_INET) {
|
||||||
|
++ipv4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res->ai_family == AF_INET6) {
|
||||||
|
++ipv6;
|
||||||
|
}
|
||||||
|
|
||||||
|
res = res->ai_next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ai_family == AF_INET6 && !ipv6) {
|
||||||
|
ai_family = AF_INET;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (ai_family) {
|
||||||
|
case AF_UNSPEC:
|
||||||
|
return ipv4 + ipv6;
|
||||||
|
|
||||||
|
case AF_INET:
|
||||||
|
return ipv4;
|
||||||
|
|
||||||
|
case AF_INET6:
|
||||||
|
return ipv6;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
xmrig::DnsRecords::DnsRecords(const addrinfo *res, int ai_family)
|
||||||
|
{
|
||||||
|
size_t size = dns_records_count(res, ai_family);
|
||||||
|
if (!size) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_records.reserve(size);
|
||||||
|
|
||||||
|
if (ai_family == AF_UNSPEC) {
|
||||||
|
while (res != nullptr) {
|
||||||
|
if (res->ai_family == AF_INET || res->ai_family == AF_INET6) {
|
||||||
|
m_records.emplace_back(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = res->ai_next;
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
while (res != nullptr) {
|
||||||
|
if (res->ai_family == ai_family) {
|
||||||
|
m_records.emplace_back(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = res->ai_next;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
size = m_records.size();
|
||||||
|
if (size > 1) {
|
||||||
|
m_index = static_cast<size_t>(rand()) % size; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const xmrig::DnsRecord &xmrig::DnsRecords::get() const
|
||||||
{
|
{
|
||||||
static const DnsRecord defaultRecord;
|
static const DnsRecord defaultRecord;
|
||||||
|
|
||||||
if (isEmpty()) {
|
const size_t size = m_records.size();
|
||||||
return defaultRecord;
|
if (size > 0) {
|
||||||
}
|
return m_records[m_index++ % size];
|
||||||
|
|
||||||
const size_t ipv4 = m_ipv4.size();
|
|
||||||
const size_t ipv6 = m_ipv6.size();
|
|
||||||
|
|
||||||
if (ipv6 && (prefered == DnsRecord::AAAA || Dns::config().isIPv6() || !ipv4)) {
|
|
||||||
return m_ipv6[ipv6 == 1 ? 0 : static_cast<size_t>(rand()) % ipv6]; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ipv4) {
|
|
||||||
return m_ipv4[ipv4 == 1 ? 0 : static_cast<size_t>(rand()) % ipv4]; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return defaultRecord;
|
return defaultRecord;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t xmrig::DnsRecords::count(DnsRecord::Type type) const
|
|
||||||
{
|
|
||||||
if (type == DnsRecord::A) {
|
|
||||||
return m_ipv4.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (type == DnsRecord::AAAA) {
|
|
||||||
return m_ipv6.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
return m_ipv4.size() + m_ipv6.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void xmrig::DnsRecords::clear()
|
|
||||||
{
|
|
||||||
m_ipv4.clear();
|
|
||||||
m_ipv6.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void xmrig::DnsRecords::parse(addrinfo *res)
|
|
||||||
{
|
|
||||||
clear();
|
|
||||||
|
|
||||||
addrinfo *ptr = res;
|
|
||||||
size_t ipv4 = 0;
|
|
||||||
size_t ipv6 = 0;
|
|
||||||
|
|
||||||
while (ptr != nullptr) {
|
|
||||||
if (ptr->ai_family == AF_INET) {
|
|
||||||
++ipv4;
|
|
||||||
}
|
|
||||||
else if (ptr->ai_family == AF_INET6) {
|
|
||||||
++ipv6;
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr = ptr->ai_next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ipv4 == 0 && ipv6 == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_ipv4.reserve(ipv4);
|
|
||||||
m_ipv6.reserve(ipv6);
|
|
||||||
|
|
||||||
ptr = res;
|
|
||||||
while (ptr != nullptr) {
|
|
||||||
if (ptr->ai_family == AF_INET) {
|
|
||||||
m_ipv4.emplace_back(ptr);
|
|
||||||
}
|
|
||||||
else if (ptr->ai_family == AF_INET6) {
|
|
||||||
m_ipv6.emplace_back(ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr = ptr->ai_next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,9 +16,7 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_DNSRECORDS_H
|
#pragma once
|
||||||
#define XMRIG_DNSRECORDS_H
|
|
||||||
|
|
||||||
|
|
||||||
#include "base/net/dns/DnsRecord.h"
|
#include "base/net/dns/DnsRecord.h"
|
||||||
|
|
||||||
@@ -29,20 +27,19 @@ namespace xmrig {
|
|||||||
class DnsRecords
|
class DnsRecords
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
inline bool isEmpty() const { return m_ipv4.empty() && m_ipv6.empty(); }
|
DnsRecords() = default;
|
||||||
|
DnsRecords(const addrinfo *res, int ai_family);
|
||||||
|
|
||||||
const DnsRecord &get(DnsRecord::Type prefered = DnsRecord::Unknown) const;
|
inline bool isEmpty() const { return m_records.empty(); }
|
||||||
size_t count(DnsRecord::Type type = DnsRecord::Unknown) const;
|
inline const std::vector<DnsRecord> &records() const { return m_records; }
|
||||||
void clear();
|
inline size_t size() const { return m_records.size(); }
|
||||||
void parse(addrinfo *res);
|
|
||||||
|
const DnsRecord &get() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<DnsRecord> m_ipv4;
|
mutable size_t m_index = 0;
|
||||||
std::vector<DnsRecord> m_ipv6;
|
std::vector<DnsRecord> m_records;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_DNSRECORDS_H */
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,35 +16,30 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_DNSREQUEST_H
|
#pragma once
|
||||||
#define XMRIG_DNSREQUEST_H
|
|
||||||
|
|
||||||
|
#include "base/kernel/interfaces/IDnsListener.h"
|
||||||
#include "base/tools/Object.h"
|
|
||||||
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
class IDnsListener;
|
class DnsRequest : public IDnsListener
|
||||||
|
|
||||||
|
|
||||||
class DnsRequest
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(DnsRequest)
|
XMRIG_DISABLE_COPY_MOVE_DEFAULT(DnsRequest)
|
||||||
|
|
||||||
DnsRequest(IDnsListener *listener) : listener(listener) {}
|
inline DnsRequest(IDnsListener *listener) : m_listener(listener) {}
|
||||||
~DnsRequest() = default;
|
~DnsRequest() override = default;
|
||||||
|
|
||||||
IDnsListener *listener;
|
protected:
|
||||||
|
inline void onResolved(const DnsRecords &records, int status, const char *error) override {
|
||||||
|
m_listener->onResolved(records, status, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
IDnsListener *m_listener;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_DNSREQUEST_H */
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,13 +16,11 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include <uv.h>
|
#include <uv.h>
|
||||||
|
|
||||||
|
|
||||||
#include "base/net/dns/DnsUvBackend.h"
|
#include "base/net/dns/DnsUvBackend.h"
|
||||||
#include "base/kernel/interfaces/IDnsListener.h"
|
#include "base/kernel/interfaces/IDnsListener.h"
|
||||||
#include "base/net/dns/DnsRequest.h"
|
#include "base/net/dns/DnsConfig.h"
|
||||||
#include "base/tools/Chrono.h"
|
#include "base/tools/Chrono.h"
|
||||||
|
|
||||||
|
|
||||||
@@ -73,21 +71,23 @@ xmrig::DnsUvBackend::~DnsUvBackend()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::shared_ptr<xmrig::DnsRequest> xmrig::DnsUvBackend::resolve(const String &host, IDnsListener *listener, uint64_t ttl)
|
void xmrig::DnsUvBackend::resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config)
|
||||||
{
|
{
|
||||||
auto req = std::make_shared<DnsRequest>(listener);
|
m_queue.emplace_back(listener);
|
||||||
|
|
||||||
if (Chrono::currentMSecsSinceEpoch() - m_ts <= ttl && !m_records.isEmpty()) {
|
if (Chrono::currentMSecsSinceEpoch() - m_ts <= config.ttl()) {
|
||||||
req->listener->onResolved(m_records, 0, nullptr);
|
return notify();
|
||||||
} else {
|
|
||||||
m_queue.emplace(req);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_queue.size() == 1 && !resolve(host)) {
|
if (m_req) {
|
||||||
done();
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
return req;
|
m_ai_family = config.ai_family();
|
||||||
|
|
||||||
|
if (!resolve(host)) {
|
||||||
|
notify();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -102,44 +102,46 @@ bool xmrig::DnsUvBackend::resolve(const String &host)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void xmrig::DnsUvBackend::done()
|
void xmrig::DnsUvBackend::notify()
|
||||||
{
|
{
|
||||||
const char *error = m_status < 0 ? uv_strerror(m_status) : nullptr;
|
const char *error = m_status < 0 ? uv_strerror(m_status) : nullptr;
|
||||||
|
|
||||||
while (!m_queue.empty()) {
|
for (const auto &l : m_queue) {
|
||||||
auto req = std::move(m_queue.front()).lock();
|
auto listener = l.lock();
|
||||||
if (req) {
|
if (listener) {
|
||||||
req->listener->onResolved(m_records, m_status, error);
|
listener->onResolved(m_records, m_status, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_queue.pop();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_queue.clear();
|
||||||
m_req.reset();
|
m_req.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void xmrig::DnsUvBackend::onResolved(int status, addrinfo *res)
|
void xmrig::DnsUvBackend::onResolved(int status, addrinfo *res)
|
||||||
{
|
{
|
||||||
m_ts = Chrono::currentMSecsSinceEpoch();
|
m_status = status;
|
||||||
|
m_ts = Chrono::currentMSecsSinceEpoch();
|
||||||
|
|
||||||
if ((m_status = status) < 0) {
|
if (m_status < 0) {
|
||||||
return done();
|
m_records = {};
|
||||||
|
|
||||||
|
return notify();
|
||||||
}
|
}
|
||||||
|
|
||||||
m_records.parse(res);
|
m_records = { res, m_ai_family };
|
||||||
|
|
||||||
if (m_records.isEmpty()) {
|
if (m_records.isEmpty()) {
|
||||||
m_status = UV_EAI_NONAME;
|
m_status = UV_EAI_NONAME;
|
||||||
}
|
}
|
||||||
|
|
||||||
done();
|
notify();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void xmrig::DnsUvBackend::onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res)
|
void xmrig::DnsUvBackend::onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res)
|
||||||
{
|
{
|
||||||
auto backend = getStorage().get(req->data);
|
auto *backend = getStorage().get(req->data);
|
||||||
if (backend) {
|
if (backend) {
|
||||||
backend->onResolved(status, res);
|
backend->onResolved(status, res);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,16 +16,13 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_DNSUVBACKEND_H
|
#pragma once
|
||||||
#define XMRIG_DNSUVBACKEND_H
|
|
||||||
|
|
||||||
|
|
||||||
#include "base/kernel/interfaces/IDnsBackend.h"
|
#include "base/kernel/interfaces/IDnsBackend.h"
|
||||||
#include "base/net/dns/DnsRecords.h"
|
#include "base/net/dns/DnsRecords.h"
|
||||||
#include "base/net/tools/Storage.h"
|
#include "base/net/tools/Storage.h"
|
||||||
|
|
||||||
|
#include <deque>
|
||||||
#include <queue>
|
|
||||||
|
|
||||||
|
|
||||||
using uv_getaddrinfo_t = struct uv_getaddrinfo_s;
|
using uv_getaddrinfo_t = struct uv_getaddrinfo_s;
|
||||||
@@ -43,20 +40,19 @@ public:
|
|||||||
~DnsUvBackend() override;
|
~DnsUvBackend() override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
inline const DnsRecords &records() const override { return m_records; }
|
void resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config) override;
|
||||||
|
|
||||||
std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl) override;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool resolve(const String &host);
|
bool resolve(const String &host);
|
||||||
void done();
|
void notify();
|
||||||
void onResolved(int status, addrinfo *res);
|
void onResolved(int status, addrinfo *res);
|
||||||
|
|
||||||
static void onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res);
|
static void onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res);
|
||||||
|
|
||||||
DnsRecords m_records;
|
DnsRecords m_records;
|
||||||
|
int m_ai_family = 0;
|
||||||
int m_status = 0;
|
int m_status = 0;
|
||||||
std::queue<std::weak_ptr<DnsRequest> > m_queue;
|
std::deque<std::weak_ptr<IDnsListener>> m_queue;
|
||||||
std::shared_ptr<uv_getaddrinfo_t> m_req;
|
std::shared_ptr<uv_getaddrinfo_t> m_req;
|
||||||
uint64_t m_ts = 0;
|
uint64_t m_ts = 0;
|
||||||
uintptr_t m_key;
|
uintptr_t m_key;
|
||||||
@@ -66,7 +62,4 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_DNSUVBACKEND_H */
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
|
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -45,7 +45,7 @@ namespace xmrig {
|
|||||||
|
|
||||||
|
|
||||||
// https://wiki.openssl.org/index.php/Diffie-Hellman_parameters
|
// https://wiki.openssl.org/index.php/Diffie-Hellman_parameters
|
||||||
#if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER)
|
#if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3))
|
||||||
static DH *get_dh2048()
|
static DH *get_dh2048()
|
||||||
{
|
{
|
||||||
static unsigned char dhp_2048[] = {
|
static unsigned char dhp_2048[] = {
|
||||||
@@ -152,7 +152,7 @@ bool xmrig::TlsContext::load(const TlsConfig &config)
|
|||||||
SSL_CTX_set_options(m_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
|
SSL_CTX_set_options(m_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
|
||||||
SSL_CTX_set_options(m_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
|
SSL_CTX_set_options(m_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
|
||||||
|
|
||||||
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER)
|
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3)
|
||||||
SSL_CTX_set_max_early_data(m_ctx, 0);
|
SSL_CTX_set_max_early_data(m_ctx, 0);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
@@ -180,7 +180,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER)
|
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3)
|
||||||
if (SSL_CTX_set_ciphersuites(m_ctx, ciphersuites) == 1) {
|
if (SSL_CTX_set_ciphersuites(m_ctx, ciphersuites) == 1) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -194,7 +194,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites)
|
|||||||
|
|
||||||
bool xmrig::TlsContext::setDH(const char *dhparam)
|
bool xmrig::TlsContext::setDH(const char *dhparam)
|
||||||
{
|
{
|
||||||
# if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER)
|
# if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3))
|
||||||
DH *dh = nullptr;
|
DH *dh = nullptr;
|
||||||
|
|
||||||
if (dhparam != nullptr) {
|
if (dhparam != nullptr) {
|
||||||
|
|||||||
@@ -241,8 +241,13 @@ bool xmrig::BlockTemplate::parse(bool hashes)
|
|||||||
ar(m_amount);
|
ar(m_amount);
|
||||||
ar(m_outputType);
|
ar(m_outputType);
|
||||||
|
|
||||||
// output type must be txout_to_key (2) or txout_to_tagged_key (3)
|
const bool is_fcmp_pp = (m_coin == Coin::MONERO) && (m_version.first >= 17);
|
||||||
if ((m_outputType != 2) && (m_outputType != 3)) {
|
|
||||||
|
// output type must be txout_to_key (2) or txout_to_tagged_key (3) for versions < 17, and txout_to_carrot_v1 (0) for version FCMP++
|
||||||
|
if (is_fcmp_pp && (m_outputType == 0)) {
|
||||||
|
// all good
|
||||||
|
}
|
||||||
|
else if ((m_outputType != 2) && (m_outputType != 3)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -250,6 +255,11 @@ bool xmrig::BlockTemplate::parse(bool hashes)
|
|||||||
|
|
||||||
ar(m_ephPublicKey, kKeySize);
|
ar(m_ephPublicKey, kKeySize);
|
||||||
|
|
||||||
|
if (is_fcmp_pp) {
|
||||||
|
ar(m_carrotViewTag);
|
||||||
|
ar(m_janusAnchor);
|
||||||
|
}
|
||||||
|
|
||||||
if (m_coin == Coin::ZEPHYR) {
|
if (m_coin == Coin::ZEPHYR) {
|
||||||
if (m_outputType != 2) {
|
if (m_outputType != 2) {
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -148,6 +148,8 @@ private:
|
|||||||
Buffer m_hashes;
|
Buffer m_hashes;
|
||||||
Buffer m_minerTxMerkleTreeBranch;
|
Buffer m_minerTxMerkleTreeBranch;
|
||||||
uint8_t m_rootHash[kHashSize]{};
|
uint8_t m_rootHash[kHashSize]{};
|
||||||
|
uint8_t m_carrotViewTag[3]{};
|
||||||
|
uint8_t m_janusAnchor[16]{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -23,15 +23,22 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
#ifdef XMRIG_64_BIT
|
#if defined(XMRIG_64_BIT)
|
||||||
# ifdef _MSC_VER
|
# if defined(_MSC_VER)
|
||||||
# include <intrin.h>
|
# include <intrin.h>
|
||||||
# pragma intrinsic(_umul128)
|
# if defined(XMRIG_ARM)
|
||||||
# define __umul128 _umul128
|
#pragma intrinsic(__umulh)
|
||||||
# elif defined __GNUC__
|
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t *high) {
|
||||||
|
*high = __umulh(a, b);
|
||||||
|
return a * b;
|
||||||
|
}
|
||||||
|
# else
|
||||||
|
# pragma intrinsic(_umul128)
|
||||||
|
# define __umul128 _umul128
|
||||||
|
# endif
|
||||||
|
# elif defined(__GNUC__)
|
||||||
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
||||||
{
|
{
|
||||||
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
|
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
|
||||||
|
|||||||
@@ -93,7 +93,7 @@
|
|||||||
"dhparam": null
|
"dhparam": null
|
||||||
},
|
},
|
||||||
"dns": {
|
"dns": {
|
||||||
"ipv6": false,
|
"ip_version": 0,
|
||||||
"ttl": 30
|
"ttl": 30
|
||||||
},
|
},
|
||||||
"user-agent": null,
|
"user-agent": null,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -16,9 +16,7 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_CONFIG_PLATFORM_H
|
#pragma once
|
||||||
#define XMRIG_CONFIG_PLATFORM_H
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# include "getopt/getopt.h"
|
# include "getopt/getopt.h"
|
||||||
@@ -28,13 +26,12 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "base/kernel/interfaces/IConfig.h"
|
#include "base/kernel/interfaces/IConfig.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
static const char short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:Sx:";
|
static const char short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:Sx:46";
|
||||||
|
|
||||||
|
|
||||||
static const option options[] = {
|
static const option options[] = {
|
||||||
@@ -99,7 +96,8 @@ static const option options[] = {
|
|||||||
{ "no-title", 0, nullptr, IConfig::NoTitleKey },
|
{ "no-title", 0, nullptr, IConfig::NoTitleKey },
|
||||||
{ "pause-on-battery", 0, nullptr, IConfig::PauseOnBatteryKey },
|
{ "pause-on-battery", 0, nullptr, IConfig::PauseOnBatteryKey },
|
||||||
{ "pause-on-active", 1, nullptr, IConfig::PauseOnActiveKey },
|
{ "pause-on-active", 1, nullptr, IConfig::PauseOnActiveKey },
|
||||||
{ "dns-ipv6", 0, nullptr, IConfig::DnsIPv6Key },
|
{ "ipv4", 0, nullptr, IConfig::DnsIPv4Key },
|
||||||
|
{ "ipv6", 0, nullptr, IConfig::DnsIPv6Key },
|
||||||
{ "dns-ttl", 1, nullptr, IConfig::DnsTtlKey },
|
{ "dns-ttl", 1, nullptr, IConfig::DnsTtlKey },
|
||||||
{ "spend-secret-key", 1, nullptr, IConfig::SpendSecretKey },
|
{ "spend-secret-key", 1, nullptr, IConfig::SpendSecretKey },
|
||||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||||
@@ -169,6 +167,3 @@ static const option options[] = {
|
|||||||
|
|
||||||
|
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_CONFIG_PLATFORM_H */
|
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
* Copyright (c) 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright (c) 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright (c) 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright (c) 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright (c) 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright (c) 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -21,13 +21,10 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XMRIG_USAGE_H
|
#pragma once
|
||||||
#define XMRIG_USAGE_H
|
|
||||||
|
|
||||||
|
|
||||||
#include "version.h"
|
#include "version.h"
|
||||||
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
||||||
@@ -59,7 +56,8 @@ static inline const std::string &usage()
|
|||||||
u += " --tls-fingerprint=HEX pool TLS certificate fingerprint for strict certificate pinning\n";
|
u += " --tls-fingerprint=HEX pool TLS certificate fingerprint for strict certificate pinning\n";
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
u += " --dns-ipv6 prefer IPv6 records from DNS responses\n";
|
u += " -4, --ipv4 resolve names to IPv4 addresses\n";
|
||||||
|
u += " -6, --ipv6 resolve names to IPv6 addresses\n";
|
||||||
u += " --dns-ttl=N N seconds (default: 30) TTL for internal DNS cache\n";
|
u += " --dns-ttl=N N seconds (default: 30) TTL for internal DNS cache\n";
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_HTTP
|
# ifdef XMRIG_FEATURE_HTTP
|
||||||
@@ -205,6 +203,4 @@ static inline const std::string &usage()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
#endif /* XMRIG_USAGE_H */
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@
|
|||||||
#include "crypto/common/VirtualMemory.h"
|
#include "crypto/common/VirtualMemory.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(XMRIG_ARM)
|
#if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
|
||||||
# include "crypto/cn/CryptoNight_arm.h"
|
# include "crypto/cn/CryptoNight_arm.h"
|
||||||
#else
|
#else
|
||||||
# include "crypto/cn/CryptoNight_x86.h"
|
# include "crypto/cn/CryptoNight_x86.h"
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#if defined _MSC_VER || defined XMRIG_ARM
|
#if defined _MSC_VER || defined XMRIG_ARM || defined XMRIG_RISCV
|
||||||
# define ABI_ATTRIBUTE
|
# define ABI_ATTRIBUTE
|
||||||
#else
|
#else
|
||||||
# define ABI_ATTRIBUTE __attribute__((ms_abi))
|
# define ABI_ATTRIBUTE __attribute__((ms_abi))
|
||||||
|
|||||||
@@ -27,6 +27,9 @@
|
|||||||
#ifndef XMRIG_CRYPTONIGHT_ARM_H
|
#ifndef XMRIG_CRYPTONIGHT_ARM_H
|
||||||
#define XMRIG_CRYPTONIGHT_ARM_H
|
#define XMRIG_CRYPTONIGHT_ARM_H
|
||||||
|
|
||||||
|
#ifdef XMRIG_RISCV
|
||||||
|
# include "crypto/cn/sse2rvv.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "base/crypto/keccak.h"
|
#include "base/crypto/keccak.h"
|
||||||
#include "crypto/cn/CnAlgo.h"
|
#include "crypto/cn/CnAlgo.h"
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
// VARIANT ALTERATIONS
|
// VARIANT ALTERATIONS
|
||||||
#ifndef XMRIG_ARM
|
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
# define VARIANT1_INIT(part) \
|
# define VARIANT1_INIT(part) \
|
||||||
uint64_t tweak1_2_##part = 0; \
|
uint64_t tweak1_2_##part = 0; \
|
||||||
if (BASE == Algorithm::CN_1) { \
|
if (BASE == Algorithm::CN_1) { \
|
||||||
@@ -60,7 +60,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_ARM
|
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
# define VARIANT2_INIT(part) \
|
# define VARIANT2_INIT(part) \
|
||||||
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[12])); \
|
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[12])); \
|
||||||
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[13]));
|
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[13]));
|
||||||
|
|||||||
@@ -29,6 +29,8 @@
|
|||||||
|
|
||||||
#if defined(XMRIG_ARM)
|
#if defined(XMRIG_ARM)
|
||||||
# include "crypto/cn/sse2neon.h"
|
# include "crypto/cn/sse2neon.h"
|
||||||
|
#elif defined(XMRIG_RISCV)
|
||||||
|
# include "crypto/cn/sse2rvv.h"
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
# include <x86intrin.h>
|
# include <x86intrin.h>
|
||||||
#else
|
#else
|
||||||
|
|||||||
748
src/crypto/cn/sse2rvv.h
Normal file
748
src/crypto/cn/sse2rvv.h
Normal file
@@ -0,0 +1,748 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
|
||||||
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SSE to RISC-V Vector (RVV) optimized compatibility header
|
||||||
|
* Provides both scalar fallback and vectorized implementations using RVV intrinsics
|
||||||
|
*
|
||||||
|
* Based on sse2neon.h concepts, adapted for RISC-V architecture with RVV extensions
|
||||||
|
* Original sse2neon.h: https://github.com/DLTcollab/sse2neon
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XMRIG_SSE2RVV_OPTIMIZED_H
|
||||||
|
#define XMRIG_SSE2RVV_OPTIMIZED_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Check if RVV is available */
|
||||||
|
#if defined(__riscv_vector)
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#define USE_RVV_INTRINSICS 1
|
||||||
|
#else
|
||||||
|
#define USE_RVV_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* 128-bit vector type */
|
||||||
|
/*
 * Storage for one emulated SSE integer register. All members overlay the
 * same bytes, so the value can be viewed as lanes of any of the listed
 * widths, signed or unsigned. Both type names refer to this one union.
 */
typedef union {
    /* unsigned lane views */
    uint8_t  u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    /* signed lane views */
    int8_t   i8[16];
    int16_t  i16[8];
    int32_t  i32[4];
    int64_t  i64[2];
} __m128i_union, __m128i;
|
||||||
|
|
||||||
|
/* Set operations */
|
||||||
|
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i32[0] = e0;
|
||||||
|
result.i32[1] = e1;
|
||||||
|
result.i32[2] = e2;
|
||||||
|
result.i32[3] = e3;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i64[0] = e0;
|
||||||
|
result.i64[1] = e1;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_setzero_si128(void)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
memset(&result, 0, sizeof(result));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Extract/insert operations */
|
||||||
|
/* Return the lowest 32-bit lane of a as a signed integer. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    const int low = a.i32[0];

    return low;
}
|
||||||
|
|
||||||
|
/* Return the lowest 64-bit lane of a as a signed integer. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    const int64_t low = a.i64[0];

    return low;
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_cvtsi32_si128(int a)
|
||||||
|
{
|
||||||
|
__m128i result = _mm_setzero_si128();
|
||||||
|
result.i32[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_cvtsi64_si128(int64_t a)
|
||||||
|
{
|
||||||
|
__m128i result = _mm_setzero_si128();
|
||||||
|
result.i64[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shuffle operations */
|
||||||
|
/*
 * Rearrange the four 32-bit lanes of a.
 * imm8 holds four 2-bit selectors: bits [1:0] pick the source lane for
 * output lane 0, bits [3:2] for lane 1, bits [5:4] for lane 2 and
 * bits [7:6] for lane 3.
 */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i out;

    for (int lane = 0; lane < 4; ++lane) {
        const int selector = (imm8 >> (2 * lane)) & 0x3;
        out.u32[lane] = a.u32[selector];
    }

    return out;
}
|
||||||
|
|
||||||
|
/* Logical operations - optimized with RVV when available */
|
||||||
|
/*
 * Bitwise XOR of two 128-bit vectors.
 * With RVV intrinsics enabled the operation is done on two 64-bit vector
 * elements; otherwise each 64-bit lane is combined with scalar code.
 */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t n = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, n);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, n);

    __riscv_vse64_v_u64m1(out.u64, __riscv_vxor_vv_u64m1(lhs, rhs, n), n);
#else
    for (int lane = 0; lane < 2; ++lane) {
        out.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
#endif

    return out;
}
|
||||||
|
|
||||||
|
/*
 * Bitwise OR of two 128-bit vectors.
 * With RVV intrinsics enabled the operation is done on two 64-bit vector
 * elements; otherwise each 64-bit lane is combined with scalar code.
 */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t n = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, n);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, n);

    __riscv_vse64_v_u64m1(out.u64, __riscv_vor_vv_u64m1(lhs, rhs, n), n);
#else
    for (int lane = 0; lane < 2; ++lane) {
        out.u64[lane] = a.u64[lane] | b.u64[lane];
    }
#endif

    return out;
}
|
||||||
|
|
||||||
|
/*
 * Bitwise AND of two 128-bit vectors.
 * With RVV intrinsics enabled the operation is done on two 64-bit vector
 * elements; otherwise each 64-bit lane is combined with scalar code.
 */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t n = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, n);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, n);

    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(lhs, rhs, n), n);
#else
    for (int lane = 0; lane < 2; ++lane) {
        out.u64[lane] = a.u64[lane] & b.u64[lane];
    }
#endif

    return out;
}
|
||||||
|
|
||||||
|
/*
 * Compute (~a) & b over all 128 bits. Note that it is the FIRST operand
 * that is inverted.
 * With RVV intrinsics enabled the operation is done on two 64-bit vector
 * elements; otherwise each 64-bit lane is combined with scalar code.
 */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
    __m128i out;

#if USE_RVV_INTRINSICS
    size_t n = __riscv_vsetvl_e64m1(2);
    vuint64m1_t inverted = __riscv_vnot_v_u64m1(__riscv_vle64_v_u64m1(a.u64, n), n);
    vuint64m1_t mask = __riscv_vle64_v_u64m1(b.u64, n);

    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(inverted, mask, n), n);
#else
    for (int lane = 0; lane < 2; ++lane) {
        out.u64[lane] = (~a.u64[lane]) & b.u64[lane];
    }
#endif

    return out;
}
|
||||||
|
|
||||||
|
/* Shift operations */
|
||||||
|
/*
 * Shift the whole 128-bit value left by a number of BYTES, filling the
 * vacated low bytes with zeros. The count is the low 8 bits of imm8; any
 * count above 15 yields an all-zero vector.
 */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    __m128i out = _mm_setzero_si128();
    const int shift = imm8 & 0xFF;

    if (shift <= 15) {
#if USE_RVV_INTRINSICS
        size_t n = __riscv_vsetvl_e8m1(16);
        vuint8m1_t src = __riscv_vle8_v_u8m1(a.u8, n);
        /* Slide into a zeroed destination so the low bytes end up as 0. */
        vuint8m1_t zeros = __riscv_vmv_v_x_u8m1(0, n);
        vuint8m1_t moved = __riscv_vslideup_vx_u8m1(zeros, src, shift, n);

        __riscv_vse8_v_u8m1(out.u8, moved, n);
#else
        /* Bytes below `shift` keep the zero they were initialised with. */
        for (int dst = shift; dst < 16; ++dst) {
            out.u8[dst] = a.u8[dst - shift];
        }
#endif
    }

    return out;
}
|
||||||
|
|
||||||
|
/*
 * Shift the whole 128-bit value right by a number of BYTES, filling the
 * vacated high bytes with zeros. The count is the low 8 bits of imm8; any
 * count above 15 yields an all-zero vector.
 */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e8m1(16);
    /*
     * Only the first (16 - count) output bytes come from `a`. vslidedown
     * reads source elements up to VLMAX, not up to vl, so when the vector
     * register is wider than 128 bits the elements past the 16 loaded
     * bytes are unspecified tail data. Limiting the slide and the store to
     * the valid bytes keeps that data out of the result; the remaining
     * high bytes stay zero from the initialisation above.
     */
    size_t keep = (size_t)(16 - count);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, (size_t)count, keep);
    __riscv_vse8_v_u8m1(result.u8, vr, keep);
#else
    for (int i = count; i < 16; i++) {
        result.u8[i - count] = a.u8[i];
    }
#endif

    return result;
}
|
||||||
|
|
||||||
|
/*
 * Shift each 64-bit lane left by the same bit count, shifting in zeros.
 * The count is taken from the low 8 bits of imm8, the same way
 * _mm_slli_si128 treats its argument, so a negative imm8 can never reach
 * the shift operator (which would be undefined behaviour). Counts above 63
 * produce an all-zero vector.
 */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
    __m128i result;
    const unsigned count = (unsigned)imm8 & 0xFFu;

    if (count > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, count, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] << count;
    result.u64[1] = a.u64[1] << count;
#endif

    return result;
}
|
||||||
|
|
||||||
|
/*
 * Logical right shift of each 64-bit lane by the same bit count, shifting
 * in zeros. The count is taken from the low 8 bits of imm8, the same way
 * _mm_srli_si128 treats its argument, so a negative imm8 can never reach
 * the shift operator (which would be undefined behaviour). Counts above 63
 * produce an all-zero vector.
 */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
    __m128i result;
    const unsigned count = (unsigned)imm8 & 0xFFu;

    if (count > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }

#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, count, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] >> count;
    result.u64[1] = a.u64[1] >> count;
#endif

    return result;
}
|
||||||
|
|
||||||
|
/* Load/store operations - optimized with RVV */
|
||||||
|
static inline __m128i _mm_load_si128(const __m128i* p)
|
||||||
|
{
|
||||||
|
#if USE_RVV_INTRINSICS
|
||||||
|
__m128i result;
|
||||||
|
size_t vl = __riscv_vsetvl_e64m1(2);
|
||||||
|
vuint64m1_t v = __riscv_vle64_v_u64m1((const uint64_t*)p, vl);
|
||||||
|
__riscv_vse64_v_u64m1(result.u64, v, vl);
|
||||||
|
return result;
|
||||||
|
#else
|
||||||
|
__m128i result;
|
||||||
|
memcpy(&result, p, sizeof(__m128i));
|
||||||
|
return result;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read 16 bytes from p into a vector value using a plain byte copy. */
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
    __m128i loaded;

    memcpy(&loaded, p, sizeof loaded);

    return loaded;
}
|
||||||
|
|
||||||
|
/*
 * Write the 16 bytes of a to the memory at p.
 * The RVV path writes the value as two 64-bit elements; the fallback
 * copies the bytes with memcpy.
 */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
#if USE_RVV_INTRINSICS
    size_t n = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lanes = __riscv_vle64_v_u64m1(a.u64, n);

    __riscv_vse64_v_u64m1((uint64_t*)p, lanes, n);
#else
    memcpy(p, &a, sizeof(__m128i));
#endif
}
|
||||||
|
|
||||||
|
/* Write the 16 bytes of a to the memory at p using a plain byte copy. */
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    memcpy(p, &a, sizeof a);
}
|
||||||
|
|
||||||
|
/* Arithmetic operations - optimized with RVV */
|
||||||
|
/* Lane-wise sum of the two 64-bit lanes of a and b (wraps modulo 2^64). */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
    __m128i sum;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(sum.u64, __riscv_vadd_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        sum.u64[lane] = a.u64[lane] + b.u64[lane];
    }
#endif
    return sum;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_add_epi32: lane-wise sum of the four 32-bit lanes of a
 * and b, wrapping modulo 2^32.
 */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
    __m128i result;
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl);
    vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    /* Add through the unsigned view so overflow wraps like the SSE
     * instruction; overflowing a signed int is undefined behavior in C. */
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* Lane-wise difference a - b over the two 64-bit lanes (wraps modulo 2^64). */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
    __m128i diff;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(diff.u64, __riscv_vsub_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        diff.u64[lane] = a.u64[lane] - b.u64[lane];
    }
#endif
    return diff;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_mul_epu32: multiplies the unsigned 32-bit elements 0 and
 * 2 of a and b (the low half of each 64-bit lane) and returns the two full
 * 64-bit products.
 */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i result;
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    /* Load whole 64-bit lanes and mask off their upper halves so the operands
     * are u32[0] and u32[2]. Loading two consecutive 32-bit elements and
     * widening them would use u32[1] for the second lane. */
    vuint64m1_t va_lo = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(a.u64, vl), 0xFFFFFFFFULL, vl);
    vuint64m1_t vb_lo = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(b.u64, vl), 0xFFFFFFFFULL, vl);
    vuint64m1_t vr = __riscv_vmul_vv_u64m1(va_lo, vb_lo, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* Unpack operations */
|
||||||
|
/* Builds a vector from the low 64-bit lanes: lane 0 from a, lane 1 from b. */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    const uint64_t from_a = a.u64[0];
    const uint64_t from_b = b.u64[0];
    __m128i merged;
    merged.u64[0] = from_a;
    merged.u64[1] = from_b;
    return merged;
}
|
||||||
|
|
||||||
|
/* Builds a vector from the high 64-bit lanes: lane 0 from a, lane 1 from b. */
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    const uint64_t from_a = a.u64[1];
    const uint64_t from_b = b.u64[1];
    __m128i merged;
    merged.u64[0] = from_a;
    merged.u64[1] = from_b;
    return merged;
}
|
||||||
|
|
||||||
|
/* Pause instruction for spin-wait loops */
|
||||||
|
/* Spin-wait hint: emits "pause" when the compiler advertises Zihintpause,
 * otherwise a plain "nop". */
static inline void _mm_pause(void)
{
#if !defined(__riscv_zihintpause)
    __asm__ __volatile__("nop");
#else
    __asm__ __volatile__("pause");
#endif
}
|
||||||
|
|
||||||
|
/* Memory fence - optimized for RISC-V */
|
||||||
|
/* Full fence: emits "fence rw,rw" and acts as a compiler memory barrier. */
static inline void _mm_mfence(void)
{
    __asm__ __volatile__(
        "fence rw,rw"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||||
|
|
||||||
|
/* Load fence: emits "fence r,r" and acts as a compiler memory barrier. */
static inline void _mm_lfence(void)
{
    __asm__ __volatile__(
        "fence r,r"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||||
|
|
||||||
|
/* Store fence: emits "fence w,w" and acts as a compiler memory barrier. */
static inline void _mm_sfence(void)
{
    __asm__ __volatile__(
        "fence w,w"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||||
|
|
||||||
|
/* Comparison operations */
|
||||||
|
/* Per 32-bit lane equality mask: all ones where a and b match, zero elsewhere. */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    __m128i mask;
    int lane = 0;
    while (lane < 4) {
        if (a.u32[lane] == b.u32[lane]) {
            mask.u32[lane] = 0xFFFFFFFF;
        } else {
            mask.u32[lane] = 0;
        }
        lane++;
    }
    return mask;
}
|
||||||
|
|
||||||
|
/* Per 64-bit lane equality mask: all ones where a and b match, zero elsewhere. */
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    __m128i mask;
    int lane = 0;
    while (lane < 2) {
        if (a.u64[lane] == b.u64[lane]) {
            mask.u64[lane] = 0xFFFFFFFFFFFFFFFFULL;
        } else {
            mask.u64[lane] = 0;
        }
        lane++;
    }
    return mask;
}
|
||||||
|
|
||||||
|
/* Additional shift operations */
|
||||||
|
/*
 * Emulates SSE2 _mm_slli_epi32: shifts each of the four 32-bit lanes of a
 * left by imm8 bits, shifting in zeros. Any count outside 0..31 (including
 * negative values) yields an all-zero vector.
 */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
    __m128i result;
    /* Compare as unsigned so negative counts are rejected too: a negative
     * shift is undefined in C, and the RVV shift would silently use only the
     * low 5 bits of the count. */
    if ((unsigned int)imm8 > 31u) {
        memset(&result, 0, sizeof(result));
        return result;
    }
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, (size_t)imm8, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] << imm8;
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_srli_epi32: logically shifts each of the four 32-bit
 * lanes of a right by imm8 bits, shifting in zeros. Any count outside 0..31
 * (including negative values) yields an all-zero vector.
 */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
    __m128i result;
    /* Compare as unsigned so negative counts are rejected too: a negative
     * shift is undefined in C, and the RVV shift would silently use only the
     * low 5 bits of the count. */
    if ((unsigned int)imm8 > 31u) {
        memset(&result, 0, sizeof(result));
        return result;
    }
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, (size_t)imm8, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] >> imm8;
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* 64-bit integer operations */
|
||||||
|
static inline __m128i _mm_set1_epi64x(int64_t a)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i64[0] = a;
|
||||||
|
result.i64[1] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Float type for compatibility */
|
||||||
|
typedef __m128i __m128;
|
||||||
|
|
||||||
|
/* Float operations - simplified scalar implementations */
|
||||||
|
static inline __m128 _mm_set1_ps(float a)
|
||||||
|
{
|
||||||
|
__m128 result;
|
||||||
|
uint32_t val;
|
||||||
|
memcpy(&val, &a, sizeof(float));
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
result.u32[i] = val;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128 _mm_setzero_ps(void)
|
||||||
|
{
|
||||||
|
__m128 result;
|
||||||
|
memset(&result, 0, sizeof(result));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Adds the four single-precision lanes of a and b (scalar implementation). */
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    float lhs[4];
    float rhs[4];
    float sum[4];
    __m128 packed;

    /* Move lane bits into float arrays via memcpy to avoid type punning. */
    memcpy(lhs, &a, sizeof(a));
    memcpy(rhs, &b, sizeof(b));
    int lane = 0;
    while (lane < 4) {
        sum[lane] = lhs[lane] + rhs[lane];
        lane++;
    }
    memcpy(&packed, sum, sizeof(packed));
    return packed;
}
|
||||||
|
|
||||||
|
/* Multiplies the four single-precision lanes of a and b (scalar implementation). */
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    float lhs[4];
    float rhs[4];
    float product[4];
    __m128 packed;

    /* Move lane bits into float arrays via memcpy to avoid type punning. */
    memcpy(lhs, &a, sizeof(a));
    memcpy(rhs, &b, sizeof(b));
    int lane = 0;
    while (lane < 4) {
        product[lane] = lhs[lane] * rhs[lane];
        lane++;
    }
    memcpy(&packed, product, sizeof(packed));
    return packed;
}
|
||||||
|
|
||||||
|
/* Bitwise AND of the 128 bits of a and b. */
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    __m128 masked;
    for (int lane = 0; lane < 2; lane++) {
        masked.u64[lane] = a.u64[lane] & b.u64[lane];
    }
    return masked;
}
|
||||||
|
|
||||||
|
/* Bitwise OR of the 128 bits of a and b. */
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    __m128 combined;
    for (int lane = 0; lane < 2; lane++) {
        combined.u64[lane] = a.u64[lane] | b.u64[lane];
    }
    return combined;
}
|
||||||
|
|
||||||
|
/* Converts each signed 32-bit lane of a to a float and packs the four floats. */
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    float converted[4];
    __m128 packed;
    int lane = 0;
    while (lane < 4) {
        converted[lane] = (float)a.i32[lane];
        lane++;
    }
    memcpy(&packed, converted, sizeof(packed));
    return packed;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_cvttps_epi32: converts each single-precision lane of a to
 * a signed 32-bit integer, truncating toward zero. Lanes that are NaN or do
 * not fit in int32_t produce INT32_MIN (0x80000000), the value the SSE
 * instruction returns for such inputs.
 */
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    __m128i result;
    float fa[4];
    memcpy(fa, &a, sizeof(__m128));
    for (int i = 0; i < 4; i++) {
        /* Casting an out-of-range or NaN float to an integer is undefined in
         * C, so range-check first. Both bounds are exactly representable as
         * float, and NaN fails every comparison. */
        if (fa[i] >= -2147483648.0f && fa[i] < 2147483648.0f) {
            result.i32[i] = (int32_t)fa[i];
        } else {
            result.i32[i] = INT32_MIN;
        }
    }
    return result;
}
|
||||||
|
|
||||||
|
/* Casting operations */
|
||||||
|
/* Reinterprets an integer vector as a float vector; the 16 bytes are copied unchanged. */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    __m128 reinterpreted;
    const void *src = &a;
    memcpy(&reinterpreted, src, sizeof(reinterpreted));
    return reinterpreted;
}
|
||||||
|
|
||||||
|
/* Reinterprets a float vector as an integer vector; the 16 bytes are copied unchanged. */
static inline __m128i _mm_castps_si128(__m128 a)
{
    __m128i reinterpreted;
    const void *src = &a;
    memcpy(&reinterpreted, src, sizeof(a));
    return reinterpreted;
}
|
||||||
|
|
||||||
|
/* Additional set operations */
|
||||||
|
static inline __m128i _mm_set1_epi32(int a)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
result.i32[i] = a;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* AES instructions - placeholders for soft_aes compatibility */
|
||||||
|
/*
 * Placeholder only: returns a XOR roundkey. No SubBytes/ShiftRows/MixColumns
 * step is performed here, so this is not a real AES encryption round.
 */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    const __m128i keyed = _mm_xor_si128(a, roundkey);
    return keyed;
}
|
||||||
|
|
||||||
|
/*
 * Placeholder only: returns a unchanged and ignores rcon. No AES key
 * expansion is performed here.
 */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    (void)rcon; /* intentionally unused */
    return a;
}
|
||||||
|
|
||||||
|
/* Rotate right operation for soft_aes.h */
|
||||||
|
/*
 * Rotates the 32-bit value right by count bits; count is taken modulo 32.
 */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    /* The left-shift amount is (32 - n) mod 32, so a rotation by 0 never
     * shifts by the full width of the type. */
    const unsigned int n = count & 31u;
    const unsigned int complement = (0u - n) & 31u;
    const uint32_t low_part = value >> n;
    const uint32_t high_part = value << complement;
    return low_part | high_part;
}
|
||||||
|
|
||||||
|
/* ARM NEON compatibility types and intrinsics for RISC-V */
|
||||||
|
typedef __m128i_union uint64x2_t;
|
||||||
|
typedef __m128i_union uint8x16_t;
|
||||||
|
typedef __m128i_union int64x2_t;
|
||||||
|
typedef __m128i_union int32x4_t;
|
||||||
|
|
||||||
|
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
|
||||||
|
{
|
||||||
|
uint64x2_t result;
|
||||||
|
result.u64[0] = ptr[0];
|
||||||
|
result.u64[1] = ptr[1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int64x2_t vld1q_s64(const int64_t *ptr)
|
||||||
|
{
|
||||||
|
int64x2_t result;
|
||||||
|
result.i64[0] = ptr[0];
|
||||||
|
result.i64[1] = ptr[1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Writes the two 64-bit lanes of val to ptr[0] and ptr[1]. */
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    for (int lane = 0; lane < 2; lane++) {
        ptr[lane] = val.u64[lane];
    }
}
|
||||||
|
|
||||||
|
/* NEON-style XOR of two vectors; forwards to _mm_xor_si128. */
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t xored = _mm_xor_si128(a, b);
    return xored;
}
|
||||||
|
|
||||||
|
/* NEON-style 64-bit lane-wise addition; forwards to _mm_add_epi64. */
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t summed = _mm_add_epi64(a, b);
    return summed;
}
|
||||||
|
|
||||||
|
/* Reinterprets a byte vector as a 64-bit-lane vector; the bytes are copied unchanged. */
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    uint64x2_t reinterpreted;
    const void *src = &a;
    memcpy(&reinterpreted, src, sizeof(reinterpreted));
    return reinterpreted;
}
|
||||||
|
|
||||||
|
/* Returns the unsigned 64-bit lane of v selected by lane; the index is not range-checked. */
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    const uint64_t *lanes = v.u64;
    return lanes[lane];
}
|
||||||
|
|
||||||
|
/* Returns the signed 64-bit lane of v selected by lane; the index is not range-checked. */
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    const int64_t *lanes = v.i64;
    return lanes[lane];
}
|
||||||
|
|
||||||
|
/* Returns the signed 32-bit lane of v selected by lane; the index is not range-checked. */
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    const int32_t *lanes = v.i32;
    return lanes[lane];
}
|
||||||
|
|
||||||
|
typedef struct { uint64_t val[1]; } uint64x1_t;

/* Wraps a single 64-bit value in a one-lane vector. */
static inline uint64x1_t vcreate_u64(uint64_t a)
{
    uint64x1_t wrapped;
    *wrapped.val = a;
    return wrapped;
}
|
||||||
|
|
||||||
|
/* Joins two one-lane vectors: low becomes lane 0 and high becomes lane 1. */
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    uint64x2_t joined;
    joined.u64[1] = high.val[0];
    joined.u64[0] = low.val[0];
    return joined;
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */
|
||||||
748
src/crypto/cn/sse2rvv_optimized.h
Normal file
748
src/crypto/cn/sse2rvv_optimized.h
Normal file
@@ -0,0 +1,748 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright (c) 2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SSE to RISC-V Vector (RVV) optimized compatibility header
|
||||||
|
* Provides both scalar fallback and vectorized implementations using RVV intrinsics
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XMRIG_SSE2RVV_OPTIMIZED_H
|
||||||
|
#define XMRIG_SSE2RVV_OPTIMIZED_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Check if RVV is available */
|
||||||
|
#if defined(__riscv_vector)
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#define USE_RVV_INTRINSICS 1
|
||||||
|
#else
|
||||||
|
#define USE_RVV_INTRINSICS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* 128-bit vector type */
|
||||||
|
/*
 * 128-bit vector emulation type: one 16-byte value that can be viewed as
 * unsigned or signed lanes of 8, 16, 32 or 64 bits.
 *
 * RVV register types (vuint64m1_t and friends) are sizeless and are not
 * allowed as union members, so they are deliberately not part of this union.
 * The RVV code paths move data through the plain arrays with vector
 * load/store intrinsics instead.
 */
typedef union {
    uint8_t  u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    int8_t   i8[16];
    int16_t  i16[8];
    int32_t  i32[4];
    int64_t  i64[2];
} __m128i_union;
|
||||||
|
|
||||||
|
typedef __m128i_union __m128i;
|
||||||
|
|
||||||
|
/* Set operations */
|
||||||
|
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i32[0] = e0;
|
||||||
|
result.i32[1] = e1;
|
||||||
|
result.i32[2] = e2;
|
||||||
|
result.i32[3] = e3;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i64[0] = e0;
|
||||||
|
result.i64[1] = e1;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns a vector whose bytes are all zero. */
static inline __m128i _mm_setzero_si128(void)
{
    __m128i zeroed;
    void *bytes = &zeroed;
    memset(bytes, 0, sizeof(zeroed));
    return zeroed;
}
|
||||||
|
|
||||||
|
/* Extract/insert operations */
|
||||||
|
/* Returns the lowest signed 32-bit lane of a. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    const int32_t lowest = a.i32[0];
    return lowest;
}
|
||||||
|
|
||||||
|
/* Returns the lowest signed 64-bit lane of a. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    const int64_t lowest = a.i64[0];
    return lowest;
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_cvtsi32_si128(int a)
|
||||||
|
{
|
||||||
|
__m128i result = _mm_setzero_si128();
|
||||||
|
result.i32[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_cvtsi64_si128(int64_t a)
|
||||||
|
{
|
||||||
|
__m128i result = _mm_setzero_si128();
|
||||||
|
result.i64[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shuffle operations */
|
||||||
|
/*
 * Permutes the 32-bit lanes of a: output lane k takes the source lane whose
 * index is stored in bits [2k+1:2k] of imm8.
 */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i picked;
    for (int lane = 0; lane < 4; lane++) {
        const int selector = (imm8 >> (2 * lane)) & 0x3;
        picked.u32[lane] = a.u32[selector];
    }
    return picked;
}
|
||||||
|
|
||||||
|
/* Logical operations - optimized with RVV when available */
|
||||||
|
/* Bitwise XOR of the 128 bits of a and b. */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vxor_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
#endif
    return out;
}
|
||||||
|
|
||||||
|
/* Bitwise OR of the 128 bits of a and b. */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vor_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] | b.u64[lane];
    }
#endif
    return out;
}
|
||||||
|
|
||||||
|
/* Bitwise AND of the 128 bits of a and b. */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = a.u64[lane] & b.u64[lane];
    }
#endif
    return out;
}
|
||||||
|
|
||||||
|
/* Computes (~a) & b over all 128 bits; note that it is a that gets inverted. */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
    __m128i out;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    vuint64m1_t inverted = __riscv_vnot_v_u64m1(lhs, lanes);
    __riscv_vse64_v_u64m1(out.u64, __riscv_vand_vv_u64m1(inverted, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        out.u64[lane] = (~a.u64[lane]) & b.u64[lane];
    }
#endif
    return out;
}
|
||||||
|
|
||||||
|
/* Shift operations */
|
||||||
|
/*
 * Shifts the whole vector toward higher byte indices by (imm8 & 0xFF) bytes,
 * filling the vacated low bytes with zero. A count above 15 gives all zeros.
 */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    __m128i shifted = _mm_setzero_si128();
    const int shift = imm8 & 0xFF;
    if (shift <= 15) {
#if USE_RVV_INTRINSICS
        /* Slide the source up into an all-zero destination vector. */
        size_t n = __riscv_vsetvl_e8m1(16);
        vuint8m1_t src = __riscv_vle8_v_u8m1(a.u8, n);
        vuint8m1_t zeros = __riscv_vmv_v_x_u8m1(0, n);
        __riscv_vse8_v_u8m1(shifted.u8, __riscv_vslideup_vx_u8m1(zeros, src, shift, n), n);
#else
        for (int dst = shift; dst < 16; dst++) {
            shifted.u8[dst] = a.u8[dst - shift];
        }
#endif
    }
    return shifted;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_srli_si128: shifts the whole vector toward lower byte
 * indices by (imm8 & 0xFF) bytes, filling the vacated high bytes with zero.
 * A count above 15 gives all zeros.
 */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;

#if USE_RVV_INTRINSICS
    /* Copy only the (16 - count) surviving bytes; the rest of result is
     * already zero. A vslidedown over 16 elements would read source elements
     * up to VLMAX, and on hardware with VLEN > 128 those are tail elements
     * with unspecified contents rather than zeros. */
    size_t vl = __riscv_vsetvl_e8m1((size_t)(16 - count));
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8 + count, vl);
    __riscv_vse8_v_u8m1(result.u8, va, vl);
#else
    for (int i = count; i < 16; i++) {
        result.u8[i - count] = a.u8[i];
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_slli_epi64: shifts both 64-bit lanes of a left by imm8
 * bits, shifting in zeros. Any count outside 0..63 (including negative
 * values) yields an all-zero vector.
 */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
    __m128i result;
    /* Compare as unsigned so negative counts are rejected too: a negative
     * shift is undefined in C, and the RVV shift would silently use only the
     * low 6 bits of the count. */
    if ((unsigned int)imm8 > 63u) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, (size_t)imm8, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] << imm8;
    result.u64[1] = a.u64[1] << imm8;
#endif
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_srli_epi64: logically shifts both 64-bit lanes of a right
 * by imm8 bits, shifting in zeros. Any count outside 0..63 (including
 * negative values) yields an all-zero vector.
 */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
    __m128i result;
    /* Compare as unsigned so negative counts are rejected too: a negative
     * shift is undefined in C, and the RVV shift would silently use only the
     * low 6 bits of the count. */
    if ((unsigned int)imm8 > 63u) {
        result.u64[0] = 0;
        result.u64[1] = 0;
        return result;
    }
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, (size_t)imm8, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = a.u64[0] >> imm8;
    result.u64[1] = a.u64[1] >> imm8;
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* Load/store operations - optimized with RVV */
|
||||||
|
/* Loads 16 bytes from p and returns them as a 128-bit vector value. */
static inline __m128i _mm_load_si128(const __m128i* p)
{
    __m128i loaded;
#if USE_RVV_INTRINSICS
    /* Move both 64-bit lanes through a vector register. */
    size_t lanes = __riscv_vsetvl_e64m1(2);
    __riscv_vse64_v_u64m1(loaded.u64, __riscv_vle64_v_u64m1((const uint64_t*)p, lanes), lanes);
#else
    memcpy(&loaded, p, sizeof(loaded));
#endif
    return loaded;
}
|
||||||
|
|
||||||
|
/* Byte-wise copy of 16 bytes from p into a new vector; p may have any alignment. */
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
    __m128i loaded;
    memcpy(&loaded, p, sizeof(loaded));
    return loaded;
}
|
||||||
|
|
||||||
|
/* Writes the 16 bytes of a to the memory pointed to by p. */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
#if !USE_RVV_INTRINSICS
    memcpy(p, &a, sizeof(a));
#else
    /* Move both 64-bit lanes through a vector register. */
    size_t lanes = __riscv_vsetvl_e64m1(2);
    __riscv_vse64_v_u64m1((uint64_t*)p, __riscv_vle64_v_u64m1(a.u64, lanes), lanes);
#endif
}
|
||||||
|
|
||||||
|
/* Byte-wise copy of the 16 bytes of a to p; p may have any alignment. */
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    const void *src = &a;
    memcpy(p, src, sizeof(a));
}
|
||||||
|
|
||||||
|
/* Arithmetic operations - optimized with RVV */
|
||||||
|
/* Lane-wise sum of the two 64-bit lanes of a and b (wraps modulo 2^64). */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
    __m128i sum;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(sum.u64, __riscv_vadd_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        sum.u64[lane] = a.u64[lane] + b.u64[lane];
    }
#endif
    return sum;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_add_epi32: lane-wise sum of the four 32-bit lanes of a
 * and b, wrapping modulo 2^32.
 */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
    __m128i result;
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl);
    vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    /* Add through the unsigned view so overflow wraps like the SSE
     * instruction; overflowing a signed int is undefined behavior in C. */
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* Lane-wise difference a - b over the two 64-bit lanes (wraps modulo 2^64). */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
    __m128i diff;
#if USE_RVV_INTRINSICS
    size_t lanes = __riscv_vsetvl_e64m1(2);
    vuint64m1_t lhs = __riscv_vle64_v_u64m1(a.u64, lanes);
    vuint64m1_t rhs = __riscv_vle64_v_u64m1(b.u64, lanes);
    __riscv_vse64_v_u64m1(diff.u64, __riscv_vsub_vv_u64m1(lhs, rhs, lanes), lanes);
#else
    for (int lane = 0; lane < 2; lane++) {
        diff.u64[lane] = a.u64[lane] - b.u64[lane];
    }
#endif
    return diff;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_mul_epu32: multiplies the unsigned 32-bit elements 0 and
 * 2 of a and b (the low half of each 64-bit lane) and returns the two full
 * 64-bit products.
 */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i result;
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    /* Load whole 64-bit lanes and mask off their upper halves so the operands
     * are u32[0] and u32[2]. Loading two consecutive 32-bit elements and
     * widening them would use u32[1] for the second lane. */
    vuint64m1_t va_lo = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(a.u64, vl), 0xFFFFFFFFULL, vl);
    vuint64m1_t vb_lo = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(b.u64, vl), 0xFFFFFFFFULL, vl);
    vuint64m1_t vr = __riscv_vmul_vv_u64m1(va_lo, vb_lo, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
#else
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* Unpack operations */
|
||||||
|
/* Builds a vector from the low 64-bit lanes: lane 0 from a, lane 1 from b. */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    const uint64_t from_a = a.u64[0];
    const uint64_t from_b = b.u64[0];
    __m128i merged;
    merged.u64[0] = from_a;
    merged.u64[1] = from_b;
    return merged;
}
|
||||||
|
|
||||||
|
/* Builds a vector from the high 64-bit lanes: lane 0 from a, lane 1 from b. */
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    const uint64_t from_a = a.u64[1];
    const uint64_t from_b = b.u64[1];
    __m128i merged;
    merged.u64[0] = from_a;
    merged.u64[1] = from_b;
    return merged;
}
|
||||||
|
|
||||||
|
/* Pause instruction for spin-wait loops */
|
||||||
|
/* Spin-wait hint: emits "pause" when the compiler advertises Zihintpause,
 * otherwise a plain "nop". */
static inline void _mm_pause(void)
{
#if !defined(__riscv_zihintpause)
    __asm__ __volatile__("nop");
#else
    __asm__ __volatile__("pause");
#endif
}
|
||||||
|
|
||||||
|
/* Memory fence - optimized for RISC-V */
|
||||||
|
/* Full fence: emits "fence rw,rw" and acts as a compiler memory barrier. */
static inline void _mm_mfence(void)
{
    __asm__ __volatile__(
        "fence rw,rw"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||||
|
|
||||||
|
/* Load fence: emits "fence r,r" and acts as a compiler memory barrier. */
static inline void _mm_lfence(void)
{
    __asm__ __volatile__(
        "fence r,r"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||||
|
|
||||||
|
/* Store fence: emits "fence w,w" and acts as a compiler memory barrier. */
static inline void _mm_sfence(void)
{
    __asm__ __volatile__(
        "fence w,w"
        : /* no outputs */
        : /* no inputs */
        : "memory");
}
|
||||||
|
|
||||||
|
/* Comparison operations */
|
||||||
|
/* Per 32-bit lane equality mask: all ones where a and b match, zero elsewhere. */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    __m128i mask;
    int lane = 0;
    while (lane < 4) {
        if (a.u32[lane] == b.u32[lane]) {
            mask.u32[lane] = 0xFFFFFFFF;
        } else {
            mask.u32[lane] = 0;
        }
        lane++;
    }
    return mask;
}
|
||||||
|
|
||||||
|
/* Per 64-bit lane equality mask: all ones where a and b match, zero elsewhere. */
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    __m128i mask;
    int lane = 0;
    while (lane < 2) {
        if (a.u64[lane] == b.u64[lane]) {
            mask.u64[lane] = 0xFFFFFFFFFFFFFFFFULL;
        } else {
            mask.u64[lane] = 0;
        }
        lane++;
    }
    return mask;
}
|
||||||
|
|
||||||
|
/* Additional shift operations */
|
||||||
|
/*
 * Emulates SSE2 _mm_slli_epi32: shifts each of the four 32-bit lanes of a
 * left by imm8 bits, shifting in zeros. Any count outside 0..31 (including
 * negative values) yields an all-zero vector.
 */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
    __m128i result;
    /* Compare as unsigned so negative counts are rejected too: a negative
     * shift is undefined in C, and the RVV shift would silently use only the
     * low 5 bits of the count. */
    if ((unsigned int)imm8 > 31u) {
        memset(&result, 0, sizeof(result));
        return result;
    }
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, (size_t)imm8, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] << imm8;
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Emulates SSE2 _mm_srli_epi32: logically shifts each of the four 32-bit
 * lanes of a right by imm8 bits, shifting in zeros. Any count outside 0..31
 * (including negative values) yields an all-zero vector.
 */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
    __m128i result;
    /* Compare as unsigned so negative counts are rejected too: a negative
     * shift is undefined in C, and the RVV shift would silently use only the
     * low 5 bits of the count. */
    if ((unsigned int)imm8 > 31u) {
        memset(&result, 0, sizeof(result));
        return result;
    }
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, (size_t)imm8, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
#else
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] >> imm8;
    }
#endif
    return result;
}
|
||||||
|
|
||||||
|
/* 64-bit integer operations */
|
||||||
|
static inline __m128i _mm_set1_epi64x(int64_t a)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i64[0] = a;
|
||||||
|
result.i64[1] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Float type for compatibility */
|
||||||
|
typedef __m128i __m128;
|
||||||
|
|
||||||
|
/* Float operations - simplified scalar implementations */
|
||||||
|
static inline __m128 _mm_set1_ps(float a)
|
||||||
|
{
|
||||||
|
__m128 result;
|
||||||
|
uint32_t val;
|
||||||
|
memcpy(&val, &a, sizeof(float));
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
result.u32[i] = val;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128 _mm_setzero_ps(void)
|
||||||
|
{
|
||||||
|
__m128 result;
|
||||||
|
memset(&result, 0, sizeof(result));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lane-wise single-precision addition of the four floats stored in `a` and `b`. */
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    float lhs[4], rhs[4];
    __m128 sum;

    /* unpack both operands into float arrays */
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));

    for (int lane = 0; lane < 4; lane++) {
        lhs[lane] = lhs[lane] + rhs[lane];
    }

    memcpy(&sum, lhs, sizeof(__m128));
    return sum;
}
|
||||||
|
|
||||||
|
/* Lane-wise single-precision multiplication of the four floats stored in `a` and `b`. */
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    float lhs[4], rhs[4];
    __m128 product;

    /* unpack both operands into float arrays */
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));

    for (int lane = 0; lane < 4; lane++) {
        lhs[lane] = lhs[lane] * rhs[lane];
    }

    memcpy(&product, lhs, sizeof(__m128));
    return product;
}
|
||||||
|
|
||||||
|
/* Bitwise AND of the full 128 bits of `a` and `b`. */
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    __m128 out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] & b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Bitwise OR of the full 128 bits of `a` and `b`. */
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    __m128 out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] | b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Convert each signed 32-bit lane of `a` to float and store the float bits in the result. */
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    float converted[4];
    __m128 out;

    converted[0] = (float)a.i32[0];
    converted[1] = (float)a.i32[1];
    converted[2] = (float)a.i32[2];
    converted[3] = (float)a.i32[3];

    memcpy(&out, converted, sizeof(__m128));
    return out;
}
|
||||||
|
|
||||||
|
/*
 * Convert each of the four floats stored in `a` to a signed 32-bit integer,
 * truncating toward zero.
 *
 * A plain (int32_t) cast is undefined behaviour in C when the value is NaN
 * or does not fit in int32_t.  The x86 instruction returns the "integer
 * indefinite" value 0x80000000 for those inputs, so they are mapped to
 * INT32_MIN explicitly.
 */
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    __m128i result;
    float fa[4];

    memcpy(fa, &a, sizeof(__m128));
    for (int i = 0; i < 4; i++) {
        /* both bounds are exactly representable as float; NaN fails both tests */
        if (fa[i] >= -2147483648.0f && fa[i] < 2147483648.0f) {
            result.i32[i] = (int32_t)fa[i];
        } else {
            result.i32[i] = INT32_MIN;
        }
    }
    return result;
}
|
||||||
|
|
||||||
|
/* Casting operations */
|
||||||
|
/* Reinterpret an integer vector as a float vector; the 128 bits are copied unchanged. */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    __m128 out;

    memcpy(&out, &a, sizeof(out));
    return out;
}
|
||||||
|
|
||||||
|
/* Reinterpret a float vector as an integer vector; the 128 bits are copied unchanged. */
static inline __m128i _mm_castps_si128(__m128 a)
{
    __m128i out;

    memcpy(&out, &a, sizeof(a));
    return out;
}
|
||||||
|
|
||||||
|
/* Additional set operations */
|
||||||
|
static inline __m128i _mm_set1_epi32(int a)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
result.i32[i] = a;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* AES instructions - placeholders for soft_aes compatibility */
|
||||||
|
/*
 * Compile-time placeholder for the x86 AESENC intrinsic.
 *
 * NOTE: this does NOT perform an AES round; it only XORs the state with the
 * round key.  Any result obtained through it is not a valid AES encryption.
 */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    const __m128i mixed = _mm_xor_si128(a, roundkey);

    return mixed;
}
|
||||||
|
|
||||||
|
/*
 * Compile-time placeholder for the x86 AESKEYGENASSIST intrinsic.
 *
 * NOTE: no key expansion is performed; the input is returned unchanged and
 * `rcon` is ignored.
 */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    (void)rcon; /* intentionally unused */
    return a;
}
|
||||||
|
|
||||||
|
/* Rotate right operation for soft_aes.h */
|
||||||
|
/*
 * Rotate the 32-bit `value` right by `count` bits.
 *
 * The count is reduced modulo 32, and the complementary left shift is reduced
 * the same way so that neither shift ever reaches the type width.
 */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    const unsigned int right = count % 32u;
    const unsigned int left = (32u - right) % 32u;

    return (value << left) | (value >> right);
}
|
||||||
|
|
||||||
|
/* ARM NEON compatibility types and intrinsics for RISC-V */
|
||||||
|
typedef __m128i_union uint64x2_t;
|
||||||
|
typedef __m128i_union uint8x16_t;
|
||||||
|
typedef __m128i_union int64x2_t;
|
||||||
|
typedef __m128i_union int32x4_t;
|
||||||
|
|
||||||
|
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
|
||||||
|
{
|
||||||
|
uint64x2_t result;
|
||||||
|
result.u64[0] = ptr[0];
|
||||||
|
result.u64[1] = ptr[1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int64x2_t vld1q_s64(const int64_t *ptr)
|
||||||
|
{
|
||||||
|
int64x2_t result;
|
||||||
|
result.i64[0] = ptr[0];
|
||||||
|
result.i64[1] = ptr[1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store both 64-bit lanes of `val` to ptr[0] and ptr[1]. */
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    for (int lane = 0; lane < 2; lane++) {
        ptr[lane] = val.u64[lane];
    }
}
|
||||||
|
|
||||||
|
/* NEON-style XOR of two vectors; forwards to _mm_xor_si128. */
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t xored = _mm_xor_si128(a, b);

    return xored;
}
|
||||||
|
|
||||||
|
/* NEON-style 64-bit lane-wise addition; forwards to _mm_add_epi64. */
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    const uint64x2_t sum = _mm_add_epi64(a, b);

    return sum;
}
|
||||||
|
|
||||||
|
/* Reinterpret a 16 x u8 vector as 2 x u64; the bytes are copied unchanged. */
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    uint64x2_t out;

    memcpy(&out, &a, sizeof(out));
    return out;
}
|
||||||
|
|
||||||
|
/* Return unsigned 64-bit lane `lane` of `v`; the index is not range-checked. */
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    const uint64_t *lanes = v.u64;

    return lanes[lane];
}
|
||||||
|
|
||||||
|
/* Return signed 64-bit lane `lane` of `v`; the index is not range-checked. */
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    const int64_t *lanes = v.i64;

    return lanes[lane];
}
|
||||||
|
|
||||||
|
/* Return signed 32-bit lane `lane` of `v`; the index is not range-checked. */
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    const int32_t *lanes = v.i32;

    return lanes[lane];
}
|
||||||
|
|
||||||
|
typedef struct { uint64_t val[1]; } uint64x1_t;
|
||||||
|
|
||||||
|
static inline uint64x1_t vcreate_u64(uint64_t a)
|
||||||
|
{
|
||||||
|
uint64x1_t result;
|
||||||
|
result.val[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build a 2-lane vector: lane 0 comes from `low`, lane 1 from `high`. */
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    uint64x2_t joined;

    joined.u64[1] = high.val[0];
    joined.u64[0] = low.val[0];
    return joined;
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */
|
||||||
571
src/crypto/cn/sse2rvv_scalar_backup.h
Normal file
571
src/crypto/cn/sse2rvv_scalar_backup.h
Normal file
@@ -0,0 +1,571 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright (c) 2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SSE to RISC-V compatibility header
|
||||||
|
* Provides scalar implementations of SSE intrinsics for RISC-V architecture
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XMRIG_SSE2RVV_H
|
||||||
|
#define XMRIG_SSE2RVV_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* 128-bit vector type */
|
||||||
|
/*
 * 128-bit vector storage.  Every member overlays the same 16 bytes, so a
 * value written through one lane width can be read back through another.
 */
typedef union {
    /* unsigned lane views */
    uint8_t  u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    /* signed lane views */
    int8_t   i8[16];
    int16_t  i16[8];
    int32_t  i32[4];
    int64_t  i64[2];
} __m128i_union;
|
||||||
|
|
||||||
|
typedef __m128i_union __m128i;
|
||||||
|
|
||||||
|
/* Set operations */
|
||||||
|
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i32[0] = e0;
|
||||||
|
result.i32[1] = e1;
|
||||||
|
result.i32[2] = e2;
|
||||||
|
result.i32[3] = e3;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i64[0] = e0;
|
||||||
|
result.i64[1] = e1;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_setzero_si128(void)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
memset(&result, 0, sizeof(result));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Extract/insert operations */
|
||||||
|
/* Return the lowest signed 32-bit lane of `a`. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    const int32_t lowest = a.i32[0];

    return lowest;
}
|
||||||
|
|
||||||
|
/* Return the lowest signed 64-bit lane of `a`. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    const int64_t lowest = a.i64[0];

    return lowest;
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_cvtsi32_si128(int a)
|
||||||
|
{
|
||||||
|
__m128i result = _mm_setzero_si128();
|
||||||
|
result.i32[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128i _mm_cvtsi64_si128(int64_t a)
|
||||||
|
{
|
||||||
|
__m128i result = _mm_setzero_si128();
|
||||||
|
result.i64[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shuffle operations */
|
||||||
|
/*
 * Permute the 32-bit lanes of `a`.  Result lane k is taken from the source
 * lane selected by bits [2k+1:2k] of `imm8`.
 */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i out;

    for (int lane = 0; lane < 4; lane++) {
        const int selector = (imm8 >> (2 * lane)) & 0x3;

        out.u32[lane] = a.u32[selector];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Logical operations */
|
||||||
|
/* Bitwise XOR of the full 128 bits of `a` and `b`. */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
    __m128i out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] ^ b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Bitwise OR of the full 128 bits of `a` and `b`. */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
    __m128i out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] | b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Bitwise AND of the full 128 bits of `a` and `b`. */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
    __m128i out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] & b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Compute (~a) & b over the full 128 bits; note that it is `a` that is inverted. */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
    __m128i out;

    for (int half = 0; half < 2; half++) {
        const uint64_t inverted = ~a.u64[half];

        out.u64[half] = inverted & b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Shift operations */
|
||||||
|
/*
 * Shift the whole 128-bit value toward higher byte indices by the low 8 bits
 * of `imm8` bytes, filling vacated bytes with zero.  A count above 15 yields
 * an all-zero vector.
 */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    const int shift = imm8 & 0xFF;
    __m128i out;

    for (int dst = 0; dst < 16; dst++) {
        const int src = dst - shift;

        /* bytes with no source inside the vector are zero-filled */
        out.u8[dst] = (src >= 0) ? a.u8[src] : 0;
    }
    return out;
}
|
||||||
|
|
||||||
|
/*
 * Shift the whole 128-bit value toward lower byte indices by the low 8 bits
 * of `imm8` bytes, filling vacated bytes with zero.  A count above 15 yields
 * an all-zero vector.
 */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    const int shift = imm8 & 0xFF;
    __m128i out;

    for (int dst = 0; dst < 16; dst++) {
        const int src = dst + shift;

        /* bytes with no source inside the vector are zero-filled */
        out.u8[dst] = (src < 16) ? a.u8[src] : 0;
    }
    return out;
}
|
||||||
|
|
||||||
|
/*
 * Logical left shift of both 64-bit lanes of `a` by `imm8` bits, shifting in
 * zeros.
 *
 * Only the low 8 bits of `imm8` form the shift count (the same masking that
 * _mm_slli_si128 applies); any count above 63 clears both lanes.  Masking
 * before the comparison keeps a negative `imm8` from reaching the shift
 * operator, where it would be undefined behaviour.
 */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    for (int i = 0; i < 2; i++) {
        result.u64[i] = (count > 63) ? 0 : (a.u64[i] << count);
    }
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Logical right shift of both 64-bit lanes of `a` by `imm8` bits, shifting in
 * zeros.
 *
 * Only the low 8 bits of `imm8` form the shift count (the same masking that
 * _mm_srli_si128 applies); any count above 63 clears both lanes.  Masking
 * before the comparison keeps a negative `imm8` from reaching the shift
 * operator, where it would be undefined behaviour.
 */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    for (int i = 0; i < 2; i++) {
        result.u64[i] = (count > 63) ? 0 : (a.u64[i] >> count);
    }
    return result;
}
|
||||||
|
|
||||||
|
/* Load/store operations */
|
||||||
|
/* Load 16 bytes from `p`.  Implemented with memcpy, so no alignment is required here. */
static inline __m128i _mm_load_si128(const __m128i* p)
{
    __m128i loaded;

    memcpy(&loaded, p, sizeof(loaded));
    return loaded;
}
|
||||||
|
|
||||||
|
/* Load 16 bytes from a possibly unaligned address `p`. */
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
    __m128i loaded;

    memcpy(&loaded, p, sizeof(loaded));
    return loaded;
}
|
||||||
|
|
||||||
|
/* Store the 16 bytes of `a` at `p`.  Implemented with memcpy, so no alignment is required here. */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
    memcpy(p, &a, sizeof(a));
}
|
||||||
|
|
||||||
|
/* Store the 16 bytes of `a` at a possibly unaligned address `p`. */
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    memcpy(p, &a, sizeof(a));
}
|
||||||
|
|
||||||
|
/* Arithmetic operations */
|
||||||
|
/* Lane-wise 64-bit addition; unsigned arithmetic, so each lane wraps modulo 2^64. */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
    __m128i sum;

    for (int lane = 0; lane < 2; lane++) {
        sum.u64[lane] = a.u64[lane] + b.u64[lane];
    }
    return sum;
}
|
||||||
|
|
||||||
|
/*
 * Lane-wise 32-bit addition with wrap-around.
 *
 * The sum is computed on the unsigned view of the lanes: it yields the same
 * bit pattern as a wrapping signed add, whereas adding the int32_t lanes
 * directly is undefined behaviour in C when the result overflows.
 */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
    __m128i result;

    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];
    }
    return result;
}
|
||||||
|
|
||||||
|
/* Lane-wise 64-bit subtraction (a - b); unsigned arithmetic, so each lane wraps modulo 2^64. */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
    __m128i diff;

    for (int lane = 0; lane < 2; lane++) {
        diff.u64[lane] = a.u64[lane] - b.u64[lane];
    }
    return diff;
}
|
||||||
|
|
||||||
|
/*
 * Multiply the even-indexed unsigned 32-bit lanes (0 and 2) of `a` and `b`,
 * producing two full 64-bit products.
 */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i product;

    for (int lane = 0; lane < 2; lane++) {
        /* widen before multiplying so the product keeps all 64 bits */
        const uint64_t lhs = a.u32[2 * lane];
        const uint64_t rhs = b.u32[2 * lane];

        product.u64[lane] = lhs * rhs;
    }
    return product;
}
|
||||||
|
|
||||||
|
/* Unpack operations */
|
||||||
|
/* Combine the low 64-bit lanes: lane 0 from `a`, lane 1 from `b`. */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    __m128i out;

    out.u64[1] = b.u64[0];
    out.u64[0] = a.u64[0];
    return out;
}
|
||||||
|
|
||||||
|
/* Combine the high 64-bit lanes: lane 0 from `a`, lane 1 from `b`. */
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    __m128i out;

    out.u64[1] = b.u64[1];
    out.u64[0] = a.u64[1];
    return out;
}
|
||||||
|
|
||||||
|
/* Pause instruction for spin-wait loops */
|
||||||
|
/*
 * Stand-in for the x86 PAUSE hint used in spin-wait loops: emits a single NOP.
 * NOTE(review): the RISC-V Zihintpause extension defines a dedicated `pause`
 * hint; confirm whether the toolchain baseline allows using it here.
 */
static inline void _mm_pause(void)
{
    __asm__ __volatile__(
        "nop"
    );
}
|
||||||
|
|
||||||
|
/* Memory fence */
|
||||||
|
/* Full memory fence: a bare RISC-V `fence` plus a compiler "memory" clobber. */
static inline void _mm_mfence(void)
{
    __asm__ __volatile__(
        "fence"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
|
||||||
|
|
||||||
|
/* Load fence: `fence r,r` plus a compiler "memory" clobber. */
static inline void _mm_lfence(void)
{
    __asm__ __volatile__(
        "fence r,r"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
|
||||||
|
|
||||||
|
/* Store fence: `fence w,w` plus a compiler "memory" clobber. */
static inline void _mm_sfence(void)
{
    __asm__ __volatile__(
        "fence w,w"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
|
||||||
|
|
||||||
|
/* Comparison operations */
|
||||||
|
/* Compare 32-bit lanes for equality: equal lanes become all ones, others zero. */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    __m128i mask;

    for (int lane = 0; lane < 4; lane++) {
        if (a.u32[lane] == b.u32[lane]) {
            mask.u32[lane] = ~(uint32_t)0;
        } else {
            mask.u32[lane] = 0;
        }
    }
    return mask;
}
|
||||||
|
|
||||||
|
/* Compare 64-bit lanes for equality: equal lanes become all ones, others zero. */
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    __m128i mask;

    for (int lane = 0; lane < 2; lane++) {
        if (a.u64[lane] == b.u64[lane]) {
            mask.u64[lane] = ~(uint64_t)0;
        } else {
            mask.u64[lane] = 0;
        }
    }
    return mask;
}
|
||||||
|
|
||||||
|
/* Additional shift operations */
|
||||||
|
/*
 * Logical left shift of each of the four 32-bit lanes of `a` by `imm8` bits,
 * shifting in zeros.
 *
 * Only the low 8 bits of `imm8` form the shift count (the same masking that
 * _mm_slli_si128 applies); any count above 31 clears every lane.  Masking
 * before the comparison keeps a negative `imm8` from reaching the shift
 * operator, where it would be undefined behaviour.
 */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    for (int i = 0; i < 4; i++) {
        result.u32[i] = (count > 31) ? 0 : (a.u32[i] << count);
    }
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Logical right shift of each of the four 32-bit lanes of `a` by `imm8` bits,
 * shifting in zeros.
 *
 * Only the low 8 bits of `imm8` form the shift count (the same masking that
 * _mm_srli_si128 applies); any count above 31 clears every lane.  Masking
 * before the comparison keeps a negative `imm8` from reaching the shift
 * operator, where it would be undefined behaviour.
 */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
    const unsigned int count = (unsigned int)imm8 & 0xFFu;
    __m128i result;

    for (int i = 0; i < 4; i++) {
        result.u32[i] = (count > 31) ? 0 : (a.u32[i] >> count);
    }
    return result;
}
|
||||||
|
|
||||||
|
/* 64-bit integer operations */
|
||||||
|
static inline __m128i _mm_set1_epi64x(int64_t a)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
result.i64[0] = a;
|
||||||
|
result.i64[1] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Float type for compatibility - we'll treat it as int for simplicity */
|
||||||
|
typedef __m128i __m128;
|
||||||
|
|
||||||
|
/* Float operations - simplified scalar implementations */
|
||||||
|
static inline __m128 _mm_set1_ps(float a)
|
||||||
|
{
|
||||||
|
__m128 result;
|
||||||
|
uint32_t val;
|
||||||
|
memcpy(&val, &a, sizeof(float));
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
result.u32[i] = val;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __m128 _mm_setzero_ps(void)
|
||||||
|
{
|
||||||
|
__m128 result;
|
||||||
|
memset(&result, 0, sizeof(result));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lane-wise single-precision addition of the four floats stored in `a` and `b`. */
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    float lhs[4], rhs[4];
    __m128 sum;

    /* unpack both operands into float arrays */
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));

    for (int lane = 0; lane < 4; lane++) {
        lhs[lane] = lhs[lane] + rhs[lane];
    }

    memcpy(&sum, lhs, sizeof(__m128));
    return sum;
}
|
||||||
|
|
||||||
|
/* Lane-wise single-precision multiplication of the four floats stored in `a` and `b`. */
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    float lhs[4], rhs[4];
    __m128 product;

    /* unpack both operands into float arrays */
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));

    for (int lane = 0; lane < 4; lane++) {
        lhs[lane] = lhs[lane] * rhs[lane];
    }

    memcpy(&product, lhs, sizeof(__m128));
    return product;
}
|
||||||
|
|
||||||
|
/* Bitwise AND of the full 128 bits of `a` and `b`. */
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    __m128 out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] & b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Bitwise OR of the full 128 bits of `a` and `b`. */
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    __m128 out;

    for (int half = 0; half < 2; half++) {
        out.u64[half] = a.u64[half] | b.u64[half];
    }
    return out;
}
|
||||||
|
|
||||||
|
/* Convert each signed 32-bit lane of `a` to float and store the float bits in the result. */
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    float converted[4];
    __m128 out;

    converted[0] = (float)a.i32[0];
    converted[1] = (float)a.i32[1];
    converted[2] = (float)a.i32[2];
    converted[3] = (float)a.i32[3];

    memcpy(&out, converted, sizeof(__m128));
    return out;
}
|
||||||
|
|
||||||
|
/*
 * Convert each of the four floats stored in `a` to a signed 32-bit integer,
 * truncating toward zero.
 *
 * A plain (int32_t) cast is undefined behaviour in C when the value is NaN
 * or does not fit in int32_t.  The x86 instruction returns the "integer
 * indefinite" value 0x80000000 for those inputs, so they are mapped to
 * INT32_MIN explicitly.
 */
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    __m128i result;
    float fa[4];

    memcpy(fa, &a, sizeof(__m128));
    for (int i = 0; i < 4; i++) {
        /* both bounds are exactly representable as float; NaN fails both tests */
        if (fa[i] >= -2147483648.0f && fa[i] < 2147483648.0f) {
            result.i32[i] = (int32_t)fa[i];
        } else {
            result.i32[i] = INT32_MIN;
        }
    }
    return result;
}
|
||||||
|
|
||||||
|
/* Casting operations */
|
||||||
|
/* Reinterpret an integer vector as a float vector; the 128 bits are copied unchanged. */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    __m128 out;

    memcpy(&out, &a, sizeof(out));
    return out;
}
|
||||||
|
|
||||||
|
/* Reinterpret a float vector as an integer vector; the 128 bits are copied unchanged. */
static inline __m128i _mm_castps_si128(__m128 a)
{
    __m128i out;

    memcpy(&out, &a, sizeof(a));
    return out;
}
|
||||||
|
|
||||||
|
/* Additional set operations */
|
||||||
|
static inline __m128i _mm_set1_epi32(int a)
|
||||||
|
{
|
||||||
|
__m128i result;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
result.i32[i] = a;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* AES instructions - these are placeholders, actual AES is done via soft_aes.h */
|
||||||
|
/* On RISC-V without crypto extensions, these should never be called directly */
|
||||||
|
/* They are only here for compilation compatibility */
|
||||||
|
/*
 * Compile-time placeholder for the x86 AESENC intrinsic.
 *
 * NOTE: this does NOT perform an AES round; it only XORs the state with the
 * round key as a minimal fallback.  Code that needs real AES is expected to
 * go through the software implementation (soft_aesenc) instead.
 */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    const __m128i mixed = _mm_xor_si128(a, roundkey);

    return mixed;
}
|
||||||
|
|
||||||
|
/*
 * Compile-time placeholder for the x86 AESKEYGENASSIST intrinsic.
 *
 * NOTE: no key expansion is performed; the input is returned unchanged and
 * `rcon` is ignored.  Real key generation should use soft_aeskeygenassist.
 */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    (void)rcon; /* intentionally unused */
    return a;
}
|
||||||
|
|
||||||
|
/* Rotate right operation for soft_aes.h */
|
||||||
|
/*
 * Rotate the 32-bit `value` right by `count` bits.
 *
 * The count is reduced modulo 32, and the complementary left shift is reduced
 * the same way so that neither shift ever reaches the type width.
 */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    const unsigned int right = count % 32u;
    const unsigned int left = (32u - right) % 32u;

    return (value << left) | (value >> right);
}
|
||||||
|
|
||||||
|
/* ARM NEON compatibility types and intrinsics for RISC-V */
|
||||||
|
typedef __m128i_union uint64x2_t;
|
||||||
|
typedef __m128i_union uint8x16_t;
|
||||||
|
typedef __m128i_union int64x2_t;
|
||||||
|
typedef __m128i_union int32x4_t;
|
||||||
|
|
||||||
|
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
|
||||||
|
{
|
||||||
|
uint64x2_t result;
|
||||||
|
result.u64[0] = ptr[0];
|
||||||
|
result.u64[1] = ptr[1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int64x2_t vld1q_s64(const int64_t *ptr)
|
||||||
|
{
|
||||||
|
int64x2_t result;
|
||||||
|
result.i64[0] = ptr[0];
|
||||||
|
result.i64[1] = ptr[1];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store both 64-bit lanes of `val` to ptr[0] and ptr[1]. */
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    for (int lane = 0; lane < 2; lane++) {
        ptr[lane] = val.u64[lane];
    }
}
|
||||||
|
|
||||||
|
/* NEON-style bitwise XOR of two 2 x u64 vectors. */
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    uint64x2_t xored;

    for (int lane = 0; lane < 2; lane++) {
        xored.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
    return xored;
}
|
||||||
|
|
||||||
|
/* NEON-style lane-wise 64-bit addition; each lane wraps modulo 2^64. */
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    uint64x2_t sum;

    for (int lane = 0; lane < 2; lane++) {
        sum.u64[lane] = a.u64[lane] + b.u64[lane];
    }
    return sum;
}
|
||||||
|
|
||||||
|
/* Reinterpret a 16 x u8 vector as 2 x u64; the bytes are copied unchanged. */
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    uint64x2_t out;

    memcpy(&out, &a, sizeof(out));
    return out;
}
|
||||||
|
|
||||||
|
/* Return unsigned 64-bit lane `lane` of `v`; the index is not range-checked. */
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    const uint64_t *lanes = v.u64;

    return lanes[lane];
}
|
||||||
|
|
||||||
|
/* Return signed 64-bit lane `lane` of `v`; the index is not range-checked. */
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    const int64_t *lanes = v.i64;

    return lanes[lane];
}
|
||||||
|
|
||||||
|
/* Return signed 32-bit lane `lane` of `v`; the index is not range-checked. */
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    const int32_t *lanes = v.i32;

    return lanes[lane];
}
|
||||||
|
|
||||||
|
typedef struct { uint64_t val[1]; } uint64x1_t;
|
||||||
|
|
||||||
|
static inline uint64x1_t vcreate_u64(uint64_t a)
|
||||||
|
{
|
||||||
|
uint64x1_t result;
|
||||||
|
result.val[0] = a;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build a 2-lane vector: lane 0 comes from `low`, lane 1 from `high`. */
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    uint64x2_t joined;

    joined.u64[1] = high.val[0];
    joined.u64[0] = low.val[0];
    return joined;
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* XMRIG_SSE2RVV_H */
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -35,15 +35,69 @@ constexpr size_t twoMiB = 2U * 1024U * 1024U;
|
|||||||
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
|
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
|
||||||
|
|
||||||
|
|
||||||
static inline std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
|
static bool sysfs_write(const std::string &path, uint64_t value)
|
||||||
|
{
|
||||||
|
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
|
||||||
|
if (!file.is_open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
file << value;
|
||||||
|
file.flush();
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int64_t sysfs_read(const std::string &path)
|
||||||
|
{
|
||||||
|
std::ifstream file(path);
|
||||||
|
if (!file.is_open()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t value = 0;
|
||||||
|
file >> value;
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
|
||||||
{
|
{
|
||||||
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
|
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) { return LinuxMemory::write(sysfs_path(node, hugePageSize, true).c_str(), count); }
|
static std::string sysfs_path(size_t hugePageSize, bool nr)
|
||||||
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, false).c_str()); }
|
{
|
||||||
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, true).c_str()); }
|
return fmt::format("/sys/kernel/mm/hugepages/hugepages-{}kB/{}_hugepages", hugePageSize / 1024, nr ? "nr" : "free");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count)
|
||||||
|
{
|
||||||
|
if (sysfs_write(sysfs_path(node, hugePageSize, true), count)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sysfs_write(sysfs_path(hugePageSize, true), count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int64_t sysfs_read_hugepages(uint32_t node, size_t hugePageSize, bool nr)
|
||||||
|
{
|
||||||
|
const int64_t value = sysfs_read(sysfs_path(node, hugePageSize, nr));
|
||||||
|
if (value >= 0) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sysfs_read(sysfs_path(hugePageSize, nr));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, false); }
|
||||||
|
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, true); }
|
||||||
|
|
||||||
|
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
@@ -62,31 +116,3 @@ bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, size_t hugePageSize
|
|||||||
|
|
||||||
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
|
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool xmrig::LinuxMemory::write(const char *path, uint64_t value)
|
|
||||||
{
|
|
||||||
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
file << value;
|
|
||||||
file.flush();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t xmrig::LinuxMemory::read(const char *path)
|
|
||||||
{
|
|
||||||
std::ifstream file(path);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t value = 0;
|
|
||||||
file >> value;
|
|
||||||
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -31,13 +31,10 @@ class LinuxMemory
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
|
static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
|
||||||
|
|
||||||
static bool write(const char *path, uint64_t value);
|
|
||||||
static int64_t read(const char *path);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_LINUXMEMORY_H */
|
#endif // XMRIG_LINUXMEMORY_H
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ bool xmrig::VirtualMemory::isHugepagesAvailable()
|
|||||||
{
|
{
|
||||||
# ifdef XMRIG_OS_LINUX
|
# ifdef XMRIG_OS_LINUX
|
||||||
return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good();
|
return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good();
|
||||||
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM)
|
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) || defined(XMRIG_OS_HAIKU)
|
||||||
return false;
|
return false;
|
||||||
# else
|
# else
|
||||||
return true;
|
return true;
|
||||||
@@ -156,7 +156,8 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
|
|||||||
if (!mem) {
|
if (!mem) {
|
||||||
mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
}
|
}
|
||||||
|
# elif defined(XMRIG_OS_HAIKU)
|
||||||
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
# else
|
# else
|
||||||
|
|
||||||
void *mem = nullptr;
|
void *mem = nullptr;
|
||||||
@@ -181,6 +182,8 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
|
|||||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||||
# elif defined(XMRIG_OS_FREEBSD)
|
# elif defined(XMRIG_OS_FREEBSD)
|
||||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
||||||
|
# elif defined(XMRIG_OS_HAIKU)
|
||||||
|
void *mem = nullptr;
|
||||||
# else
|
# else
|
||||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
|
||||||
# endif
|
# endif
|
||||||
|
|||||||
@@ -26,7 +26,7 @@
|
|||||||
#define XMRIG_MM_MALLOC_PORTABLE_H
|
#define XMRIG_MM_MALLOC_PORTABLE_H
|
||||||
|
|
||||||
|
|
||||||
#if defined(XMRIG_ARM) && !defined(__clang__)
|
#if (defined(XMRIG_ARM) || defined(XMRIG_RISCV)) && !defined(__clang__)
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,9 @@
|
|||||||
|
|
||||||
#if defined(XMRIG_ARM)
|
#if defined(XMRIG_ARM)
|
||||||
# include "crypto/cn/sse2neon.h"
|
# include "crypto/cn/sse2neon.h"
|
||||||
|
#elif defined(XMRIG_RISCV)
|
||||||
|
// RISC-V doesn't have SSE/NEON, provide minimal compatibility
|
||||||
|
# define _mm_pause() __asm__ __volatile__("nop")
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
# include <x86intrin.h>
|
# include <x86intrin.h>
|
||||||
#else
|
#else
|
||||||
@@ -286,7 +289,7 @@ struct HelperThread
|
|||||||
|
|
||||||
void benchmark()
|
void benchmark()
|
||||||
{
|
{
|
||||||
#ifndef XMRIG_ARM
|
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
static std::atomic<int> done{ 0 };
|
static std::atomic<int> done{ 0 };
|
||||||
if (done.exchange(1)) {
|
if (done.exchange(1)) {
|
||||||
return;
|
return;
|
||||||
@@ -478,7 +481,7 @@ static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambd
|
|||||||
|
|
||||||
HelperThread* create_helper_thread(int64_t cpu_index, int priority, const std::vector<int64_t>& affinities)
|
HelperThread* create_helper_thread(int64_t cpu_index, int priority, const std::vector<int64_t>& affinities)
|
||||||
{
|
{
|
||||||
#ifndef XMRIG_ARM
|
#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
|
||||||
hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
|
hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
|
||||||
hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();
|
hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();
|
||||||
|
|
||||||
@@ -807,7 +810,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
|||||||
uint32_t cn_indices[6];
|
uint32_t cn_indices[6];
|
||||||
select_indices(cn_indices, seed);
|
select_indices(cn_indices, seed);
|
||||||
|
|
||||||
#ifdef XMRIG_ARM
|
#if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
|
||||||
uint32_t step[6] = { 1, 1, 1, 1, 1, 1 };
|
uint32_t step[6] = { 1, 1, 1, 1, 1, 1 };
|
||||||
#else
|
#else
|
||||||
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
||||||
|
|||||||
@@ -235,6 +235,131 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
|
#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
|
||||||
|
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
|
||||||
|
|
||||||
|
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
|
||||||
|
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
|
||||||
|
|
||||||
|
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
|
||||||
|
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
|
||||||
|
|
||||||
|
static constexpr uint32_t AES_HASH_STRIDE[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
|
||||||
|
|
||||||
|
template<int softAes, int unroll>
|
||||||
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
|
PROFILE_SCOPE(RandomX_AES);
|
||||||
|
|
||||||
|
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||||
|
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||||
|
|
||||||
|
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||||
|
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||||
|
|
||||||
|
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||||
|
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||||
|
|
||||||
|
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE, 8);
|
||||||
|
|
||||||
|
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
|
||||||
|
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
|
||||||
|
|
||||||
|
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
|
||||||
|
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
|
||||||
|
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
|
||||||
|
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
|
||||||
|
|
||||||
|
const vuint8m1_t& lutdec_index0 = lutenc_index0;
|
||||||
|
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
|
||||||
|
const vuint8m1_t& lutdec_index2 = lutenc_index2;
|
||||||
|
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
|
||||||
|
|
||||||
|
//process 64 bytes at a time in 4 lanes
|
||||||
|
while (scratchpadPtr < scratchpadEnd) {
|
||||||
|
#define HASH_STATE(k) \
|
||||||
|
hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
|
||||||
|
hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||||
|
|
||||||
|
#define FILL_STATE(k) \
|
||||||
|
fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \
|
||||||
|
fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
|
||||||
|
|
||||||
|
switch (softAes) {
|
||||||
|
case 0:
|
||||||
|
HASH_STATE(0);
|
||||||
|
HASH_STATE(1);
|
||||||
|
|
||||||
|
FILL_STATE(0);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
|
scratchpadPtr += 128;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
switch (unroll) {
|
||||||
|
case 4:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
|
||||||
|
HASH_STATE(1);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
|
HASH_STATE(2);
|
||||||
|
FILL_STATE(2);
|
||||||
|
|
||||||
|
HASH_STATE(3);
|
||||||
|
FILL_STATE(3);
|
||||||
|
|
||||||
|
scratchpadPtr += 64 * 4;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
|
||||||
|
HASH_STATE(1);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
|
scratchpadPtr += 64 * 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
|
||||||
|
scratchpadPtr += 64;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef HASH_STATE
|
||||||
|
#undef FILL_STATE
|
||||||
|
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
|
||||||
|
|
||||||
|
//two extra rounds to achieve full diffusion
|
||||||
|
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||||
|
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||||
|
|
||||||
|
hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||||
|
hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||||
|
|
||||||
|
hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||||
|
hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||||
|
|
||||||
|
//output hash
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
|
||||||
template<int softAes, int unroll>
|
template<int softAes, int unroll>
|
||||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
PROFILE_SCOPE(RandomX_AES);
|
PROFILE_SCOPE(RandomX_AES);
|
||||||
@@ -375,6 +500,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
|||||||
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
||||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||||
}
|
}
|
||||||
|
#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
|
||||||
template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
|
|||||||
@@ -111,6 +111,10 @@ namespace randomx {
|
|||||||
#define RANDOMX_HAVE_COMPILER 1
|
#define RANDOMX_HAVE_COMPILER 1
|
||||||
class JitCompilerA64;
|
class JitCompilerA64;
|
||||||
using JitCompiler = JitCompilerA64;
|
using JitCompiler = JitCompilerA64;
|
||||||
|
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
|
||||||
|
#define RANDOMX_HAVE_COMPILER 1
|
||||||
|
class JitCompilerRV64;
|
||||||
|
using JitCompiler = JitCompilerRV64;
|
||||||
#else
|
#else
|
||||||
#define RANDOMX_HAVE_COMPILER 0
|
#define RANDOMX_HAVE_COMPILER 0
|
||||||
class JitCompilerFallback;
|
class JitCompilerFallback;
|
||||||
|
|||||||
@@ -200,7 +200,18 @@ typedef union{
|
|||||||
int i32[4];
|
int i32[4];
|
||||||
} vec_u;
|
} vec_u;
|
||||||
|
|
||||||
#define rx_aligned_alloc(a, b) malloc(a)
|
#ifdef HAVE_POSIX_MEMALIGN
|
||||||
|
inline void* rx_aligned_alloc(size_t size, size_t align) {
|
||||||
|
void* p;
|
||||||
|
if (posix_memalign(&p, align, size) == 0)
|
||||||
|
return p;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
};
|
||||||
|
#else
|
||||||
|
# define rx_aligned_alloc(a, b) malloc(a)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define rx_aligned_free(a) free(a)
|
#define rx_aligned_free(a) free(a)
|
||||||
#define rx_prefetch_nta(x)
|
#define rx_prefetch_nta(x)
|
||||||
#define rx_prefetch_t0(x)
|
#define rx_prefetch_t0(x)
|
||||||
@@ -392,7 +403,7 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
|||||||
typedef uint8x16_t rx_vec_i128;
|
typedef uint8x16_t rx_vec_i128;
|
||||||
typedef float64x2_t rx_vec_f128;
|
typedef float64x2_t rx_vec_f128;
|
||||||
|
|
||||||
#if !defined(XMRIG_OS_WIN) // FIXME
|
#ifdef HAVE_POSIX_MEMALIGN
|
||||||
inline void* rx_aligned_alloc(size_t size, size_t align) {
|
inline void* rx_aligned_alloc(size_t size, size_t align) {
|
||||||
void* p;
|
void* p;
|
||||||
if (posix_memalign(&p, align, size) == 0)
|
if (posix_memalign(&p, align, size) == 0)
|
||||||
@@ -400,12 +411,15 @@ inline void* rx_aligned_alloc(size_t size, size_t align) {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
|
# define rx_aligned_free(a) free(a)
|
||||||
|
#elif defined(HAVE_ALIGNED_MALLOC)
|
||||||
|
# define rx_aligned_alloc(a, b) _aligned_malloc(a, b)
|
||||||
|
# define rx_aligned_free(a) _aligned_free(a)
|
||||||
#else
|
#else
|
||||||
# define rx_aligned_alloc(a, b) malloc(a)
|
# define rx_aligned_alloc(a, b) malloc(a)
|
||||||
|
# define rx_aligned_free(a) free(a)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define rx_aligned_free(a) free(a)
|
|
||||||
|
|
||||||
inline void rx_prefetch_nta(void* ptr) {
|
inline void rx_prefetch_nta(void* ptr) {
|
||||||
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||||
}
|
}
|
||||||
@@ -546,8 +560,23 @@ typedef union {
|
|||||||
rx_vec_i128 i;
|
rx_vec_i128 i;
|
||||||
} rx_vec_f128;
|
} rx_vec_f128;
|
||||||
|
|
||||||
#define rx_aligned_alloc(a, b) malloc(a)
|
#ifdef HAVE_POSIX_MEMALIGN
|
||||||
#define rx_aligned_free(a) free(a)
|
inline void* rx_aligned_alloc(size_t size, size_t align) {
|
||||||
|
void* p;
|
||||||
|
if (posix_memalign(&p, align, size) == 0)
|
||||||
|
return p;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
};
|
||||||
|
# define rx_aligned_free(a) free(a)
|
||||||
|
#elif defined(HAVE_ALIGNED_MALLOC)
|
||||||
|
# define rx_aligned_alloc(a, b) _aligned_malloc(a, b)
|
||||||
|
# define rx_aligned_free(a) _aligned_free(a)
|
||||||
|
#else
|
||||||
|
# define rx_aligned_alloc(a, b) malloc(a)
|
||||||
|
# define rx_aligned_free(a) free(a)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define rx_prefetch_nta(x)
|
#define rx_prefetch_nta(x)
|
||||||
#define rx_prefetch_t0(x)
|
#define rx_prefetch_t0(x)
|
||||||
|
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "crypto/randomx/jit_compiler_x86.hpp"
|
#include "crypto/randomx/jit_compiler_x86.hpp"
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
#include "crypto/randomx/jit_compiler_a64.hpp"
|
#include "crypto/randomx/jit_compiler_a64.hpp"
|
||||||
|
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64.hpp"
|
||||||
#else
|
#else
|
||||||
#include "crypto/randomx/jit_compiler_fallback.hpp"
|
#include "crypto/randomx/jit_compiler_fallback.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -67,7 +67,6 @@ constexpr uint32_t LDR_LITERAL = 0x58000000;
|
|||||||
constexpr uint32_t ROR = 0x9AC02C00;
|
constexpr uint32_t ROR = 0x9AC02C00;
|
||||||
constexpr uint32_t ROR_IMM = 0x93C00000;
|
constexpr uint32_t ROR_IMM = 0x93C00000;
|
||||||
constexpr uint32_t MOV_REG = 0xAA0003E0;
|
constexpr uint32_t MOV_REG = 0xAA0003E0;
|
||||||
constexpr uint32_t MOV_VREG_EL = 0x6E080400;
|
|
||||||
constexpr uint32_t FADD = 0x4E60D400;
|
constexpr uint32_t FADD = 0x4E60D400;
|
||||||
constexpr uint32_t FSUB = 0x4EE0D400;
|
constexpr uint32_t FSUB = 0x4EE0D400;
|
||||||
constexpr uint32_t FEOR = 0x6E201C00;
|
constexpr uint32_t FEOR = 0x6E201C00;
|
||||||
@@ -102,7 +101,7 @@ static size_t CalcDatasetItemSize()
|
|||||||
((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
|
((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
constexpr uint8_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||||
|
|
||||||
JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) :
|
JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) :
|
||||||
hugePages(hugePagesJIT && hugePagesEnable),
|
hugePages(hugePagesJIT && hugePagesEnable),
|
||||||
@@ -128,11 +127,12 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
|||||||
|
|
||||||
uint32_t codePos = MainLoopBegin + 4;
|
uint32_t codePos = MainLoopBegin + 4;
|
||||||
|
|
||||||
|
uint32_t mask = ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10);
|
||||||
// and w16, w10, ScratchpadL3Mask64
|
// and w16, w10, ScratchpadL3Mask64
|
||||||
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
emit32(0x121A0000 | 16 | (10 << 5) | mask, code, codePos);
|
||||||
|
|
||||||
// and w17, w20, ScratchpadL3Mask64
|
// and w17, w20, ScratchpadL3Mask64
|
||||||
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
|
emit32(0x121A0000 | 17 | (20 << 5) | mask, code, codePos);
|
||||||
|
|
||||||
codePos = PrologueSize;
|
codePos = PrologueSize;
|
||||||
literalPos = ImulRcpLiteralsEnd;
|
literalPos = ImulRcpLiteralsEnd;
|
||||||
@@ -155,13 +155,14 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
|||||||
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
|
||||||
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
emit32(ARMV8A::B | (offset / 4), code, codePos);
|
||||||
|
|
||||||
// and w20, w20, CacheLineAlignMask
|
mask = ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10);
|
||||||
|
// and w20, w9, CacheLineAlignMask
|
||||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
|
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
|
||||||
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
emit32(0x121A0000 | 20 | (9 << 5) | mask, code, codePos);
|
||||||
|
|
||||||
// and w10, w10, CacheLineAlignMask
|
// and w10, w10, CacheLineAlignMask
|
||||||
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
|
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
|
||||||
emit32(0x121A0000 | 10 | (10 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
|
emit32(0x121A0000 | 10 | (10 << 5) | mask, code, codePos);
|
||||||
|
|
||||||
// Update spMix1
|
// Update spMix1
|
||||||
// eor x10, config.readReg0, config.readReg1
|
// eor x10, config.readReg0, config.readReg1
|
||||||
@@ -497,9 +498,12 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
|
|||||||
if (src != dst)
|
if (src != dst)
|
||||||
{
|
{
|
||||||
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
||||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||||
|
if (imm)
|
||||||
|
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||||
|
else
|
||||||
|
t = 0x927d0000 | tmp_reg | (src << 5);
|
||||||
|
|
||||||
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
|
||||||
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
||||||
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
||||||
|
|
||||||
@@ -511,10 +515,18 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
imm = (imm & ScratchpadL3Mask) >> 3;
|
imm = (imm & ScratchpadL3Mask) >> 3;
|
||||||
emitMovImmediate(tmp_reg, imm, code, k);
|
if (imm)
|
||||||
|
{
|
||||||
|
emitMovImmediate(tmp_reg, imm, code, k);
|
||||||
|
|
||||||
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
// ldr tmp_reg, [x2, tmp_reg, lsl 3]
|
||||||
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// ldr tmp_reg, [x2]
|
||||||
|
emit32(0xf9400040 | tmp_reg, code, k);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
codePos = k;
|
codePos = k;
|
||||||
@@ -529,25 +541,22 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
|
|||||||
constexpr uint32_t tmp_reg = 19;
|
constexpr uint32_t tmp_reg = 19;
|
||||||
|
|
||||||
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
|
||||||
emitAddImmediate(tmp_reg, src, imm, code, k);
|
uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||||
|
if (imm)
|
||||||
|
emitAddImmediate(tmp_reg, src, imm, code, k);
|
||||||
|
else
|
||||||
|
t = 0x927d0000 | tmp_reg | (src << 5);
|
||||||
|
|
||||||
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
|
||||||
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
||||||
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
||||||
|
|
||||||
emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
|
emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
|
||||||
|
|
||||||
// add tmp_reg, x2, tmp_reg
|
// ldr tmp_reg_fp, [x2, tmp_reg]
|
||||||
emit32(ARMV8A::ADD | tmp_reg | (2 << 5) | (tmp_reg << 16), code, k);
|
emit32(0x3ce06800 | tmp_reg_fp | (2 << 5) | (tmp_reg << 16), code, k);
|
||||||
|
|
||||||
// ldpsw tmp_reg, tmp_reg + 1, [tmp_reg]
|
// sxtl.2d tmp_reg_fp, tmp_reg_fp
|
||||||
emit32(0x69400000 | tmp_reg | (tmp_reg << 5) | ((tmp_reg + 1) << 10), code, k);
|
emit32(0x0f20a400 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
|
||||||
|
|
||||||
// ins tmp_reg_fp.d[0], tmp_reg
|
|
||||||
emit32(0x4E081C00 | tmp_reg_fp | (tmp_reg << 5), code, k);
|
|
||||||
|
|
||||||
// ins tmp_reg_fp.d[1], tmp_reg + 1
|
|
||||||
emit32(0x4E181C00 | tmp_reg_fp | ((tmp_reg + 1) << 5), code, k);
|
|
||||||
|
|
||||||
// scvtf tmp_reg_fp.2d, tmp_reg_fp.2d
|
// scvtf tmp_reg_fp.2d, tmp_reg_fp.2d
|
||||||
emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
|
emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
|
||||||
@@ -835,7 +844,8 @@ void JitCompilerA64::h_IROR_R(Instruction& instr, uint32_t& codePos)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// ror dst, dst, imm
|
// ror dst, dst, imm
|
||||||
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
|
if ((instr.getImm32() & 63))
|
||||||
|
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
reg_changed_offset[instr.dst] = codePos;
|
reg_changed_offset[instr.dst] = codePos;
|
||||||
@@ -861,7 +871,8 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// ror dst, dst, imm
|
// ror dst, dst, imm
|
||||||
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k);
|
if ((instr.getImm32() & 63))
|
||||||
|
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
reg_changed_offset[instr.dst] = k;
|
reg_changed_offset[instr.dst] = k;
|
||||||
@@ -894,13 +905,8 @@ void JitCompilerA64::h_FSWAP_R(Instruction& instr, uint32_t& codePos)
|
|||||||
|
|
||||||
const uint32_t dst = instr.dst + 16;
|
const uint32_t dst = instr.dst + 16;
|
||||||
|
|
||||||
constexpr uint32_t tmp_reg_fp = 28;
|
// ext dst.16b, dst.16b, dst.16b, #0x8
|
||||||
constexpr uint32_t src_index1 = 1 << 14;
|
emit32(0x6e004000 | dst | (dst << 5) | (dst << 16), code, k);
|
||||||
constexpr uint32_t dst_index1 = 1 << 20;
|
|
||||||
|
|
||||||
emit32(ARMV8A::MOV_VREG_EL | tmp_reg_fp | (dst << 5) | src_index1, code, k);
|
|
||||||
emit32(ARMV8A::MOV_VREG_EL | dst | (dst << 5) | dst_index1, code, k);
|
|
||||||
emit32(ARMV8A::MOV_VREG_EL | dst | (tmp_reg_fp << 5), code, k);
|
|
||||||
|
|
||||||
codePos = k;
|
codePos = k;
|
||||||
}
|
}
|
||||||
@@ -1029,11 +1035,19 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
|
|||||||
constexpr uint32_t tmp_reg = 20;
|
constexpr uint32_t tmp_reg = 20;
|
||||||
constexpr uint32_t fpcr_tmp_reg = 8;
|
constexpr uint32_t fpcr_tmp_reg = 8;
|
||||||
|
|
||||||
// ror tmp_reg, src, imm
|
if (instr.getImm32() & 63)
|
||||||
emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k);
|
{
|
||||||
|
// ror tmp_reg, src, imm
|
||||||
|
emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k);
|
||||||
|
|
||||||
// bfi fpcr_tmp_reg, tmp_reg, 40, 2
|
// bfi fpcr_tmp_reg, tmp_reg, 40, 2
|
||||||
emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k);
|
emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k);
|
||||||
|
}
|
||||||
|
else // no rotation
|
||||||
|
{
|
||||||
|
// bfi fpcr_tmp_reg, src, 40, 2
|
||||||
|
emit32(0xB3580400 | fpcr_tmp_reg | (src << 5), code, k);
|
||||||
|
}
|
||||||
|
|
||||||
// rbit tmp_reg, fpcr_tmp_reg
|
// rbit tmp_reg, fpcr_tmp_reg
|
||||||
emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k);
|
emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k);
|
||||||
@@ -1059,9 +1073,12 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
|
|||||||
else
|
else
|
||||||
imm &= RandomX_CurrentConfig.ScratchpadL3_Size - 1;
|
imm &= RandomX_CurrentConfig.ScratchpadL3_Size - 1;
|
||||||
|
|
||||||
emitAddImmediate(tmp_reg, dst, imm, code, k);
|
uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
||||||
|
if (imm)
|
||||||
|
emitAddImmediate(tmp_reg, dst, imm, code, k);
|
||||||
|
else
|
||||||
|
t = 0x927d0000 | tmp_reg | (dst << 5);
|
||||||
|
|
||||||
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
|
|
||||||
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
|
||||||
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
|
||||||
const uint32_t andInstrL3 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 4) << 10);
|
const uint32_t andInstrL3 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 4) << 10);
|
||||||
|
|||||||
@@ -100,9 +100,9 @@
|
|||||||
# v26 -> "a2"
|
# v26 -> "a2"
|
||||||
# v27 -> "a3"
|
# v27 -> "a3"
|
||||||
# v28 -> temporary
|
# v28 -> temporary
|
||||||
# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
|
||||||
# v30 -> E 'or' mask = 0x3*00000000******3*00000000******
|
# v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
|
||||||
# v31 -> scale mask = 0x81f000000000000081f0000000000000
|
# v31 -> scale mask = 0x80f0000000000000'80f0000000000000
|
||||||
|
|
||||||
.balign 4
|
.balign 4
|
||||||
DECL(randomx_program_aarch64):
|
DECL(randomx_program_aarch64):
|
||||||
@@ -142,17 +142,14 @@ DECL(randomx_program_aarch64):
|
|||||||
ldp q26, q27, [x0, 224]
|
ldp q26, q27, [x0, 224]
|
||||||
|
|
||||||
# Load E 'and' mask
|
# Load E 'and' mask
|
||||||
mov x16, 0x00FFFFFFFFFFFFFF
|
movi v29.2d, #0x00FFFFFFFFFFFFFF
|
||||||
ins v29.d[0], x16
|
|
||||||
ins v29.d[1], x16
|
|
||||||
|
|
||||||
# Load E 'or' mask (stored in reg.f[0])
|
# Load E 'or' mask (stored in reg.f[0])
|
||||||
ldr q30, [x0, 64]
|
ldr q30, [x0, 64]
|
||||||
|
|
||||||
# Load scale mask
|
# Load scale mask
|
||||||
mov x16, 0x80f0000000000000
|
mov x16, 0x80f0000000000000
|
||||||
ins v31.d[0], x16
|
dup v31.2d, x16
|
||||||
ins v31.d[1], x16
|
|
||||||
|
|
||||||
# Read fpcr
|
# Read fpcr
|
||||||
mrs x8, fpcr
|
mrs x8, fpcr
|
||||||
@@ -162,35 +159,22 @@ DECL(randomx_program_aarch64):
|
|||||||
str x0, [sp, -16]!
|
str x0, [sp, -16]!
|
||||||
|
|
||||||
# Read literals
|
# Read literals
|
||||||
ldr x0, literal_x0
|
adr x30, literal_v0
|
||||||
ldr x11, literal_x11
|
ldp q0, q1, [x30]
|
||||||
ldr x21, literal_x21
|
ldp q2, q3, [x30, 32]
|
||||||
ldr x22, literal_x22
|
ldp q4, q5, [x30, 64]
|
||||||
ldr x23, literal_x23
|
ldp q6, q7, [x30, 96]
|
||||||
ldr x24, literal_x24
|
ldp q8, q9, [x30, 128]
|
||||||
ldr x25, literal_x25
|
ldp q10, q11, [x30, 160]
|
||||||
ldr x26, literal_x26
|
ldp q12, q13, [x30, 192]
|
||||||
ldr x27, literal_x27
|
ldp q14, q15, [x30, 224]
|
||||||
ldr x28, literal_x28
|
|
||||||
ldr x29, literal_x29
|
|
||||||
ldr x30, literal_x30
|
|
||||||
|
|
||||||
ldr q0, literal_v0
|
ldp x0, x11, [x30, -96] // literal_x0
|
||||||
ldr q1, literal_v1
|
ldp x21, x22, [x30, -80] // literal_x21
|
||||||
ldr q2, literal_v2
|
ldp x23, x24, [x30, -64] // literal_x23
|
||||||
ldr q3, literal_v3
|
ldp x25, x26, [x30, -48] // literal_x25
|
||||||
ldr q4, literal_v4
|
ldp x27, x28, [x30, -32] // literal_x27
|
||||||
ldr q5, literal_v5
|
ldp x29, x30, [x30, -16] // literal_x29
|
||||||
ldr q6, literal_v6
|
|
||||||
ldr q7, literal_v7
|
|
||||||
ldr q8, literal_v8
|
|
||||||
ldr q9, literal_v9
|
|
||||||
ldr q10, literal_v10
|
|
||||||
ldr q11, literal_v11
|
|
||||||
ldr q12, literal_v12
|
|
||||||
ldr q13, literal_v13
|
|
||||||
ldr q14, literal_v14
|
|
||||||
ldr q15, literal_v15
|
|
||||||
|
|
||||||
DECL(randomx_program_aarch64_main_loop):
|
DECL(randomx_program_aarch64_main_loop):
|
||||||
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
||||||
@@ -221,40 +205,31 @@ DECL(randomx_program_aarch64_main_loop):
|
|||||||
eor x15, x15, x19
|
eor x15, x15, x19
|
||||||
|
|
||||||
# Load group F registers (spAddr1)
|
# Load group F registers (spAddr1)
|
||||||
ldpsw x20, x19, [x17]
|
ldr q17, [x17]
|
||||||
ins v16.d[0], x20
|
sxtl v16.2d, v17.2s
|
||||||
ins v16.d[1], x19
|
|
||||||
ldpsw x20, x19, [x17, 8]
|
|
||||||
ins v17.d[0], x20
|
|
||||||
ins v17.d[1], x19
|
|
||||||
ldpsw x20, x19, [x17, 16]
|
|
||||||
ins v18.d[0], x20
|
|
||||||
ins v18.d[1], x19
|
|
||||||
ldpsw x20, x19, [x17, 24]
|
|
||||||
ins v19.d[0], x20
|
|
||||||
ins v19.d[1], x19
|
|
||||||
scvtf v16.2d, v16.2d
|
scvtf v16.2d, v16.2d
|
||||||
|
sxtl2 v17.2d, v17.4s
|
||||||
scvtf v17.2d, v17.2d
|
scvtf v17.2d, v17.2d
|
||||||
|
|
||||||
|
ldr q19, [x17, 16]
|
||||||
|
sxtl v18.2d, v19.2s
|
||||||
scvtf v18.2d, v18.2d
|
scvtf v18.2d, v18.2d
|
||||||
|
sxtl2 v19.2d, v19.4s
|
||||||
scvtf v19.2d, v19.2d
|
scvtf v19.2d, v19.2d
|
||||||
|
|
||||||
# Load group E registers (spAddr1)
|
# Load group E registers (spAddr1)
|
||||||
ldpsw x20, x19, [x17, 32]
|
ldr q21, [x17, 32]
|
||||||
ins v20.d[0], x20
|
sxtl v20.2d, v21.2s
|
||||||
ins v20.d[1], x19
|
|
||||||
ldpsw x20, x19, [x17, 40]
|
|
||||||
ins v21.d[0], x20
|
|
||||||
ins v21.d[1], x19
|
|
||||||
ldpsw x20, x19, [x17, 48]
|
|
||||||
ins v22.d[0], x20
|
|
||||||
ins v22.d[1], x19
|
|
||||||
ldpsw x20, x19, [x17, 56]
|
|
||||||
ins v23.d[0], x20
|
|
||||||
ins v23.d[1], x19
|
|
||||||
scvtf v20.2d, v20.2d
|
scvtf v20.2d, v20.2d
|
||||||
|
sxtl2 v21.2d, v21.4s
|
||||||
scvtf v21.2d, v21.2d
|
scvtf v21.2d, v21.2d
|
||||||
|
|
||||||
|
ldr q23, [x17, 48]
|
||||||
|
sxtl v22.2d, v23.2s
|
||||||
scvtf v22.2d, v22.2d
|
scvtf v22.2d, v22.2d
|
||||||
|
sxtl2 v23.2d, v23.4s
|
||||||
scvtf v23.2d, v23.2d
|
scvtf v23.2d, v23.2d
|
||||||
|
|
||||||
and v20.16b, v20.16b, v29.16b
|
and v20.16b, v20.16b, v29.16b
|
||||||
and v21.16b, v21.16b, v29.16b
|
and v21.16b, v21.16b, v29.16b
|
||||||
and v22.16b, v22.16b, v29.16b
|
and v22.16b, v22.16b, v29.16b
|
||||||
@@ -310,10 +285,9 @@ DECL(randomx_program_aarch64_vm_instructions_end):
|
|||||||
eor x9, x9, x20
|
eor x9, x9, x20
|
||||||
|
|
||||||
# Calculate dataset pointer for dataset prefetch
|
# Calculate dataset pointer for dataset prefetch
|
||||||
mov w20, w9
|
|
||||||
DECL(randomx_program_aarch64_cacheline_align_mask1):
|
DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||||
# Actual mask will be inserted by JIT compiler
|
# Actual mask will be inserted by JIT compiler
|
||||||
and x20, x20, 1
|
and x20, x9, 1
|
||||||
add x20, x20, x1
|
add x20, x20, x1
|
||||||
|
|
||||||
# Prefetch dataset data
|
# Prefetch dataset data
|
||||||
@@ -491,42 +465,39 @@ DECL(randomx_calc_dataset_item_aarch64):
|
|||||||
stp x10, x11, [sp, 80]
|
stp x10, x11, [sp, 80]
|
||||||
stp x12, x13, [sp, 96]
|
stp x12, x13, [sp, 96]
|
||||||
|
|
||||||
ldr x12, superscalarMul0
|
adr x7, superscalarMul0
|
||||||
|
# superscalarMul0, superscalarAdd1
|
||||||
|
ldp x12, x13, [x7]
|
||||||
|
|
||||||
mov x8, x0
|
ldp x8, x9, [sp]
|
||||||
mov x9, x1
|
|
||||||
mov x10, x2
|
mov x10, x2
|
||||||
|
|
||||||
# rl[0] = (itemNumber + 1) * superscalarMul0;
|
# rl[0] = (itemNumber + 1) * superscalarMul0;
|
||||||
madd x0, x2, x12, x12
|
madd x0, x2, x12, x12
|
||||||
|
|
||||||
# rl[1] = rl[0] ^ superscalarAdd1;
|
# rl[1] = rl[0] ^ superscalarAdd1;
|
||||||
ldr x12, superscalarAdd1
|
eor x1, x0, x13
|
||||||
eor x1, x0, x12
|
|
||||||
|
|
||||||
# rl[2] = rl[0] ^ superscalarAdd2;
|
# rl[2] = rl[0] ^ superscalarAdd2;
|
||||||
ldr x12, superscalarAdd2
|
ldp x12, x13, [x7, 16]
|
||||||
eor x2, x0, x12
|
eor x2, x0, x12
|
||||||
|
|
||||||
# rl[3] = rl[0] ^ superscalarAdd3;
|
# rl[3] = rl[0] ^ superscalarAdd3;
|
||||||
ldr x12, superscalarAdd3
|
eor x3, x0, x13
|
||||||
eor x3, x0, x12
|
|
||||||
|
|
||||||
# rl[4] = rl[0] ^ superscalarAdd4;
|
# rl[4] = rl[0] ^ superscalarAdd4;
|
||||||
ldr x12, superscalarAdd4
|
ldp x12, x13, [x7, 32]
|
||||||
eor x4, x0, x12
|
eor x4, x0, x12
|
||||||
|
|
||||||
# rl[5] = rl[0] ^ superscalarAdd5;
|
# rl[5] = rl[0] ^ superscalarAdd5;
|
||||||
ldr x12, superscalarAdd5
|
eor x5, x0, x13
|
||||||
eor x5, x0, x12
|
|
||||||
|
|
||||||
# rl[6] = rl[0] ^ superscalarAdd6;
|
# rl[6] = rl[0] ^ superscalarAdd6;
|
||||||
ldr x12, superscalarAdd6
|
ldp x12, x13, [x7, 48]
|
||||||
eor x6, x0, x12
|
eor x6, x0, x12
|
||||||
|
|
||||||
# rl[7] = rl[0] ^ superscalarAdd7;
|
# rl[7] = rl[0] ^ superscalarAdd7;
|
||||||
ldr x12, superscalarAdd7
|
eor x7, x0, x13
|
||||||
eor x7, x0, x12
|
|
||||||
|
|
||||||
b DECL(randomx_calc_dataset_item_aarch64_prefetch)
|
b DECL(randomx_calc_dataset_item_aarch64_prefetch)
|
||||||
|
|
||||||
|
|||||||
1187
src/crypto/randomx/jit_compiler_rv64.cpp
Normal file
1187
src/crypto/randomx/jit_compiler_rv64.cpp
Normal file
File diff suppressed because it is too large
Load Diff
147
src/crypto/randomx/jit_compiler_rv64.hpp
Normal file
147
src/crypto/randomx/jit_compiler_rv64.hpp
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2023 tevador <tevador@gmail.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
#include "crypto/randomx/common.hpp"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
|
||||||
|
|
||||||
|
namespace randomx {
|
||||||
|
|
||||||
|
// Thin writer over a caller-provided byte buffer that receives generated code.
// None of the methods perform bounds checking: the caller must make sure the
// destination range lies inside the memory that `code` points to.
struct CodeBuffer {
	uint8_t* code;      // base address of the buffer being written
	int32_t codePos;    // byte offset at which the next emit() call writes
	int32_t rcpCount;   // not touched by any method of this struct

	// Copies `len` bytes from `src` to the current position and advances
	// codePos by `len`.
	void emit(const uint8_t* src, int32_t len) {
		memcpy(&code[codePos], src, len);
		codePos += len;
	}

	// Copies the object representation of `src` (sizeof(T) bytes) to the
	// current position and advances codePos by sizeof(T).
	template<typename T>
	void emit(T src) {
		memcpy(&code[codePos], &src, sizeof(src));
		codePos += sizeof(src);
	}

	// Overwrites `len` bytes at the given offset. The parameter shadows the
	// member of the same name, so the member codePos is left unchanged.
	void emitAt(int32_t codePos, const uint8_t* src, int32_t len) {
		memcpy(&code[codePos], src, len);
	}

	// Overwrites sizeof(T) bytes at the given offset with the object
	// representation of `src`; the member codePos is left unchanged.
	template<typename T>
	void emitAt(int32_t codePos, T src) {
		memcpy(&code[codePos], &src, sizeof(src));
	}
};
|
||||||
|
|
||||||
|
struct CompilerState : public CodeBuffer {
|
||||||
|
int32_t instructionOffsets[RANDOMX_PROGRAM_MAX_SIZE];
|
||||||
|
int registerUsage[RegistersCount];
|
||||||
|
};
|
||||||
|
|
||||||
|
class Program;
|
||||||
|
struct ProgramConfiguration;
|
||||||
|
class SuperscalarProgram;
|
||||||
|
class Instruction;
|
||||||
|
|
||||||
|
#define HANDLER_ARGS randomx::CompilerState& state, randomx::Instruction isn, int i
|
||||||
|
typedef void(*InstructionGeneratorRV64)(HANDLER_ARGS);
|
||||||
|
|
||||||
|
class JitCompilerRV64 {
|
||||||
|
public:
|
||||||
|
JitCompilerRV64(bool hugePagesEnable, bool optimizedInitDatasetEnable);
|
||||||
|
~JitCompilerRV64();
|
||||||
|
|
||||||
|
void prepare() {}
|
||||||
|
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||||
|
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||||
|
|
||||||
|
template<size_t N>
|
||||||
|
void generateSuperscalarHash(SuperscalarProgram(&programs)[N]);
|
||||||
|
|
||||||
|
void generateDatasetInitCode() {}
|
||||||
|
|
||||||
|
ProgramFunc* getProgramFunc() {
|
||||||
|
return (ProgramFunc*)entryProgram;
|
||||||
|
}
|
||||||
|
DatasetInitFunc* getDatasetInitFunc();
|
||||||
|
uint8_t* getCode() {
|
||||||
|
return state.code;
|
||||||
|
}
|
||||||
|
size_t getCodeSize();
|
||||||
|
|
||||||
|
void enableWriting() const;
|
||||||
|
void enableExecution() const;
|
||||||
|
|
||||||
|
static InstructionGeneratorRV64 engine[256];
|
||||||
|
private:
|
||||||
|
CompilerState state;
|
||||||
|
|
||||||
|
uint8_t* vectorCode;
|
||||||
|
size_t vectorCodeSize;
|
||||||
|
|
||||||
|
void* entryDataInit;
|
||||||
|
void* entryDataInitOptimized;
|
||||||
|
void* entryProgram;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static void v1_IADD_RS(HANDLER_ARGS);
|
||||||
|
static void v1_IADD_M(HANDLER_ARGS);
|
||||||
|
static void v1_ISUB_R(HANDLER_ARGS);
|
||||||
|
static void v1_ISUB_M(HANDLER_ARGS);
|
||||||
|
static void v1_IMUL_R(HANDLER_ARGS);
|
||||||
|
static void v1_IMUL_M(HANDLER_ARGS);
|
||||||
|
static void v1_IMULH_R(HANDLER_ARGS);
|
||||||
|
static void v1_IMULH_M(HANDLER_ARGS);
|
||||||
|
static void v1_ISMULH_R(HANDLER_ARGS);
|
||||||
|
static void v1_ISMULH_M(HANDLER_ARGS);
|
||||||
|
static void v1_IMUL_RCP(HANDLER_ARGS);
|
||||||
|
static void v1_INEG_R(HANDLER_ARGS);
|
||||||
|
static void v1_IXOR_R(HANDLER_ARGS);
|
||||||
|
static void v1_IXOR_M(HANDLER_ARGS);
|
||||||
|
static void v1_IROR_R(HANDLER_ARGS);
|
||||||
|
static void v1_IROL_R(HANDLER_ARGS);
|
||||||
|
static void v1_ISWAP_R(HANDLER_ARGS);
|
||||||
|
static void v1_FSWAP_R(HANDLER_ARGS);
|
||||||
|
static void v1_FADD_R(HANDLER_ARGS);
|
||||||
|
static void v1_FADD_M(HANDLER_ARGS);
|
||||||
|
static void v1_FSUB_R(HANDLER_ARGS);
|
||||||
|
static void v1_FSUB_M(HANDLER_ARGS);
|
||||||
|
static void v1_FSCAL_R(HANDLER_ARGS);
|
||||||
|
static void v1_FMUL_R(HANDLER_ARGS);
|
||||||
|
static void v1_FDIV_M(HANDLER_ARGS);
|
||||||
|
static void v1_FSQRT_R(HANDLER_ARGS);
|
||||||
|
static void v1_CBRANCH(HANDLER_ARGS);
|
||||||
|
static void v1_CFROUND(HANDLER_ARGS);
|
||||||
|
static void v1_ISTORE(HANDLER_ARGS);
|
||||||
|
static void v1_NOP(HANDLER_ARGS);
|
||||||
|
};
|
||||||
|
}
|
||||||
1236
src/crypto/randomx/jit_compiler_rv64_static.S
Normal file
1236
src/crypto/randomx/jit_compiler_rv64_static.S
Normal file
File diff suppressed because it is too large
Load Diff
53
src/crypto/randomx/jit_compiler_rv64_static.hpp
Normal file
53
src/crypto/randomx/jit_compiler_rv64_static.hpp
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2023 tevador <tevador@gmail.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Symbols with C linkage, declared as functions so that their addresses can
// be taken from C++.
// NOTE(review): presumably these are labels defined in
// jit_compiler_rv64_static.S rather than callable functions - confirm.
extern "C" {
	void randomx_riscv64_literals();
	void randomx_riscv64_literals_end();
	void randomx_riscv64_data_init();
	void randomx_riscv64_fix_data_call();
	void randomx_riscv64_prologue();
	void randomx_riscv64_loop_begin();
	void randomx_riscv64_data_read();
	void randomx_riscv64_data_read_light();
	void randomx_riscv64_fix_loop_call();
	void randomx_riscv64_spad_store();
	void randomx_riscv64_spad_store_hardaes();
	void randomx_riscv64_spad_store_softaes();
	void randomx_riscv64_loop_end();
	void randomx_riscv64_fix_continue_loop();
	void randomx_riscv64_epilogue();
	void randomx_riscv64_softaes();
	void randomx_riscv64_program_end();
	void randomx_riscv64_ssh_init();
	void randomx_riscv64_ssh_load();
	void randomx_riscv64_ssh_prefetch();
	void randomx_riscv64_ssh_end();
}
|
||||||
207
src/crypto/randomx/jit_compiler_rv64_vector.cpp
Normal file
207
src/crypto/randomx/jit_compiler_rv64_vector.cpp
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "crypto/randomx/configuration.h"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_vector.h"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_vector_static.h"
|
||||||
|
#include "crypto/randomx/reciprocal.h"
|
||||||
|
#include "crypto/randomx/superscalar.hpp"
|
||||||
|
|
||||||
|
namespace randomx {
|
||||||
|
|
||||||
|
#define ADDR(x) ((uint8_t*) &(x))
|
||||||
|
#define DIST(x, y) (ADDR(y) - ADDR(x))
|
||||||
|
|
||||||
|
void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs)
|
||||||
|
{
|
||||||
|
memcpy(buf, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_begin), buf_size);
|
||||||
|
|
||||||
|
uint8_t* p = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions);
|
||||||
|
|
||||||
|
uint8_t* literals = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_imul_rcp_literals);
|
||||||
|
uint8_t* cur_literal = literals;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_programs; ++i) {
|
||||||
|
// Step 4
|
||||||
|
size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor);
|
||||||
|
memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_cache_prefetch), k);
|
||||||
|
p += k;
|
||||||
|
|
||||||
|
// Step 5
|
||||||
|
for (uint32_t j = 0; j < programs[i].size; ++j) {
|
||||||
|
const uint32_t dst = programs[i].programBuffer[j].dst & 7;
|
||||||
|
const uint32_t src = programs[i].programBuffer[j].src & 7;
|
||||||
|
const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3;
|
||||||
|
const uint32_t imm32 = programs[i].programBuffer[j].imm32;
|
||||||
|
|
||||||
|
uint32_t inst;
|
||||||
|
#define EMIT(data) inst = (data); memcpy(p, &inst, 4); p += 4
|
||||||
|
|
||||||
|
switch (static_cast<SuperscalarInstructionType>(programs[i].programBuffer[j].opcode)) {
|
||||||
|
case SuperscalarInstructionType::ISUB_R:
|
||||||
|
// 57 00 00 0A vsub.vv v0, v0, v0
|
||||||
|
EMIT(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IXOR_R:
|
||||||
|
// 57 00 00 2E vxor.vv v0, v0, v0
|
||||||
|
EMIT(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IADD_RS:
|
||||||
|
// 57 39 00 96 vsll.vi v18, v0, 0
|
||||||
|
// 57 00 09 02 vadd.vv v0, v0, v18
|
||||||
|
EMIT(0x96003957 | (modShift << 15) | (src << 20));
|
||||||
|
EMIT(0x02090057 | (dst << 7) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IMUL_R:
|
||||||
|
// 57 20 00 96 vmul.vv v0, v0, v0
|
||||||
|
EMIT(0x96002057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IROR_C:
|
||||||
|
{
|
||||||
|
const uint32_t shift_right = imm32 & 63;
|
||||||
|
const uint32_t shift_left = 64 - shift_right;
|
||||||
|
|
||||||
|
if (shift_right < 32) {
|
||||||
|
// 57 39 00 A2 vsrl.vi v18, v0, 0
|
||||||
|
EMIT(0xA2003957 | (shift_right << 15) | (dst << 20));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// 93 02 00 00 li x5, 0
|
||||||
|
// 57 C9 02 A2 vsrl.vx v18, v0, x5
|
||||||
|
EMIT(0x00000293 | (shift_right << 20));
|
||||||
|
EMIT(0xA202C957 | (dst << 20));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shift_left < 32) {
|
||||||
|
// 57 30 00 96 vsll.vi v0, v0, 0
|
||||||
|
EMIT(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// 93 02 00 00 li x5, 0
|
||||||
|
// 57 C0 02 96 vsll.vx v0, v0, x5
|
||||||
|
EMIT(0x00000293 | (shift_left << 20));
|
||||||
|
EMIT(0x9602C057 | (dst << 7) | (dst << 20));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 57 00 20 2B vor.vv v0, v18, v0
|
||||||
|
EMIT(0x2B200057 | (dst << 7) | (dst << 15));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IADD_C7:
|
||||||
|
case SuperscalarInstructionType::IADD_C8:
|
||||||
|
case SuperscalarInstructionType::IADD_C9:
|
||||||
|
// B7 02 00 00 lui x5, 0
|
||||||
|
// 9B 82 02 00 addiw x5, x5, 0
|
||||||
|
// 57 C0 02 02 vadd.vx v0, v0, x5
|
||||||
|
EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
|
||||||
|
EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20);
|
||||||
|
EMIT(0x0202C057 | (dst << 7) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IXOR_C7:
|
||||||
|
case SuperscalarInstructionType::IXOR_C8:
|
||||||
|
case SuperscalarInstructionType::IXOR_C9:
|
||||||
|
// B7 02 00 00 lui x5, 0
|
||||||
|
// 9B 82 02 00 addiw x5, x5, 0
|
||||||
|
// 57 C0 02 2E vxor.vx v0, v0, x5
|
||||||
|
EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
|
||||||
|
EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20);
|
||||||
|
EMIT(0x2E02C057 | (dst << 7) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IMULH_R:
|
||||||
|
// 57 20 00 92 vmulhu.vv v0, v0, v0
|
||||||
|
EMIT(0x92002057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::ISMULH_R:
|
||||||
|
// 57 20 00 9E vmulh.vv v0, v0, v0
|
||||||
|
EMIT(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IMUL_RCP:
|
||||||
|
{
|
||||||
|
uint32_t offset = cur_literal - literals;
|
||||||
|
|
||||||
|
if (offset == 2040) {
|
||||||
|
literals += 2040;
|
||||||
|
offset = 0;
|
||||||
|
|
||||||
|
// 93 87 87 7F add x15, x15, 2040
|
||||||
|
EMIT(0x7F878793);
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint64_t r = randomx_reciprocal_fast(imm32);
|
||||||
|
memcpy(cur_literal, &r, 8);
|
||||||
|
cur_literal += 8;
|
||||||
|
|
||||||
|
// 83 B2 07 00 ld x5, (x15)
|
||||||
|
// 57 E0 02 96 vmul.vx v0, v0, x5
|
||||||
|
EMIT(0x0007B283 | (offset << 20));
|
||||||
|
EMIT(0x9602E057 | (dst << 7) | (dst << 20));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 6
|
||||||
|
k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_set_cache_index);
|
||||||
|
memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_xor), k);
|
||||||
|
p += k;
|
||||||
|
|
||||||
|
// Step 7
|
||||||
|
if (i + 1 < num_programs) {
|
||||||
|
memcpy(p, reinterpret_cast<uint8_t*>(randomx_riscv64_vector_sshash_set_cache_index) + programs[i].getAddressRegister() * 4, 4);
|
||||||
|
p += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction
|
||||||
|
const uint8_t* e = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions_end);
|
||||||
|
const uint32_t k = e - p;
|
||||||
|
const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000);
|
||||||
|
memcpy(p, &j, 4);
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
__builtin___clear_cache((char*) buf, (char*)(buf + buf_size));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_dataset_init);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace randomx
|
||||||
42
src/crypto/randomx/jit_compiler_rv64_vector.h
Normal file
42
src/crypto/randomx/jit_compiler_rv64_vector.h
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
namespace randomx {

class SuperscalarProgram;

// Generates the vectorized RV64 dataset-init code into `buf` from
// `num_programs` superscalar programs and returns a pointer into `buf`.
// NOTE(review): the exact meaning of the returned pointer and the required
// buffer size are defined by the implementation file - confirm there.
void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs);

} // namespace randomx
|
||||||
296
src/crypto/randomx/jit_compiler_rv64_vector_static.S
Normal file
296
src/crypto/randomx/jit_compiler_rv64_vector_static.S
Normal file
@@ -0,0 +1,296 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "configuration.h"
|
||||||
|
|
||||||
|
// Compatibility macros
|
||||||
|
|
||||||
|
#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES)
|
||||||
|
#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(RANDOMX_ARGON_MEMORY)
|
||||||
|
#define RANDOMX_CACHE_MASK RANDOMX_ARGON_MEMORY * 1024 / 64 - 1
|
||||||
|
#elif defined(RANDOMX_CACHE_MAX_SIZE)
|
||||||
|
#define RANDOMX_CACHE_MASK RANDOMX_CACHE_MAX_SIZE / 64 - 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define DECL(x) x
|
||||||
|
|
||||||
|
.text
|
||||||
|
|
||||||
|
.option arch, rv64gcv_zicbop
|
||||||
|
.option pic
|
||||||
|
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_begin)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_dataset_init)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_generated_instructions)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_cache_prefetch)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_xor)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_set_cache_index)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_end)
|
||||||
|
|
||||||
|
.balign 8
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_begin):
|
||||||
|
|
||||||
|
sshash_constant_0: .dword 6364136223846793005
|
||||||
|
sshash_constant_1: .dword 9298411001130361340
|
||||||
|
sshash_constant_2: .dword 12065312585734608966
|
||||||
|
sshash_constant_3: .dword 9306329213124626780
|
||||||
|
sshash_constant_4: .dword 5281919268842080866
|
||||||
|
sshash_constant_5: .dword 10536153434571861004
|
||||||
|
sshash_constant_6: .dword 3398623926847679864
|
||||||
|
sshash_constant_7: .dword 9549104520008361294
|
||||||
|
sshash_offsets: .dword 0,1,2,3
|
||||||
|
store_offsets: .dword 0,64,128,192
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0
|
||||||
|
|
||||||
|
/*
|
||||||
|
Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation
|
||||||
|
|
||||||
|
Register layout
|
||||||
|
---------------
|
||||||
|
x5 = temporary
|
||||||
|
|
||||||
|
x10 = randomx cache
|
||||||
|
x11 = output buffer
|
||||||
|
x12 = startBlock
|
||||||
|
x13 = endBlock
|
||||||
|
|
||||||
|
x14 = cache mask
|
||||||
|
x15 = imul_rcp literal pointer
|
||||||
|
|
||||||
|
v0-v7 = r0-r7
|
||||||
|
v8 = itemNumber
|
||||||
|
v9 = cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory
|
||||||
|
|
||||||
|
v10-v17 = sshash constants
|
||||||
|
|
||||||
|
v18 = temporary
|
||||||
|
|
||||||
|
v19 = dataset item store offsets
|
||||||
|
*/
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_dataset_init):
// Entry point of the vectorised dataset initialisation.
// On entry (see the register layout above): x10 = randomx cache, x11 = output buffer,
// x12 = startBlock, x13 = endBlock.
// The code up to init_item sets up loop-invariant state; init_item starts one iteration
// that produces four dataset items, one per 64-bit vector lane.
|
||||||
|
// Process 4 64-bit values at a time
|
||||||
|
li x5, 4
|
||||||
|
// Request vl = 4 with 64-bit elements and LMUL = 1. The granted vl is written to x5 but
// is never checked: x5 is reused as a scratch pointer right below.
vsetvli x5, x5, e64, m1, ta, ma
|
||||||
|
|
||||||
|
// Load cache->memory pointer
|
||||||
|
// Replaces the struct pointer in x10 with the 8-byte value stored at offset 0 of that struct.
ld x10, (x10)
|
||||||
|
|
||||||
|
// Init cache mask
|
||||||
|
// RANDOMX_CACHE_MASK is defined outside this file section; it is ANDed with cacheIndex
// before the index is scaled by 64 in the cache prefetch fragment.
li x14, RANDOMX_CACHE_MASK
|
||||||
|
|
||||||
|
// Init dataset item store offsets
|
||||||
|
lla x5, store_offsets
|
||||||
|
vle64.v v19, (x5)
|
||||||
|
|
||||||
|
// Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3)
|
||||||
|
lla x5, sshash_offsets
|
||||||
|
vle64.v v8, (x5)
|
||||||
|
// v8[i] = i + startBlock
vadd.vx v8, v8, x12
|
||||||
|
|
||||||
|
// Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector)
|
||||||
|
// v10 receives sshash_constant_0, which is used as the r0 multiplier in Step 1.
lla x5, sshash_constant_0
|
||||||
|
vlse64.v v10, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_1
|
||||||
|
vlse64.v v11, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_2
|
||||||
|
vlse64.v v12, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_3
|
||||||
|
vlse64.v v13, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_4
|
||||||
|
vlse64.v v14, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_5
|
||||||
|
vlse64.v v15, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_6
|
||||||
|
vlse64.v v16, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_7
|
||||||
|
vlse64.v v17, (x5), x0
|
||||||
|
|
||||||
|
// Calculate the end pointer for dataset init
|
||||||
|
// x13 = x11 + (endBlock - startBlock) * 64, i.e. one byte past the last requested 64-byte item.
sub x13, x13, x12
|
||||||
|
slli x13, x13, 6
|
||||||
|
add x13, x13, x11
|
||||||
|
|
||||||
|
// Loop head: the bltu at the end of the routine branches back here once per group of four items.
init_item:
|
||||||
|
// Step 1. Init r0-r7
|
||||||
|
|
||||||
|
// r0 = (itemNumber + 1) * 6364136223846793005
|
||||||
|
vmv.v.v v0, v8
|
||||||
|
// vmadd.vv vd, vs1, vs2 computes vd = vs1 * vd + vs2,
// so v0 = v10 * itemNumber + v10 = (itemNumber + 1) * v10.
vmadd.vv v0, v10, v10
|
||||||
|
|
||||||
|
// r_i = r0 ^ c_i for i = 1..7
|
||||||
|
vxor.vv v1, v0, v11
|
||||||
|
vxor.vv v2, v0, v12
|
||||||
|
vxor.vv v3, v0, v13
|
||||||
|
vxor.vv v4, v0, v14
|
||||||
|
vxor.vv v5, v0, v15
|
||||||
|
vxor.vv v6, v0, v16
|
||||||
|
vxor.vv v7, v0, v17
|
||||||
|
|
||||||
|
// Step 2. Let cacheIndex = itemNumber
|
||||||
|
vmv.v.v v9, v8
|
||||||
|
|
||||||
|
// Step 3 is implicit (all iterations are inlined, there is no "i")
|
||||||
|
|
||||||
|
// Init imul_rcp literal pointer
|
||||||
|
// This sits inside the loop, so x15 restarts at the first literal for every group of four items.
// NOTE(review): x15 is presumably advanced by the generated code that follows - confirm in the JIT compiler.
lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_generated_instructions):
// Placeholder region inside the init_item loop body. There is no branch around it:
// execution falls into it from the setup code above and out of it into the code after
// the end label below.
|
||||||
|
// Generated by JIT compiler
|
||||||
|
//
|
||||||
|
// Step 4. randomx_riscv64_vector_sshash_cache_prefetch
|
||||||
|
// Step 5. SuperscalarHash[i]
|
||||||
|
// Step 6. randomx_riscv64_vector_sshash_xor
|
||||||
|
// Step 7. randomx_riscv64_vector_sshash_set_cache_index
|
||||||
|
//
|
||||||
|
// Above steps will be repeated RANDOMX_CACHE_ACCESSES times
|
||||||
|
// Reserves RANDOMX_CACHE_ACCESSES * 2048 zero-filled 4-byte words (.fill repeat, size, value).
// NOTE(review): the zeros are presumably overwritten before this routine is ever called - confirm in the JIT compiler.
.fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_generated_instructions_end):
// Loop tail: writes the four finished items to the output buffer and advances to the next group.
|
||||||
|
// Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data.
|
||||||
|
// vsuxei64.v vN, (base), v19 stores lane i of vN at base + v19[i]. v19 holds 0,64,128,192,
// so lane i lands in the i-th 64-byte item of this group; base = x11 + 8*N selects the
// 8-byte slot of register rN inside each item.
vsuxei64.v v0, (x11), v19
|
||||||
|
|
||||||
|
add x5, x11, 8
|
||||||
|
vsuxei64.v v1, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 16
|
||||||
|
vsuxei64.v v2, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 24
|
||||||
|
vsuxei64.v v3, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 32
|
||||||
|
vsuxei64.v v4, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 40
|
||||||
|
vsuxei64.v v5, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 48
|
||||||
|
vsuxei64.v v6, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 56
|
||||||
|
vsuxei64.v v7, (x5), v19
|
||||||
|
|
||||||
|
// Iterate to the next 4 items
|
||||||
|
vadd.vi v8, v8, 4
|
||||||
|
// 256 bytes = 4 items * 64 bytes
add x11, x11, 256
|
||||||
|
// Unsigned compare against the end pointer computed during setup. The body has already
// run once when this test is first reached, and every pass writes four whole items.
// NOTE(review): callers presumably pass a non-zero item count that is a multiple of 4 - confirm.
bltu x11, x13, init_item
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
||||||
|
// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache.
|
||||||
|
DECL(randomx_riscv64_vector_sshash_cache_prefetch):
// Turns the four cacheIndex values in v9 into addresses, issues a read prefetch for each
// one, and leaves v9 holding byte offsets relative to x10.
// Uses x5 and v18 as temporaries; expects x10 = cache memory base and x14 = cache mask.
// NOTE(review): this fragment has no ret and sits after the routine's ret; it is presumably
// a template copied into the generated-instructions region by the JIT compiler - confirm.
|
||||||
|
// v9 = convert from cacheIndex to a direct pointer into cache->memory
|
||||||
|
vand.vx v9, v9, x14
|
||||||
|
// << 6 scales the masked item index by 64 bytes per cache item
vsll.vi v9, v9, 6
|
||||||
|
vadd.vx v9, v9, x10
|
||||||
|
|
||||||
|
// Prefetch element 0
|
||||||
|
// vmv.x.s copies element 0 of a vector into a scalar register
vmv.x.s x5, v9
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// Prefetch element 1
|
||||||
|
// vslidedown.vi moves element k down to position 0 of v18 so vmv.x.s can read it
vslidedown.vi v18, v9, 1
|
||||||
|
vmv.x.s x5, v18
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// Prefetch element 2
|
||||||
|
vslidedown.vi v18, v9, 2
|
||||||
|
vmv.x.s x5, v18
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// Prefetch element 3
|
||||||
|
vslidedown.vi v18, v9, 3
|
||||||
|
vmv.x.s x5, v18
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// v9 = byte offset into cache->memory
|
||||||
|
// Subtracting the base again leaves offsets, which the XOR fragment uses as the index
// vector of its indexed loads from (x10).
vsub.vx v9, v9, x10
|
||||||
|
|
||||||
|
// Step 6. XOR all registers with data loaded from randomx cache
|
||||||
|
DECL(randomx_riscv64_vector_sshash_xor):
// For k = 0..7: gathers the k-th 64-bit word of each lane's 64-byte cache item and XORs it
// into register rk (v0..v7).
// vluxei64.v v18, (base), v9 loads, for every lane i, the 64-bit value at base + v9[i];
// v9 holds the per-lane byte offsets and base = x10 + 8*k selects the word.
// x5 and v18 are temporaries.
|
||||||
|
vluxei64.v v18, (x10), v9
|
||||||
|
vxor.vv v0, v0, v18
|
||||||
|
|
||||||
|
add x5, x10, 8
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v1, v1, v18
|
||||||
|
|
||||||
|
add x5, x10, 16
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v2, v2, v18
|
||||||
|
|
||||||
|
add x5, x10, 24
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v3, v3, v18
|
||||||
|
|
||||||
|
add x5, x10, 32
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v4, v4, v18
|
||||||
|
|
||||||
|
add x5, x10, 40
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v5, v5, v18
|
||||||
|
|
||||||
|
add x5, x10, 48
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v6, v6, v18
|
||||||
|
|
||||||
|
add x5, x10, 56
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v7, v7, v18
|
||||||
|
|
||||||
|
// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5.
|
||||||
|
DECL(randomx_riscv64_vector_sshash_set_cache_index):
// Table of eight alternative instructions, one per source register r0..r7 (v0..v7).
// Each copies its register into v9 (cacheIndex). Run back to back they would simply leave
// v9 = v7, so they are only meaningful when a single entry is selected.
|
||||||
|
// JIT compiler will pick a single instruction reading from the required register
|
||||||
|
vmv.v.v v9, v0
|
||||||
|
vmv.v.v v9, v1
|
||||||
|
vmv.v.v v9, v2
|
||||||
|
vmv.v.v v9, v3
|
||||||
|
vmv.v.v v9, v4
|
||||||
|
vmv.v.v v9, v5
|
||||||
|
vmv.v.v v9, v6
|
||||||
|
vmv.v.v v9, v7
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_end):
|
||||||
58
src/crypto/randomx/jit_compiler_rv64_vector_static.h
Normal file
58
src/crypto/randomx/jit_compiler_rv64_vector_static.h
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
#include <cstdint>
|
||||||
|
#else
|
||||||
|
#include <stdint.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct randomx_cache;

/*
 * Symbols exported by the RISC-V vector SuperscalarHash assembly, listed in
 * the order in which the labels appear in that assembly source.
 *
 * Only randomx_riscv64_vector_sshash_dataset_init has a real signature. The
 * other names label code fragments or data areas in the assembly
 * (for example, imul_rcp_literals labels a zero-filled literal table).
 * NOTE(review): presumably only their addresses are taken and they are never
 * called directly - confirm against the JIT compiler.
 */

/* First label of the assembly section. */
void randomx_riscv64_vector_sshash_begin(void);

/* Literal table that the init loop points x15 at. */
void randomx_riscv64_vector_sshash_imul_rcp_literals(void);

/*
 * Initialises dataset items [startBlock, endBlock) into output_buf, 64 bytes
 * per item. The assembly produces items in groups of four.
 */
void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock);

/* Start and end of the zero-filled region reserved for generated code. */
void randomx_riscv64_vector_sshash_generated_instructions(void);
void randomx_riscv64_vector_sshash_generated_instructions_end(void);

/* Code fragments for steps 4, 6 and 7 of dataset item generation. */
void randomx_riscv64_vector_sshash_cache_prefetch(void);
void randomx_riscv64_vector_sshash_xor(void);
void randomx_riscv64_vector_sshash_set_cache_index(void);

/* Last label of the assembly section. */
void randomx_riscv64_vector_sshash_end(void);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
@@ -39,6 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
||||||
#elif (XMRIG_ARM == 8)
|
#elif (XMRIG_ARM == 8)
|
||||||
#include "crypto/randomx/jit_compiler_a64_static.hpp"
|
#include "crypto/randomx/jit_compiler_a64_static.hpp"
|
||||||
|
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "backend/cpu/Cpu.h"
|
#include "backend/cpu/Cpu.h"
|
||||||
@@ -190,7 +192,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
|||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (XMRIG_ARM == 8)
|
#if (XMRIG_ARM == 8) || defined(XMRIG_RISCV)
|
||||||
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
|
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -274,6 +276,14 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
|||||||
|
|
||||||
#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x
|
#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x
|
||||||
|
|
||||||
|
#elif defined(XMRIG_RISCV)
|
||||||
|
|
||||||
|
Log2_ScratchpadL1 = Log2(ScratchpadL1_Size);
|
||||||
|
Log2_ScratchpadL2 = Log2(ScratchpadL2_Size);
|
||||||
|
Log2_ScratchpadL3 = Log2(ScratchpadL3_Size);
|
||||||
|
|
||||||
|
#define JIT_HANDLE(x, prev) randomx::JitCompilerRV64::engine[k] = &randomx::JitCompilerRV64::v1_##x
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define JIT_HANDLE(x, prev)
|
#define JIT_HANDLE(x, prev)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ struct RandomX_ConfigurationBase
|
|||||||
uint32_t ScratchpadL3Mask_Calculated;
|
uint32_t ScratchpadL3Mask_Calculated;
|
||||||
uint32_t ScratchpadL3Mask64_Calculated;
|
uint32_t ScratchpadL3Mask64_Calculated;
|
||||||
|
|
||||||
# if (XMRIG_ARM == 8)
|
# if (XMRIG_ARM == 8) || defined(XMRIG_RISCV)
|
||||||
uint32_t Log2_ScratchpadL1;
|
uint32_t Log2_ScratchpadL1;
|
||||||
uint32_t Log2_ScratchpadL2;
|
uint32_t Log2_ScratchpadL2;
|
||||||
uint32_t Log2_ScratchpadL3;
|
uint32_t Log2_ScratchpadL3;
|
||||||
|
|||||||
@@ -73,8 +73,20 @@ uint64_t randomx_reciprocal(uint64_t divisor) {
|
|||||||
|
|
||||||
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
/*
 * Division-based reciprocal for GCC-compatible compilers.
 *
 * Returns floor(2^(63 + n) / divisor) truncated to 64 bits, where n is the
 * number of significant bits in divisor (64 - clz(divisor)). This is intended
 * to produce the same value as randomx_reciprocal().
 *
 * divisor must be non-zero: both the division and __builtin_clzll(0) are
 * undefined for zero.
 *
 * Divisors that fit in 32 bits take the two-division fast path. Wider
 * divisors would overflow (r << shift), or shift by 64 when bit 63 is set,
 * so they are handled by an exact bit-serial long division instead.
 */
uint64_t randomx_reciprocal_fast(uint64_t divisor)
{
	const uint64_t q = (1ULL << 63) / divisor;
	const uint64_t r = (1ULL << 63) % divisor;

	const uint64_t shift = 64 - __builtin_clzll(divisor);

	if ((divisor >> 32) == 0) {
		/* r < divisor < 2^32 and shift <= 32, so (r << shift) cannot overflow. */
		return (q << shift) + ((r << shift) / divisor);
	}

	/*
	 * Wide divisor: extend the quotient one bit at a time.
	 * Invariant: quotient * divisor + remainder == 2^(63 + i), remainder < divisor.
	 */
	uint64_t quotient = q;
	uint64_t remainder = r;

	for (uint64_t i = 0; i < shift; ++i) {
		/* gap > 0 because remainder < divisor; comparing against it avoids computing 2 * remainder. */
		const uint64_t gap = divisor - remainder;

		quotient <<= 1;
		if (remainder >= gap) {
			/* 2 * remainder >= divisor: emit a 1 bit, keep 2 * remainder - divisor. */
			quotient |= 1;
			remainder -= gap;
		} else {
			/* 2 * remainder < divisor, so doubling cannot overflow. */
			remainder <<= 1;
		}
	}

	return quotient;
}
|
||||||
|
#else
|
||||||
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
|
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
|
||||||
return randomx_reciprocal(divisor);
|
return randomx_reciprocal(divisor);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -39,6 +39,9 @@ alignas(64) uint32_t lutDec1[256];
|
|||||||
alignas(64) uint32_t lutDec2[256];
|
alignas(64) uint32_t lutDec2[256];
|
||||||
alignas(64) uint32_t lutDec3[256];
|
alignas(64) uint32_t lutDec3[256];
|
||||||
|
|
||||||
|
alignas(64) uint8_t lutEncIndex[4][32];
|
||||||
|
alignas(64) uint8_t lutDecIndex[4][32];
|
||||||
|
|
||||||
static uint32_t mul_gf2(uint32_t b, uint32_t c)
|
static uint32_t mul_gf2(uint32_t b, uint32_t c)
|
||||||
{
|
{
|
||||||
uint32_t s = 0;
|
uint32_t s = 0;
|
||||||
@@ -115,5 +118,49 @@ static struct SAESInitializer
|
|||||||
lutDec2[i] = w; w = (w << 8) | (w >> 24);
|
lutDec2[i] = w; w = (w << 8) | (w >> 24);
|
||||||
lutDec3[i] = w;
|
lutDec3[i] = w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(lutEncIndex, -1, sizeof(lutEncIndex));
|
||||||
|
memset(lutDecIndex, -1, sizeof(lutDecIndex));
|
||||||
|
|
||||||
|
lutEncIndex[0][ 0] = 0;
|
||||||
|
lutEncIndex[0][ 4] = 4;
|
||||||
|
lutEncIndex[0][ 8] = 8;
|
||||||
|
lutEncIndex[0][12] = 12;
|
||||||
|
lutEncIndex[1][ 0] = 5;
|
||||||
|
lutEncIndex[1][ 4] = 9;
|
||||||
|
lutEncIndex[1][ 8] = 13;
|
||||||
|
lutEncIndex[1][12] = 1;
|
||||||
|
lutEncIndex[2][ 0] = 10;
|
||||||
|
lutEncIndex[2][ 4] = 14;
|
||||||
|
lutEncIndex[2][ 8] = 2;
|
||||||
|
lutEncIndex[2][12] = 6;
|
||||||
|
lutEncIndex[3][ 0] = 15;
|
||||||
|
lutEncIndex[3][ 4] = 3;
|
||||||
|
lutEncIndex[3][ 8] = 7;
|
||||||
|
lutEncIndex[3][12] = 11;
|
||||||
|
|
||||||
|
lutDecIndex[0][ 0] = 0;
|
||||||
|
lutDecIndex[0][ 4] = 4;
|
||||||
|
lutDecIndex[0][ 8] = 8;
|
||||||
|
lutDecIndex[0][12] = 12;
|
||||||
|
lutDecIndex[1][ 0] = 13;
|
||||||
|
lutDecIndex[1][ 4] = 1;
|
||||||
|
lutDecIndex[1][ 8] = 5;
|
||||||
|
lutDecIndex[1][12] = 9;
|
||||||
|
lutDecIndex[2][ 0] = 10;
|
||||||
|
lutDecIndex[2][ 4] = 14;
|
||||||
|
lutDecIndex[2][ 8] = 2;
|
||||||
|
lutDecIndex[2][12] = 6;
|
||||||
|
lutDecIndex[3][ 0] = 7;
|
||||||
|
lutDecIndex[3][ 4] = 11;
|
||||||
|
lutDecIndex[3][ 8] = 15;
|
||||||
|
lutDecIndex[3][12] = 3;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
for (uint32_t j = 0; j < 16; j += 4) {
|
||||||
|
lutEncIndex[i][j + 16] = lutEncIndex[i][j] + 16;
|
||||||
|
lutDecIndex[i][j + 16] = lutDecIndex[i][j] + 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} aes_initializer;
|
} aes_initializer;
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ extern uint32_t lutDec1[256];
|
|||||||
extern uint32_t lutDec2[256];
|
extern uint32_t lutDec2[256];
|
||||||
extern uint32_t lutDec3[256];
|
extern uint32_t lutDec3[256];
|
||||||
|
|
||||||
|
extern uint8_t lutEncIndex[4][32];
|
||||||
|
extern uint8_t lutDecIndex[4][32];
|
||||||
|
|
||||||
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
|
|
||||||
@@ -147,3 +150,32 @@ template<>
|
|||||||
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
return rx_aesdec_vec_i128(in, key);
|
return rx_aesdec_vec_i128(in, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
|
||||||
|
// Table-driven AES round step on RVV, operating on 32 bytes of input at once
// (byte operations use vl = 32, 32-bit operations use vl = 8).
//
// in         - input state, viewed both as 32 bytes and as eight 32-bit lanes
// key        - value XORed into the result
// i0..i3     - byte-index vectors used to gather bytes from `in`, one per lookup table
// lut0..lut3 - lookup tables of 32-bit entries; each gathered value is scaled by 4 to
//              address them
//
// Returns lut0[..] ^ lut1[..] ^ lut2[..] ^ lut3[..] ^ key for each of the eight 32-bit lanes.
//
// NOTE(review): assumes one m1 register holds at least 32 bytes (VLEN >= 256) - confirm.
// NOTE(review): i0..i3 are presumably rows of lutEncIndex/lutDecIndex, where unused bytes
// are 0xFF. With 32-byte registers those out-of-range indices gather 0, so each 32-bit
// lane would hold one zero-extended state byte - verify against the callers.
FORCE_INLINE vuint32m1_t softaes_vector_double(
|
||||||
|
vuint32m1_t in,
|
||||||
|
vuint32m1_t key,
|
||||||
|
vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3,
|
||||||
|
const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3)
|
||||||
|
{
|
||||||
|
// Reinterpret the state as bytes so individual bytes can be permuted.
const vuint8m1_t in8 = __riscv_vreinterpret_v_u32m1_u8m1(in);
|
||||||
|
|
||||||
|
// One byte gather per table: result byte j = in8[iN[j]]; the result is viewed as 32-bit lanes again.
const vuint32m1_t index0 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i0, 32));
|
||||||
|
const vuint32m1_t index1 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i1, 32));
|
||||||
|
const vuint32m1_t index2 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i2, 32));
|
||||||
|
const vuint32m1_t index3 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i3, 32));
|
||||||
|
|
||||||
|
// Indexed loads take byte offsets, so each lane value is shifted left by 2
// (multiplied by sizeof(uint32_t)) before the table lookup.
vuint32m1_t s0 = __riscv_vluxei32_v_u32m1(lut0, __riscv_vsll_vx_u32m1(index0, 2, 8), 8);
|
||||||
|
vuint32m1_t s1 = __riscv_vluxei32_v_u32m1(lut1, __riscv_vsll_vx_u32m1(index1, 2, 8), 8);
|
||||||
|
vuint32m1_t s2 = __riscv_vluxei32_v_u32m1(lut2, __riscv_vsll_vx_u32m1(index2, 2, 8), 8);
|
||||||
|
vuint32m1_t s3 = __riscv_vluxei32_v_u32m1(lut3, __riscv_vsll_vx_u32m1(index3, 2, 8), 8);
|
||||||
|
|
||||||
|
// Combine the four lookups pairwise, then fold in the key.
s0 = __riscv_vxor_vv_u32m1(s0, s1, 8);
|
||||||
|
s2 = __riscv_vxor_vv_u32m1(s2, s3, 8);
|
||||||
|
s0 = __riscv_vxor_vv_u32m1(s0, s2, 8);
|
||||||
|
|
||||||
|
return __riscv_vxor_vv_u32m1(s0, key, 8);
|
||||||
|
}
|
||||||
|
#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
|||||||
14
src/crypto/randomx/tests/riscv64_vector.s
Normal file
14
src/crypto/randomx/tests/riscv64_vector.s
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/* RISC-V - test if the vector extension and prefetch instruction are present */
|
||||||
|
|
||||||
|
.text
|
||||||
|
.option arch, rv64gcv_zicbop
|
||||||
|
.global main
|
||||||
|
|
||||||
|
main:
/* Probe: executes one prefetch and a few vector instructions, then returns 4 - vl in x10. */
|
||||||
|
/* main's own address serves as a valid prefetch target. */
lla x5, main
|
||||||
|
prefetch.r (x5)
|
||||||
|
li x5, 4
|
||||||
|
/* Request 4 elements of 64 bits with LMUL = 1; x6 receives the vl actually granted. */
vsetvli x6, x5, e64, m1, ta, ma
|
||||||
|
/* One vector ALU instruction (clears v0). */
vxor.vv v0, v0, v0
|
||||||
|
/* x10 = 4 - vl: zero only when the full vector length of 4 was granted. */
sub x10, x5, x6
|
||||||
|
ret
|
||||||
9
src/crypto/randomx/tests/riscv64_zba.s
Normal file
9
src/crypto/randomx/tests/riscv64_zba.s
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
/* RISC-V - test if the Zba extension is present */
|
||||||
|
|
||||||
|
.text
|
||||||
|
.global main
|
||||||
|
|
||||||
|
main:
/* Probe: executes one Zba instruction and returns 0 in x10. */
/* NOTE(review): presumably a failed build or a crash at run time is what signals "Zba missing" - confirm in the build scripts. */
|
||||||
|
/* sh1add rd, rs1, rs2 computes rd = rs2 + (rs1 << 1); the result in x6 is not used. */
sh1add x6, x6, x7
|
||||||
|
li x10, 0
|
||||||
|
ret
|
||||||
9
src/crypto/randomx/tests/riscv64_zbb.s
Normal file
9
src/crypto/randomx/tests/riscv64_zbb.s
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
/* RISC-V - test if the Zbb extension is present */
|
||||||
|
|
||||||
|
.text
|
||||||
|
.global main
|
||||||
|
|
||||||
|
main:
/* Probe: executes one Zbb instruction and returns 0 in x10. */
/* NOTE(review): presumably a failed build or a crash at run time is what signals "Zbb missing" - confirm in the build scripts. */
|
||||||
|
/* ror rd, rs1, rs2 rotates rs1 right by the amount in rs2; the result in x6 is not used. */
ror x6, x6, x7
|
||||||
|
li x10, 0
|
||||||
|
ret
|
||||||
@@ -43,6 +43,12 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache,
|
|||||||
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5));
|
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5));
|
||||||
randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5);
|
randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5);
|
||||||
}
|
}
|
||||||
|
#ifdef XMRIG_RISCV
|
||||||
|
else if (itemCount % 4) {
|
||||||
|
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 4));
|
||||||
|
randomx_init_dataset(dataset, cache, startItem + itemCount - 4, 4);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
else {
|
else {
|
||||||
randomx_init_dataset(dataset, cache, startItem, itemCount);
|
randomx_init_dataset(dataset, cache, startItem, itemCount);
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user