1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-07 07:55:04 -05:00

Compare commits

...

59 Commits

Author SHA1 Message Date
xmrig
856813c1ae Merge pull request #3740 from SChernykh/dev
RISC-V: added vectorized soft AES
2025-12-06 19:39:47 +07:00
SChernykh
23da1a90f5 RISC-V: added vectorized soft AES 2025-12-05 21:09:22 +01:00
xmrig
7981e4a76a Merge pull request #3736 from SChernykh/dev
RISC-V: added vectorized dataset init
2025-12-01 10:46:03 +07:00
SChernykh
7ef5142a52 RISC-V: added vectorized dataset init (activated by setting init-avx2 to 1 in config.json) 2025-11-30 19:15:15 +01:00
xmrig
db5c6d9190 Merge pull request #3733 from void-512/master
Add detection for MSVC/2026
2025-11-13 15:52:43 +07:00
Tony Wang
e88009d575 add detection for MSVC/2026 2025-11-12 17:32:57 -05:00
XMRig
5115597e7f Improved compatibility for automatically enabling huge pages on Linux systems without NUMA support. 2025-11-07 01:55:00 +07:00
xmrig
4cdc35f966 Merge pull request #3731 from user0-07161/dev-haiku-os-support
feat: initial haiku os support
2025-11-05 18:47:22 +07:00
user0-07161
b02519b9f5 feat: initial support for haiku 2025-11-04 13:58:01 +00:00
XMRig
a44b21cef3 Cleanup 2025-10-27 19:18:52 +07:00
XMRig
ea832899f2 Fixed macOS build. 2025-10-23 11:17:59 +07:00
xmrig
3ecacf0ac2 Merge pull request #3725 from SChernykh/dev
RISC-V integration and JIT compiler
2025-10-23 11:02:21 +07:00
SChernykh
27c8e60919 Removed unused files 2025-10-22 23:31:02 +02:00
SChernykh
985fe06e8d RISC-V: test for instruction extensions 2025-10-22 19:21:26 +02:00
SChernykh
75b63ddde9 RISC-V JIT compiler 2025-10-22 19:00:20 +02:00
slayingripper
643b65f2c0 RISC-V Integration 2025-10-22 18:57:20 +02:00
xmrig
116ba1828f Merge pull request #3722 from SChernykh/dev
Added Zen4 (Hawk Point) CPUs detection
2025-10-15 13:23:36 +07:00
SChernykh
da5a5674b4 Added Zen4 (Hawk Point) CPUs detection 2025-10-15 08:07:58 +02:00
xmrig
6cc4819cec Merge pull request #3719 from SChernykh/dev
Fix: correct FCMP++ version number
2025-10-05 18:28:21 +07:00
SChernykh
a659397c41 Fix: correct FCMP++ version number 2025-10-05 13:24:55 +02:00
xmrig
20acfd0d79 Merge pull request #3718 from SChernykh/dev
Solo mining: added support for FCMP++ hardfork
2025-10-05 18:04:23 +07:00
SChernykh
da683d8c3e Solo mining: added support for FCMP++ hardfork 2025-10-05 13:00:21 +02:00
XMRig
255565b533 Merge branch 'xtophyr-master' into dev 2025-09-22 21:31:28 +07:00
XMRig
878e83bf59 Merge branch 'master' of https://github.com/xtophyr/xmrig into xtophyr-master 2025-09-22 21:31:14 +07:00
Christopher Wright
7abf17cb59 adjust instruction/register suffixes to compile with gcc-based assemblers. 2025-09-21 14:57:42 -04:00
Christopher Wright
eeec5ecd10 undo this change 2025-09-20 08:38:40 -04:00
Christopher Wright
93f5067999 minor Aarch64 JIT changes (better instruction selection, don't emit instructions that add 0, etc) 2025-09-20 08:32:32 -04:00
XMRig
dd6671bc59 Merge branch 'dev' of github.com:xmrig/xmrig into dev 2025-06-29 12:29:01 +07:00
XMRig
a1ee2fd9d2 Improved LibreSSL support. 2025-06-29 12:28:35 +07:00
xmrig
2619131176 Merge pull request #3680 from benthetechguy/armhf
Add armv8l to list of 32 bit ARM targets
2025-06-25 04:14:22 +07:00
Ben Westover
1161f230c5 Add armv8l to list of 32 bit ARM targets
armv8l is what CMAKE_SYSTEM_PROCESSOR is set to when an ARMv8 processor
is in 32-bit mode, so it should be added to the ARMv7 target list even
though it's v8 because it's 32 bits. Currently, it's not in any ARM
target list which means x86 is assumed and the build fails.
2025-06-24 15:28:01 -04:00
XMRig
d2363ba28b v6.24.1-dev 2025-06-23 08:37:15 +07:00
XMRig
1676da1fe9 Merge branch 'master' into dev 2025-06-23 08:36:52 +07:00
XMRig
6e4a5a6d94 v6.24.0 2025-06-23 07:44:53 +07:00
XMRig
273133aa63 Merge branch 'dev' 2025-06-23 07:44:05 +07:00
xmrig
c69e30c9a0 Update CHANGELOG.md 2025-06-23 05:39:26 +07:00
XMRig
6a690ba1e9 More DNS cleanup. 2025-06-20 23:45:53 +07:00
XMRig
545aef0937 v6.24.0-dev 2025-06-20 08:34:58 +07:00
xmrig
9fa66d3242 Merge pull request #3678 from xmrig/dns_ip_version
Improved IPv6 support.
2025-06-20 08:33:50 +07:00
XMRig
ec286c7fef Improved IPv6 support. 2025-06-20 07:39:52 +07:00
xmrig
e28d663d80 Merge pull request #3677 from SChernykh/dev
Tweaked autoconfig for AMD CPUs with < 2 MB L3 cache per thread, again (hopefully the last time)
2025-06-19 18:07:54 +07:00
SChernykh
aba1ad8cfc Tweaked autoconfig for AMD CPUs with < 2 MB L3 cache per thread, again (hopefully the last time) 2025-06-19 12:58:31 +02:00
xmrig
bf44ed52e9 Merge pull request #3674 from benthetechguy/armhf
cflags: Add lax-vector-conversions on ARMv7
2025-06-19 04:46:02 +07:00
Ben Westover
762c435fa8 cflags: Add lax-vector-conversions on ARMv7
lax-vector-conversions is enabled in the CXXFLAGS but not CFLAGS for ARMv7.
This commit adds it to CFLAGS which fixes the ARMv7 build (Fixes: #3673).
2025-06-18 16:38:05 -04:00
xmrig
48faf0a11b Merge pull request #3671 from SChernykh/dev
Hwloc: fixed detection of L2 cache size for some complex NUMA topologies
2025-06-17 18:52:43 +07:00
SChernykh
d125d22d27 Hwloc: fixed detection of L2 cache size for some complex NUMA topologies 2025-06-17 13:49:02 +02:00
XMRig
9f3591ae0d v6.23.1-dev 2025-06-16 21:29:17 +07:00
XMRig
6bbbcc71f1 Merge branch 'master' into dev 2025-06-16 21:28:48 +07:00
XMRig
e5a7a69cc0 v6.23.0 2025-06-16 21:00:42 +07:00
XMRig
f354b85a7b Merge branch 'dev' 2025-06-16 21:00:12 +07:00
xmrig
5ed8d79574 Update CHANGELOG.md 2025-06-16 20:46:33 +07:00
XMRig
fc395a5800 Update ARM CPUs database. 2025-06-16 19:54:08 +07:00
XMRig
9138690126 v6.23.0-dev 2025-06-16 02:05:43 +07:00
XMRig
d58061c903 Add detection for _aligned_malloc. 2025-06-15 20:06:19 +07:00
XMRig
3b863cf88f Fixed __umul128 for MSVC ARM64. 2025-06-15 04:58:03 +07:00
XMRig
9c7468df64 Fixed user agent string. 2025-06-15 00:21:23 +07:00
xmrig
a18fa269a6 Merge pull request #3666 from SChernykh/dev
Better detection of aligned malloc functions
2025-06-14 23:09:05 +07:00
SChernykh
bcc5581535 Better detection of aligned malloc functions 2025-06-14 18:00:27 +02:00
XMRig
dba336aa04 Update hwloc for MSVC. 2025-06-14 22:11:33 +07:00
102 changed files with 7629 additions and 841 deletions

1
.gitignore vendored
View File

@@ -4,3 +4,4 @@ scripts/deps
/CMakeLists.txt.user /CMakeLists.txt.user
/.idea /.idea
/src/backend/opencl/cl/cn/cryptonight_gen.cl /src/backend/opencl/cl/cn/cryptonight_gen.cl
.vscode

View File

@@ -1,3 +1,13 @@
# v6.24.0
- [#3671](https://github.com/xmrig/xmrig/pull/3671) Fixed detection of L2 cache size for some complex NUMA topologies.
- [#3674](https://github.com/xmrig/xmrig/pull/3674) Fixed ARMv7 build.
- [#3677](https://github.com/xmrig/xmrig/pull/3677) Fixed auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
- [#3678](https://github.com/xmrig/xmrig/pull/3678) Improved IPv6 support: the new default settings use IPv6 equally with IPv4.
# v6.23.0
- [#3668](https://github.com/xmrig/xmrig/issues/3668) Added support for Windows ARM64.
- [#3665](https://github.com/xmrig/xmrig/pull/3665) Tweaked auto-config for AMD CPUs with < 2 MB L3 cache per thread.
# v6.22.3 # v6.22.3
- [#3605](https://github.com/xmrig/xmrig/pull/3605) CUDA backend: added missing RandomX dataset update. - [#3605](https://github.com/xmrig/xmrig/pull/3605) CUDA backend: added missing RandomX dataset update.
- [#3646](https://github.com/xmrig/xmrig/pull/3646) Optimized auto-config for AMD CPUs with less than 2 MB L3 cache per thread. - [#3646](https://github.com/xmrig/xmrig/pull/3646) Optimized auto-config for AMD CPUs with less than 2 MB L3 cache per thread.

View File

@@ -95,7 +95,7 @@ set(HEADERS_CRYPTO
src/crypto/common/VirtualMemory.h src/crypto/common/VirtualMemory.h
) )
if (XMRIG_ARM) if (XMRIG_ARM OR XMRIG_RISCV)
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_arm.h) set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_arm.h)
else() else()
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86.h) set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86.h)

View File

@@ -10,7 +10,7 @@
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD. XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
## Mining backends ## Mining backends
- **CPU** (x86/x64/ARMv7/ARMv8) - **CPU** (x86/x64/ARMv7/ARMv8/RISC-V)
- **OpenCL** for AMD GPUs. - **OpenCL** for AMD GPUs.
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda). - **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).

View File

@@ -1,4 +1,4 @@
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) if (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set(XMRIG_ASM_LIBRARY "xmrig-asm") set(XMRIG_ASM_LIBRARY "xmrig-asm")
if (CMAKE_C_COMPILER_ID MATCHES MSVC) if (CMAKE_C_COMPILER_ID MATCHES MSVC)

View File

@@ -21,6 +21,19 @@ if (NOT VAES_SUPPORTED)
set(WITH_VAES OFF) set(WITH_VAES OFF)
endif() endif()
# Detect RISC-V architecture early (before it's used below)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv64|riscv|rv64)$")
set(RISCV_TARGET 64)
set(XMRIG_RISCV ON)
add_definitions(-DXMRIG_RISCV)
message(STATUS "Detected RISC-V 64-bit architecture (${CMAKE_SYSTEM_PROCESSOR})")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv32|rv32)$")
set(RISCV_TARGET 32)
set(XMRIG_RISCV ON)
add_definitions(-DXMRIG_RISCV)
message(STATUS "Detected RISC-V 32-bit architecture (${CMAKE_SYSTEM_PROCESSOR})")
endif()
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$") if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
add_definitions(-DRAPIDJSON_SSE2) add_definitions(-DRAPIDJSON_SSE2)
else() else()
@@ -29,6 +42,57 @@ else()
set(WITH_VAES OFF) set(WITH_VAES OFF)
endif() endif()
# Disable x86-specific features for RISC-V
if (XMRIG_RISCV)
set(WITH_SSE4_1 OFF)
set(WITH_AVX2 OFF)
set(WITH_VAES OFF)
# default build uses the RV64GC baseline
set(RVARCH "rv64gc")
# for native builds, enable Zba and Zbb if supported by the CPU
if(ARCH STREQUAL "native")
enable_language(ASM)
try_run(RANDOMX_VECTOR_RUN_FAIL
RANDOMX_VECTOR_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_vector.s
COMPILE_DEFINITIONS "-march=rv64gcv_zicbop")
if (RANDOMX_VECTOR_COMPILE_OK AND NOT RANDOMX_VECTOR_RUN_FAIL)
set(RVARCH "${RVARCH}v_zicbop")
add_definitions(-DXMRIG_RVV_ENABLED)
message(STATUS "RISC-V vector extension detected")
endif()
try_run(RANDOMX_ZBA_RUN_FAIL
RANDOMX_ZBA_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zba.s
COMPILE_DEFINITIONS "-march=rv64gc_zba")
if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
set(RVARCH "${RVARCH}_zba")
message(STATUS "RISC-V zba extension detected")
endif()
try_run(RANDOMX_ZBB_RUN_FAIL
RANDOMX_ZBB_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zbb.s
COMPILE_DEFINITIONS "-march=rv64gc_zbb")
if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
set(RVARCH "${RVARCH}_zbb")
message(STATUS "RISC-V zbb extension detected")
endif()
endif()
message(STATUS "Using -march=${RVARCH}")
endif()
add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag
if (ARM_V8) if (ARM_V8)
@@ -40,7 +104,7 @@ endif()
if (NOT ARM_TARGET) if (NOT ARM_TARGET)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$") if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$")
set(ARM_TARGET 8) set(ARM_TARGET 8)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve|armv8l)$")
set(ARM_TARGET 7) set(ARM_TARGET 7)
endif() endif()
endif() endif()

View File

@@ -26,8 +26,13 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
elseif (ARM_TARGET EQUAL 7) elseif (ARM_TARGET EQUAL 7)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
elseif (XMRIG_RISCV)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}")
add_definitions(-DHAVE_ROTR)
else() else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
@@ -41,6 +46,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
else() else()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware")
endif() endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc")
else() else()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
endif() endif()
@@ -74,6 +81,11 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
elseif (ARM_TARGET EQUAL 7) elseif (ARM_TARGET EQUAL 7)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
elseif (XMRIG_RISCV)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}")
add_definitions(-DHAVE_ROTR)
else() else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")

View File

@@ -17,6 +17,10 @@ else()
set(XMRIG_OS_LINUX ON) set(XMRIG_OS_LINUX ON)
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly) elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly)
set(XMRIG_OS_FREEBSD ON) set(XMRIG_OS_FREEBSD ON)
elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD)
set(XMRIG_OS_OPENBSD ON)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
set(XMRIG_OS_HAIKU ON)
endif() endif()
endif() endif()
@@ -43,6 +47,10 @@ elseif(XMRIG_OS_UNIX)
add_definitions(-DXMRIG_OS_LINUX) add_definitions(-DXMRIG_OS_LINUX)
elseif (XMRIG_OS_FREEBSD) elseif (XMRIG_OS_FREEBSD)
add_definitions(-DXMRIG_OS_FREEBSD) add_definitions(-DXMRIG_OS_FREEBSD)
elseif (XMRIG_OS_OPENBSD)
add_definitions(-DXMRIG_OS_OPENBSD)
elseif (XMRIG_OS_HAIKU)
add_definitions(-DXMRIG_OS_HAIKU)
endif() endif()
endif() endif()

View File

@@ -1,4 +1,18 @@
if (WITH_RANDOMX) if (WITH_RANDOMX)
include(CheckSymbolExists)
if (WIN32)
check_symbol_exists(_aligned_malloc "stdlib.h" HAVE_ALIGNED_MALLOC)
if (HAVE_ALIGNED_MALLOC)
add_compile_definitions(HAVE_ALIGNED_MALLOC)
endif()
else()
check_symbol_exists(posix_memalign "stdlib.h" HAVE_POSIX_MEMALIGN)
if (HAVE_POSIX_MEMALIGN)
add_compile_definitions(HAVE_POSIX_MEMALIGN)
endif()
endif()
add_definitions(/DXMRIG_ALGO_RANDOMX) add_definitions(/DXMRIG_ALGO_RANDOMX)
set(WITH_ARGON2 ON) set(WITH_ARGON2 ON)
@@ -48,7 +62,7 @@ if (WITH_RANDOMX)
src/crypto/randomx/jit_compiler_x86_static.asm src/crypto/randomx/jit_compiler_x86_static.asm
src/crypto/randomx/jit_compiler_x86.cpp src/crypto/randomx/jit_compiler_x86.cpp
) )
elseif (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) elseif (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.S src/crypto/randomx/jit_compiler_x86_static.S
src/crypto/randomx/jit_compiler_x86.cpp src/crypto/randomx/jit_compiler_x86.cpp
@@ -66,6 +80,16 @@ if (WITH_RANDOMX)
else() else()
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C) set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
endif() endif()
elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_rv64_static.S
src/crypto/randomx/jit_compiler_rv64_vector_static.S
src/crypto/randomx/jit_compiler_rv64.cpp
src/crypto/randomx/jit_compiler_rv64_vector.cpp
)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)
else() else()
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_fallback.cpp src/crypto/randomx/jit_compiler_fallback.cpp
@@ -102,7 +126,7 @@ if (WITH_RANDOMX)
) )
endif() endif()
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX)) if (WITH_MSR AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
add_definitions(/DXMRIG_FEATURE_MSR) add_definitions(/DXMRIG_FEATURE_MSR)
add_definitions(/DXMRIG_FIX_RYZEN) add_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON") message("-- WITH_MSR=ON")

365
doc/RISCV_PERF_TUNING.md Normal file
View File

@@ -0,0 +1,365 @@
# RISC-V Performance Optimization Guide
This guide provides comprehensive instructions for optimizing XMRig on RISC-V architectures.
## Build Optimizations
### Compiler Flags Applied Automatically
The CMake build now applies aggressive RISC-V-specific optimizations:
```cmake
# RISC-V ISA with extensions
-march=rv64gcv_zba_zbb_zbc_zbs
# Aggressive compiler optimizations
-funroll-loops # Unroll loops for ILP (instruction-level parallelism)
-fomit-frame-pointer # Free up frame pointer register (RISC-V has limited registers)
-fno-common # Better code generation for global variables
-finline-functions # Inline more functions for better cache locality
-ffast-math # Relaxed FP semantics (safe for mining)
-flto # Link-time optimization for cross-module inlining
# Release build additions
-minline-atomics # Inline atomic operations for faster synchronization
```
### Optimal Build Command
```bash
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j$(nproc)
```
**Expected build time**: 5-15 minutes depending on CPU
## Runtime Optimizations
### 1. Memory Configuration (Most Important)
Enable huge pages to reduce TLB misses and fragmentation:
#### Enable 2MB Huge Pages
```bash
# Calculate required huge pages (1 page = 2MB)
# For 2 GB dataset: 1024 pages
# For cache + dataset: 1536 pages minimum
sudo sysctl -w vm.nr_hugepages=2048
```
Verify:
```bash
grep HugePages /proc/meminfo
# Expected: HugePages_Free should be close to nr_hugepages
```
#### Enable 1GB Huge Pages (Optional but Recommended)
```bash
# Run provided helper script
sudo ./scripts/enable_1gb_pages.sh
# Verify 1GB pages are available
cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
# Should be: >= 1 (one 1GB page)
```
Update config.json:
```json
{
"cpu": {
"huge-pages": true
},
"randomx": {
"1gb-pages": true
}
}
```
### 2. RandomX Mode Selection
| Mode | Memory | Init Time | Throughput | Recommendation |
|------|--------|-----------|-----------|-----------------|
| **light** | 256 MB | 10 sec | Low | Testing, resource-constrained |
| **fast** | 2 GB | 2-5 min* | High | Production (with huge pages) |
| **auto** | 2 GB | Varies | High | Default (uses fast if possible) |
*With optimizations; can be 30+ minutes without huge pages
**For RISC-V, use fast mode with huge pages enabled.**
### 3. Dataset Initialization Threads
Optimal thread count = 60-75% of CPU cores (leaves headroom for OS/other tasks)
```json
{
"randomx": {
"init": 4
}
}
```
Or auto-detect (rewritten for RISC-V):
```json
{
"randomx": {
"init": -1
}
}
```
### 4. CPU Affinity (Optional)
Pin threads to specific cores for better cache locality:
```json
{
"cpu": {
"rx/0": [
{ "threads": 1, "affinity": 0 },
{ "threads": 1, "affinity": 1 },
{ "threads": 1, "affinity": 2 },
{ "threads": 1, "affinity": 3 }
]
}
}
```
### 5. CPU Governor (Linux)
Set to performance mode for maximum throughput:
```bash
# Check current governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
# Set to performance (requires root)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# Verify
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
# Should output: performance
```
## Configuration Examples
### Minimum (Testing)
```json
{
"randomx": {
"mode": "light"
},
"cpu": {
"huge-pages": false
}
}
```
### Recommended (Balanced)
```json
{
"randomx": {
"mode": "auto",
"init": 4,
"1gb-pages": true
},
"cpu": {
"huge-pages": true,
"priority": 2
}
}
```
### Maximum Performance (Production)
```json
{
"randomx": {
"mode": "fast",
"init": -1,
"1gb-pages": true,
"scratchpad_prefetch_mode": 1
},
"cpu": {
"huge-pages": true,
"priority": 3,
"yield": false
}
}
```
## CLI Equivalents
```bash
# Light mode
./xmrig --randomx-mode=light
# Fast mode with 4 init threads
./xmrig --randomx-mode=fast --randomx-init=4
# Benchmark
./xmrig --bench=1M --algo=rx/0
# Benchmark Wownero variant (1 MB scratchpad)
./xmrig --bench=1M --algo=rx/wow
# Mine to pool
./xmrig -o pool.example.com:3333 -u YOUR_WALLET -p x
```
## Performance Diagnostics
### Check if Vector Extensions are Detected
Look for `FEATURES:` line in output:
```
* CPU: ky,x60 (uarch ky,x1)
* FEATURES: rv64imafdcv zba zbb zbc zbs
```
- `v`: Vector extension (RVV) ✓
- `zba`, `zbb`, `zbc`, `zbs`: Bit manipulation ✓
- If missing, make sure build used `-march=rv64gcv_zba_zbb_zbc_zbs`
### Verify Huge Pages at Runtime
```bash
# Run xmrig with --bench=1M and check output
./xmrig --bench=1M
# Look for line like:
# HUGE PAGES 100% 1 / 1 (1024 MB)
```
- Should show 100% for dataset AND threads
- If less, increase `vm.nr_hugepages` and reboot
### Monitor Performance
```bash
# Run benchmark multiple times to find stable hashrate
./xmrig --bench=1M --algo=rx/0
./xmrig --bench=10M --algo=rx/0
./xmrig --bench=100M --algo=rx/0
# Check system load and memory during mining
while true; do free -h; grep HugePages /proc/meminfo; sleep 2; done
```
## Expected Performance
### Hardware: Orange Pi RV2 (Ky X1, 8 cores @ ~1.5 GHz)
| Config | Mode | Hashrate | Init Time |
|--------|------|----------|-----------|
| Scalar (baseline) | fast | 30 H/s | 10 min |
| Scalar + huge pages | fast | 33 H/s | 2 min |
| RVV (if enabled) | fast | 70-100 H/s | 3 min |
*Actual results depend on CPU frequency, memory speed, and load*
## Troubleshooting
### Long Initialization Times (30+ minutes)
**Cause**: Huge pages not enabled, system using swap
**Solution**:
1. Enable huge pages: `sudo sysctl -w vm.nr_hugepages=2048`
2. Reboot: `sudo reboot`
3. Reduce mining threads to free memory
4. Check available memory: `free -h`
### Low Hashrate (50% of expected)
**Cause**: CPU governor set to power-save, no huge pages, high contention
**Solution**:
1. Set governor to performance: `echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor`
2. Enable huge pages
3. Reduce number of mining threads
4. Check system load: `top` or `htop`
### Dataset Init Crashes or Hangs
**Cause**: Insufficient memory, corrupted huge pages
**Solution**:
1. Disable huge pages temporarily: set `huge-pages: false` in config
2. Reduce mining threads
3. Reboot and re-enable huge pages
4. Try light mode: `--randomx-mode=light`
### Out of Memory During Benchmark
**Cause**: Not enough RAM for dataset + cache + threads
**Solution**:
1. Use light mode: `--randomx-mode=light`
2. Reduce mining threads: `--threads=1`
3. Increase available memory (kill other processes)
4. Check: `free -h` before mining
## Advanced Tuning
### Vector Length (VLEN) Detection
The RISC-V vector extension's implementation-defined vector register length (VLEN) affects performance:
```bash
# Check VLEN on your CPU
cat /proc/cpuinfo | grep vlen
# Expected values:
# - 128 bits (16 bytes) = minimum
# - 256 bits (32 bytes) = common
# - 512 bits (64 bytes) = high performance
```
Larger VLEN generally means better performance for vectorized operations.
### Prefetch Optimization
The code automatically optimizes memory prefetching for RISC-V:
```
scratchpad_prefetch_mode: 0 = disabled (slowest)
scratchpad_prefetch_mode: 1 = prefetch.r (default, recommended)
scratchpad_prefetch_mode: 2 = prefetch.w (experimental)
```
### Memory Bandwidth Saturation
If experiencing memory bandwidth saturation (high latency):
1. Reduce mining threads
2. Increase L2/L3 cache by mining fewer threads per core
3. Enable cache QoS (AMD Ryzen): `cache_qos: true`
## Building with Custom Flags
To build with custom RISC-V flags:
```bash
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS="-march=rv64gcv_zba_zbb_zbc_zbs -O3 -funroll-loops -fomit-frame-pointer" \
..
make -j$(nproc)
```
## Future Optimizations
- [ ] Zbk* (crypto) support detection and usage
- [ ] Optimal VLEN-aware algorithm selection
- [ ] Per-core memory affinity (NUMA support)
- [ ] Dynamic thread count adjustment based on thermals
- [ ] Cross-compile optimizations for various RISC-V cores
## References
- [RISC-V Vector Extension Spec](https://github.com/riscv/riscv-v-spec)
- [RISC-V Bit Manipulation Spec](https://github.com/riscv/riscv-bitmanip)
- [RISC-V Crypto Spec](https://github.com/riscv/riscv-crypto)
- [XMRig Documentation](https://xmrig.com/docs)
---
For further optimization, enable RVV intrinsics by replacing `sse2rvv.h` with `sse2rvv_optimized.h` in the build.

View File

@@ -12,7 +12,7 @@ if grep -E 'AMD Ryzen|AMD EPYC|AuthenticAMD' /proc/cpuinfo > /dev/null;
then then
if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null; if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
then then
if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null; if grep "model[[:space:]]\{1,\}:[[:space:]]\(97\|117\)" /proc/cpuinfo > /dev/null;
then then
echo "Detected Zen4 CPU" echo "Detected Zen4 CPU"
wrmsr -a 0xc0011020 0x4400000000000 wrmsr -a 0xc0011020 0x4400000000000

View File

@@ -35,7 +35,7 @@ if (CMAKE_C_COMPILER_ID MATCHES MSVC)
add_feature_impl(xop "" HAVE_XOP) add_feature_impl(xop "" HAVE_XOP)
add_feature_impl(avx2 "/arch:AVX2" HAVE_AVX2) add_feature_impl(avx2 "/arch:AVX2" HAVE_AVX2)
add_feature_impl(avx512f "/arch:AVX512F" HAVE_AVX512F) add_feature_impl(avx512f "/arch:AVX512F" HAVE_AVX512F)
elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) elseif (NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
function(add_feature_impl FEATURE GCC_FLAG DEF) function(add_feature_impl FEATURE GCC_FLAG DEF)
add_library(argon2-${FEATURE} STATIC arch/x86_64/lib/argon2-${FEATURE}.c) add_library(argon2-${FEATURE} STATIC arch/x86_64/lib/argon2-${FEATURE}.c)
target_include_directories(argon2-${FEATURE} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) target_include_directories(argon2-${FEATURE} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)

View File

@@ -1,5 +1,5 @@
Copyright © 2009 CNRS Copyright © 2009 CNRS
Copyright © 2009-2024 Inria. All rights reserved. Copyright © 2009-2025 Inria. All rights reserved.
Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2013 Université Bordeaux
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
@@ -17,6 +17,52 @@ bug fixes (and other actions) for each version of hwloc since version
0.9. 0.9.
Version 2.12.1
--------------
* Add hwloc-calc's --default-nodes option to hwloc-bind and hwloc-info.
* Improve the --best-memattr "default" fallback, try to use "default"
memory nodes, and add verbose messages and warnings if some
performance info are incomplete or missing.
Thanks to Antoine Morvan for the report.
* Fix CPU and memory binding on different locations,
thanks to Antoine Morvan for the report.
* Add HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY and enable it by
default in hwloc-calc --local-memory for finding local NUMA nodes
that do not exactly match input locations.
Thanks to Antoine Morvan for the report.
* Fix a possible crash in the x86 backend when Qemu is configured to
expose multicore/thread CPUs that are actually single-core/thread.
Thanks to Georg Pfuetzenreuter.
Version 2.12.0
--------------
* Add hwloc_topology_get_default_nodeset() for the set of default
NUMA nodes.
- hwloc-calc now has --default-nodes option.
* Rework oneAPI LevelZero support to use zesInit() and avoid the need
to set ZES_ENABLE_SYSMAN=1 in the environment.
- zesDriverGetDeviceByUuidExp() is now required in the L0 runtime.
- ZES/Sysman variants were added in hwloc/levelzero.h to specifically
handle ZES/Sysman device handles.
* Fix the locality of AMD GPU partitions, thanks to Edgar Leon for
reporting and debugging the issue.
* Better detect Cray Slingshot NICs, thanks to Edgar Leon.
* Add support for Die objects and Module groups on Windows.
* Only filter-out Dies that are identical to their Packages
when it applies to all Dies.
* Improve hwloc-calc to handle CPU-less NUMA nodes or platforms with
heterogeneous memory without requiring --nodeset-output.
* hwloc-calc now accepts counting/listing cpukinds and memory tiers
with -N and -I cpukind/memorytier.
* The systemd-dbus-api output of hwloc-calc has changed, and
--nodeset-output-format was added, to support NUMA node outputs.
Thanks to Pierre Neyron.
* Update NVLink bandwidth and CUDA capabilities up to NVIDIA Blackwell.
* Fix some NUMA syscalls on Linux for platforms with old libc headers.
* Some minor fixes in distances.
Version 2.11.2 Version 2.11.2
-------------- --------------
* Add missing CPU info attrs on aarch64 on Linux. * Add missing CPU info attrs on aarch64 on Linux.

View File

@@ -8,8 +8,8 @@
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
major=2 major=2
minor=11 minor=12
release=2 release=1
# greek is used for alpha or beta release tags. If it is non-empty, # greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be # it will be appended to the version number. It does not have to be
@@ -22,7 +22,7 @@ greek=
# The date when this release was created # The date when this release was created
date="Sep 26, 2024" date="May 12, 2025"
# If snapshot=1, then use the value from snapshot_version as the # If snapshot=1, then use the value from snapshot_version as the
# entire hwloc version (i.e., ignore major, minor, release, and # entire hwloc version (i.e., ignore major, minor, release, and
@@ -41,6 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
# 2. Version numbers are described in the Libtool current:revision:age # 2. Version numbers are described in the Libtool current:revision:age
# format. # format.
libhwloc_so_version=23:1:8 libhwloc_so_version=25:0:10
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj # Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -112,7 +112,7 @@ extern "C" {
* Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION
* even if their HWLOC_VERSION are different. * even if their HWLOC_VERSION are different.
*/ */
#define HWLOC_API_VERSION 0x00020b00 #define HWLOC_API_VERSION 0x00020c00
/** \brief Indicate at runtime which hwloc API version was used at build time. /** \brief Indicate at runtime which hwloc API version was used at build time.
* *
@@ -346,9 +346,10 @@ typedef enum {
* *
* Some operating systems (e.g. Linux) may expose a single die per package * Some operating systems (e.g. Linux) may expose a single die per package
* even if the hardware does not support dies at all. To avoid showing * even if the hardware does not support dies at all. To avoid showing
* such non-existing dies, the corresponding hwloc backend may filter them out. * such non-existing dies, hwloc will filter them out if all of them are
* identical to packages.
* This is functionally equivalent to ::HWLOC_TYPE_FILTER_KEEP_STRUCTURE * This is functionally equivalent to ::HWLOC_TYPE_FILTER_KEEP_STRUCTURE
* being enforced. * being enforced for Dies versus Packages.
*/ */
HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
@@ -1047,7 +1048,7 @@ HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwlo
* If \p size is 0, \p string may safely be \c NULL. * If \p size is 0, \p string may safely be \c NULL.
* *
* \return the number of characters that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
*/ */
HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size,
hwloc_obj_t obj, hwloc_obj_t obj,
@@ -1062,7 +1063,7 @@ HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_
* If \p size is 0, \p string may safely be \c NULL. * If \p size is 0, \p string may safely be \c NULL.
* *
* \return the number of characters that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
*/ */
HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size,
hwloc_obj_t obj, const char * __hwloc_restrict separator, hwloc_obj_t obj, const char * __hwloc_restrict separator,
@@ -2002,7 +2003,7 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo
* a file, as with hwloc_topology_set_xml()). * a file, as with hwloc_topology_set_xml()).
* *
* Gather topology information from the XML memory buffer given at * Gather topology information from the XML memory buffer given at
* \p buffer and of length \p size (including an ending \0). * \p buffer and of length \p size (including an ending \c \0).
* This buffer may have been filled earlier with * This buffer may have been filled earlier with
* hwloc_topology_export_xmlbuffer() in hwloc/export.h. * hwloc_topology_export_xmlbuffer() in hwloc/export.h.
* *

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -11,10 +11,10 @@
#ifndef HWLOC_CONFIG_H #ifndef HWLOC_CONFIG_H
#define HWLOC_CONFIG_H #define HWLOC_CONFIG_H
#define HWLOC_VERSION "2.11.2" #define HWLOC_VERSION "2.12.1"
#define HWLOC_VERSION_MAJOR 2 #define HWLOC_VERSION_MAJOR 2
#define HWLOC_VERSION_MINOR 11 #define HWLOC_VERSION_MINOR 12
#define HWLOC_VERSION_RELEASE 2 #define HWLOC_VERSION_RELEASE 1
#define HWLOC_VERSION_GREEK "" #define HWLOC_VERSION_GREEK ""
#define __hwloc_restrict #define __hwloc_restrict

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -113,51 +113,88 @@ HWLOC_DECLSPEC int hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t sr
* Bitmap/String Conversion * Bitmap/String Conversion
*/ */
/** \brief Stringify a bitmap. /** \brief Stringify a bitmap in the default hwloc format.
*
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* Print the bits set inside a bitmap as a comma-separated list of hexadecimal 32-bit blocks.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff,0x00000006,0x00000002"</tt>.
* *
* Up to \p buflen characters may be written in buffer \p buf. * Up to \p buflen characters may be written in buffer \p buf.
* *
* If \p buflen is 0, \p buf may safely be \c NULL. * If \p buflen is 0, \p buf may safely be \c NULL.
* *
* \return the number of characters that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
* \return -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
/** \brief Stringify a bitmap into a newly allocated string. /** \brief Stringify a bitmap into a newly allocated string in the default hwloc format.
* *
* \return 0 on success, -1 on error. * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* Print the bits set inside a bitmap as a comma-separated list of hexadecimal 32-bit blocks.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff,0x00000006,0x00000002"</tt>.
*
* \return the number of characters that were written (not including the ending \c \0).
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
/** \brief Parse a bitmap string and stores it in bitmap \p bitmap. /** \brief Parse a bitmap string as the default hwloc format and stores it in bitmap \p bitmap.
*
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
*
* The input string should be a comma-separared list of hexadecimal 32-bit blocks.
* String <tt>"0xffffffff,0x6,0x2"</tt> is parsed as a bitmap containing all bits between 64 and 95,
* and bits 33, 34 and 1.
* *
* \return 0 on success, -1 on error. * \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
/** \brief Stringify a bitmap in the list format. /** \brief Stringify a bitmap in the list format.
*
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
* *
* Lists are comma-separated indexes or ranges. * Lists are comma-separated indexes or ranges.
* Ranges are dash separated indexes. * Ranges are dash separated indexes.
* The last range may not have an ending indexes if the bitmap is infinitely set. * A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"1,33-34,64-95"</tt>.
* The last range may not have an ending index if the bitmap is infinitely set.
* *
* Up to \p buflen characters may be written in buffer \p buf. * Up to \p buflen characters may be written in buffer \p buf.
* *
* If \p buflen is 0, \p buf may safely be \c NULL. * If \p buflen is 0, \p buf may safely be \c NULL.
* *
* \return the number of characters that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
* \return -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
/** \brief Stringify a bitmap into a newly allocated list string. /** \brief Stringify a bitmap into a newly allocated list string.
* *
* \return 0 on success, -1 on error. * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* Lists are comma-separated indexes or ranges.
* Ranges are dash separated indexes.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"1,33-34,64-95"</tt>.
* The last range may not have an ending index if the bitmap is infinitely set.
*
* \return the number of characters that were written (not including the ending \c \0).
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
/** \brief Parse a list string and stores it in bitmap \p bitmap. /** \brief Parse a list string and stores it in bitmap \p bitmap.
*
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
*
* Lists are comma-separated indexes or ranges.
* Ranges are dash separated indexes.
* String <tt>"1,33-34,64-95"</tt> is parsed as a bitmap containing bits 1, 33, 34, and all from 64 to 95.
* The last range may not have an ending index if the bitmap is infinitely set.
* *
* \return 0 on success, -1 on error. * \return 0 on success, -1 on error.
*/ */
@@ -165,25 +202,43 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char *
/** \brief Stringify a bitmap in the taskset-specific format. /** \brief Stringify a bitmap in the taskset-specific format.
* *
* The taskset command manipulates bitmap strings that contain a single * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* The taskset program manipulates bitmap strings that contain a single
* (possible very long) hexadecimal number starting with 0x. * (possible very long) hexadecimal number starting with 0x.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as </tt>"0xffffffff0000000600000002"</tt>.
* *
* Up to \p buflen characters may be written in buffer \p buf. * Up to \p buflen characters may be written in buffer \p buf.
* *
* If \p buflen is 0, \p buf may safely be \c NULL. * If \p buflen is 0, \p buf may safely be \c NULL.
* *
* \return the number of characters that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
* \return -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
/** \brief Stringify a bitmap into a newly allocated taskset-specific string. /** \brief Stringify a bitmap into a newly allocated taskset-specific string.
* *
* \return 0 on success, -1 on error. * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* The taskset program manipulates bitmap strings that contain a single
* (possible very long) hexadecimal number starting with 0x.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff0000000600000002"</tt>.
*
* \return the number of characters that were written (not including the ending \c \0).
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap. /** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
*
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
*
* The taskset program manipulates bitmap strings that contain a single
* (possible very long) hexadecimal number starting with 0x.
* String <tt>"0xffffffff0000000600000002"</tt> is parsed as a bitmap containing all bits between 64 and 95,
* and bits 33, 34 and 1.
* *
* \return 0 on success, -1 on error. * \return 0 on success, -1 on error.
*/ */

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2013-2023 Inria. All rights reserved. * Copyright © 2013-2024 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -258,7 +258,7 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co
/** \brief Load a list of topology differences from a XML buffer. /** \brief Load a list of topology differences from a XML buffer.
* *
* Build a list of differences from the XML memory buffer given * Build a list of differences from the XML memory buffer given
* at \p xmlbuffer and of length \p buflen (including an ending \0). * at \p xmlbuffer and of length \p buflen (including an ending \c \0).
* This buffer may have been filled earlier with * This buffer may have been filled earlier with
* hwloc_topology_diff_export_xmlbuffer(). * hwloc_topology_diff_export_xmlbuffer().
* *
@@ -284,7 +284,7 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int
* that contains the reference topology. * that contains the reference topology.
* This attribute is given back when reading the diff from XML. * This attribute is given back when reading the diff from XML.
* *
* The returned buffer ends with a \0 that is included in the returned * The returned buffer ends with a \c \0 that is included in the returned
* length. * length.
* *
* \return 0 on success, -1 on error. * \return 0 on success, -1 on error.

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2024 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -227,17 +227,24 @@ enum hwloc_distances_transform_e {
HWLOC_DISTANCES_TRANSFORM_LINKS = 1, HWLOC_DISTANCES_TRANSFORM_LINKS = 1,
/** \brief Merge switches with multiple ports into a single object. /** \brief Merge switches with multiple ports into a single object.
* This currently only applies to NVSwitches where GPUs seem connected to different *
* separate switch ports in the NVLinkBandwidth matrix. This transformation will * This currently only applies to NVSwitches where GPUs seem connected
* replace all of them with the same port connected to all GPUs. * to different switch ports. Switch ports must be objects with subtype
* Other ports are removed by applying ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally. * "NVSwitch" as in the NVLinkBandwidth matrix.
*
* This transformation will replace all ports with only the first one,
* now connected to all GPUs. Other ports are removed by applying
* ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
* \hideinitializer * \hideinitializer
*/ */
HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2, HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,
/** \brief Apply a transitive closure to the matrix to connect objects across switches. /** \brief Apply a transitive closure to the matrix to connect objects across switches.
* This currently only applies to GPUs and NVSwitches in the NVLinkBandwidth matrix. *
* All pairs of GPUs will be reported as directly connected. * All pairs of GPUs will be reported as directly connected instead GPUs being
* only connected to switches.
*
* Switch ports must be objects with subtype "NVSwitch" as in the NVLinkBandwidth matrix.
* \hideinitializer * \hideinitializer
*/ */
HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3 HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2021-2023 Inria. All rights reserved. * Copyright © 2021-2024 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -32,7 +32,8 @@ extern "C" {
/** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface. /** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface.
* *
* This interface offers ways to retrieve topology information about * This interface offers ways to retrieve topology information about
* devices managed by the Level Zero API. * devices managed by the Level Zero API, both for main Core devices (ZE API)
* and the Sysman devices (ZES API).
* *
* @{ * @{
*/ */
@@ -44,9 +45,68 @@ extern "C" {
* the Level Zero device \p device. * the Level Zero device \p device.
* *
* Topology \p topology and device \p device must match the local machine. * Topology \p topology and device \p device must match the local machine.
* The Level Zero library must have been initialized with zeInit().
* I/O devices detection and the Level Zero component are not needed in the
* topology.
*
* The function only returns the locality of the device.
* If more information about the device is needed, OS objects should
* be used instead, see hwloc_levelzero_get_device_osdev().
*
* This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*
* \note zeDevicePciGetPropertiesExt() must be supported, or the entire machine
* locality will be returned.
*/
static __hwloc_inline int
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
ze_device_handle_t device, hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
/* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
ze_pci_ext_properties_t pci;
ze_result_t res;
if (!hwloc_topology_is_thissystem(topology)) {
errno = EINVAL;
return -1;
}
pci.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
pci.pNext = NULL;
res = zeDevicePciGetPropertiesExt(device, &pci);
if (res != ZE_RESULT_SUCCESS) {
errno = EINVAL;
return -1;
}
sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus",
pci.address.domain, pci.address.bus, pci.address.device, pci.address.function);
if (hwloc_linux_read_path_as_cpumask(path, set) < 0
|| hwloc_bitmap_iszero(set))
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#else
/* Non-Linux systems simply get a full cpuset */
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
return 0;
}
/** \brief Get the CPU set of logical processors that are physically
* close to the Level Zero Sysman device \p device
*
* Store in \p set the CPU-set describing the locality of
* the Level Zero device \p device.
*
* Topology \p topology and device \p device must match the local machine.
* The Level Zero library must have been initialized with Sysman enabled * The Level Zero library must have been initialized with Sysman enabled
* (by calling zesInit(0) if supported, * with zesInit().
* or by setting ZES_ENABLE_SYSMAN=1 in the environment).
* I/O devices detection and the Level Zero component are not needed in the * I/O devices detection and the Level Zero component are not needed in the
* topology. * topology.
* *
@@ -61,15 +121,14 @@ extern "C" {
* \return -1 on error, for instance if device information could not be found. * \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_levelzero_get_sysman_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
ze_device_handle_t device, hwloc_cpuset_t set) zes_device_handle_t device, hwloc_cpuset_t set)
{ {
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
/* If we're on Linux, use the sysfs mechanism to get the local cpus */ /* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128 #define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX]; char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
zes_pci_properties_t pci; zes_pci_properties_t pci;
zes_device_handle_t sdevice = device;
ze_result_t res; ze_result_t res;
if (!hwloc_topology_is_thissystem(topology)) { if (!hwloc_topology_is_thissystem(topology)) {
@@ -77,7 +136,7 @@ hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_un
return -1; return -1;
} }
res = zesDevicePciGetProperties(sdevice, &pci); res = zesDevicePciGetProperties(device, &pci);
if (res != ZE_RESULT_SUCCESS) { if (res != ZE_RESULT_SUCCESS) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
@@ -102,17 +161,90 @@ hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_un
* \return \c NULL if none could be found. * \return \c NULL if none could be found.
* *
* Topology \p topology and device \p dv_ind must match the local machine. * Topology \p topology and device \p dv_ind must match the local machine.
* The Level Zero library must have been initialized with zeInit().
* I/O devices detection and the Level Zero component must be enabled in the * I/O devices detection and the Level Zero component must be enabled in the
* topology. If not, the locality of the object may still be found using * topology. If not, the locality of the object may still be found using
* hwloc_levelzero_get_device_cpuset(). * hwloc_levelzero_get_device_cpuset().
* *
* \note If the input ZE device is actually a subdevice, then its parent
* (root device) is actually translated, i.e. the main hwloc OS device
* is returned instead of one of its children.
*
* \note The corresponding hwloc PCI device may be found by looking
* at the result parent pointer (unless PCI devices are filtered out).
*
* \note zeDevicePciGetPropertiesExt() must be supported.
*/
static __hwloc_inline hwloc_obj_t
hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device)
{
ze_pci_ext_properties_t pci;
ze_result_t res;
hwloc_obj_t osdev;
if (!hwloc_topology_is_thissystem(topology)) {
errno = EINVAL;
return NULL;
}
pci.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
pci.pNext = NULL;
res = zeDevicePciGetPropertiesExt(device, &pci);
if (res != ZE_RESULT_SUCCESS) {
errno = EINVAL;
return NULL;
}
osdev = NULL;
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
hwloc_obj_t pcidev;
if (strncmp(osdev->name, "ze", 2))
continue;
pcidev = osdev;
while (pcidev && pcidev->type != HWLOC_OBJ_PCI_DEVICE)
pcidev = pcidev->parent;
if (!pcidev)
continue;
if (pcidev
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE
&& pcidev->attr->pcidev.domain == pci.address.domain
&& pcidev->attr->pcidev.bus == pci.address.bus
&& pcidev->attr->pcidev.dev == pci.address.device
&& pcidev->attr->pcidev.func == pci.address.function)
return osdev;
/* FIXME: when we'll have serialnumber, try it in case PCI is filtered-out */
}
return NULL;
}
/** \brief Get the hwloc OS device object corresponding to Level Zero Sysman device
* \p device.
*
* \return The hwloc OS device object that describes the given Level Zero device \p device.
* \return \c NULL if none could be found.
*
* Topology \p topology and device \p dv_ind must match the local machine.
* The Level Zero library must have been initialized with Sysman enabled
* with zesInit().
* I/O devices detection and the Level Zero component must be enabled in the
* topology. If not, the locality of the object may still be found using
* hwloc_levelzero_get_device_cpuset().
*
* \note If the input ZES device is actually a subdevice, then its parent
* (root device) is actually translated, i.e. the main hwloc OS device
* is returned instead of one of its children.
*
* \note The corresponding hwloc PCI device may be found by looking * \note The corresponding hwloc PCI device may be found by looking
* at the result parent pointer (unless PCI devices are filtered out). * at the result parent pointer (unless PCI devices are filtered out).
*/ */
static __hwloc_inline hwloc_obj_t static __hwloc_inline hwloc_obj_t
hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device) hwloc_levelzero_get_sysman_device_osdev(hwloc_topology_t topology, zes_device_handle_t device)
{ {
zes_device_handle_t sdevice = device;
zes_pci_properties_t pci; zes_pci_properties_t pci;
ze_result_t res; ze_result_t res;
hwloc_obj_t osdev; hwloc_obj_t osdev;
@@ -122,20 +254,25 @@ hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t d
return NULL; return NULL;
} }
res = zesDevicePciGetProperties(sdevice, &pci); res = zesDevicePciGetProperties(device, &pci);
if (res != ZE_RESULT_SUCCESS) { if (res != ZE_RESULT_SUCCESS) {
/* L0 was likely initialized without sysman, don't bother */
errno = EINVAL; errno = EINVAL;
return NULL; return NULL;
} }
osdev = NULL; osdev = NULL;
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
hwloc_obj_t pcidev = osdev->parent; hwloc_obj_t pcidev;
if (strncmp(osdev->name, "ze", 2)) if (strncmp(osdev->name, "ze", 2))
continue; continue;
pcidev = osdev;
while (pcidev && pcidev->type != HWLOC_OBJ_PCI_DEVICE)
pcidev = pcidev->parent;
if (!pcidev)
continue;
if (pcidev if (pcidev
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE && pcidev->type == HWLOC_OBJ_PCI_DEVICE
&& pcidev->attr->pcidev.domain == pci.address.domain && pcidev->attr->pcidev.domain == pci.address.domain

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2019-2024 Inria. All rights reserved. * Copyright © 2019-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -58,6 +58,11 @@ extern "C" {
* an easy way to distinguish NUMA nodes of different kinds, as explained * an easy way to distinguish NUMA nodes of different kinds, as explained
* in \ref heteromem. * in \ref heteromem.
* *
* Beside tiers, hwloc defines a set of "default" nodes where normal memory
* allocations should be made from (see hwloc_topology_get_default_nodeset()).
* This is also useful for dividing the machine into a set of non-overlapping
* NUMA domains, for instance for binding tasks per domain.
*
* \sa An example is available in doc/examples/memory-attributes.c in the source tree. * \sa An example is available in doc/examples/memory-attributes.c in the source tree.
* *
* \note The API also supports specific objects as initiator, * \note The API also supports specific objects as initiator,
@@ -245,6 +250,16 @@ enum hwloc_local_numanode_flag_e {
*/ */
HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1), HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1),
/** \breif Select NUMA nodes whose locality intersects the given cpuset.
* This includes larger and smaller localities as well as localities
* that are partially included.
* For instance, if the locality is one core of both packages, a NUMA node
* local to one package is neither larger nor smaller than this locality,
* but it intersects it.
* \hideinitializer
*/
HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY = (1UL<<3),
/** \brief Select all NUMA nodes in the topology. /** \brief Select all NUMA nodes in the topology.
* The initiator \p initiator is ignored. * The initiator \p initiator is ignored.
* \hideinitializer * \hideinitializer
@@ -290,7 +305,57 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
hwloc_obj_t *nodes, hwloc_obj_t *nodes,
unsigned long flags); unsigned long flags);
/** \brief Return the set of default NUMA nodes
*
* In machines with heterogeneous memory, some NUMA nodes are considered
* the default ones, i.e. where basic allocations should be made from.
* These are usually DRAM nodes.
*
* Other nodes may be reserved for specific use (I/O device memory, e.g. GPU memory),
* small but high performance (HBM), large but slow memory (NVM), etc.
* Buffers should usually not be allocated from there unless explicitly required.
*
* This function fills \p nodeset with the bits of NUMA nodes considered default.
*
* It is guaranteed that these nodes have non-intersecting CPU sets,
* i.e. cores may not have multiple local NUMA nodes anymore.
* Hence this may be used to iterate over the platform divided into separate
* NUMA localities, for instance for binding one task per NUMA domain.
*
* Any core that had some local NUMA node(s) in the initial topology should
* still have one in the default nodeset. Corner cases where this would be
* wrong consist in asymmetric platforms with missing DRAM nodes, or topologies
* that were already restricted to less NUMA nodes.
*
* The returned nodeset may be passed to hwloc_topology_restrict() with
* ::HWLOC_RESTRICT_FLAG_BYNODESET to remove all non-default nodes from
* the topology. The resulting topology will be easier to use when iterating
* over (now homogeneous) NUMA nodes.
*
* The heuristics for finding default nodes relies on memory tiers and subtypes
* (see \ref heteromem) as well as the assumption that hardware vendors list
* default nodes first in hardware tables.
*
* \p flags must be \c 0 for now.
*
* \return 0 on success.
* \return -1 on error.
*
* \note The returned nodeset usually contains all nodes from a single memory
* tier, likely the DRAM one.
*
* \note The returned nodeset is included in the list of available nodes
* returned by hwloc_topology_get_topology_nodeset(). It is strictly smaller
* if the machine has heterogeneous memory.
*
* \note The heuristics may return a suboptimal set of nodes if hwloc could
* not guess memory types and/or if some default nodes were removed earlier
* from the topology (e.g. with hwloc_topology_restrict()).
*/
HWLOC_DECLSPEC int
hwloc_topology_get_default_nodeset(hwloc_topology_t topology,
hwloc_nodeset_t nodeset,
unsigned long flags);
/** \brief Return an attribute value for a specific target NUMA node. /** \brief Return an attribute value for a specific target NUMA node.
* *

View File

@@ -26,7 +26,7 @@ struct hwloc_backend;
/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components /** \defgroup hwlocality_disc_components Components and Plugins: Discovery components and backends
* *
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
* *
@@ -90,18 +90,6 @@ struct hwloc_disc_component {
struct hwloc_disc_component * next; struct hwloc_disc_component * next;
}; };
/** @} */
/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{
*/
/** \brief Discovery phase */ /** \brief Discovery phase */
typedef enum hwloc_disc_phase_e { typedef enum hwloc_disc_phase_e {
/** \brief xml or synthetic, platform-specific components such as bgq. /** \brief xml or synthetic, platform-specific components such as bgq.
@@ -313,6 +301,64 @@ struct hwloc_component {
void * data; void * data;
}; };
/** \brief Make sure that plugins can lookup core symbols.
*
* This is a sanity check to avoid lazy-lookup failures when libhwloc
* is loaded within a plugin, and later tries to load its own plugins.
* This may fail (and abort the program) if libhwloc symbols are in a
* private namespace.
*
* \return 0 on success.
* \return -1 if the plugin cannot be successfully loaded. The caller
* plugin init() callback should return a negative error code as well.
*
* Plugins should call this function in their init() callback to avoid
* later crashes if lazy symbol resolution is used by the upper layer that
* loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
*
* \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
* building the caller as a plugin.
*
* \note This function should remain inline so plugins can call it even
* when they cannot find libhwloc symbols.
*/
static __hwloc_inline int
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
{
#ifdef HWLOC_INSIDE_PLUGIN
void *sym;
#ifdef HWLOC_HAVE_LTDL
lt_dlhandle handle = lt_dlopen(NULL);
#else
void *handle = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
#endif
if (!handle)
/* cannot check, assume things will work */
return 0;
#ifdef HWLOC_HAVE_LTDL
sym = lt_dlsym(handle, symbol);
lt_dlclose(handle);
#else
sym = dlsym(handle, symbol);
dlclose(handle);
#endif
if (!sym) {
static int verboseenv_checked = 0;
static int verboseenv_value = 0;
if (!verboseenv_checked) {
const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
verboseenv_value = verboseenv ? atoi(verboseenv) : 0;
verboseenv_checked = 1;
}
if (verboseenv_value)
fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
pluginname, symbol);
return -1;
}
#endif /* HWLOC_INSIDE_PLUGIN */
return 0;
}
/** @} */ /** @} */
@@ -422,64 +468,6 @@ HWLOC_DECLSPEC int hwloc_obj_add_children_sets(hwloc_obj_t obj);
*/ */
HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused); HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused);
/** \brief Make sure that plugins can lookup core symbols.
*
* This is a sanity check to avoid lazy-lookup failures when libhwloc
* is loaded within a plugin, and later tries to load its own plugins.
* This may fail (and abort the program) if libhwloc symbols are in a
* private namespace.
*
* \return 0 on success.
* \return -1 if the plugin cannot be successfully loaded. The caller
* plugin init() callback should return a negative error code as well.
*
* Plugins should call this function in their init() callback to avoid
* later crashes if lazy symbol resolution is used by the upper layer that
* loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
*
* \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
* building the caller as a plugin.
*
* \note This function should remain inline so plugins can call it even
* when they cannot find libhwloc symbols.
*/
static __hwloc_inline int
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
{
#ifdef HWLOC_INSIDE_PLUGIN
void *sym;
#ifdef HWLOC_HAVE_LTDL
lt_dlhandle handle = lt_dlopen(NULL);
#else
void *handle = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
#endif
if (!handle)
/* cannot check, assume things will work */
return 0;
#ifdef HWLOC_HAVE_LTDL
sym = lt_dlsym(handle, symbol);
lt_dlclose(handle);
#else
sym = dlsym(handle, symbol);
dlclose(handle);
#endif
if (!sym) {
static int verboseenv_checked = 0;
static int verboseenv_value = 0;
if (!verboseenv_checked) {
const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
verboseenv_value = verboseenv ? atoi(verboseenv) : 0;
verboseenv_checked = 1;
}
if (verboseenv_value)
fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
pluginname, symbol);
return -1;
}
#endif /* HWLOC_INSIDE_PLUGIN */
return 0;
}
/** @} */ /** @} */

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* Copyright © 2010-2024 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -409,8 +409,10 @@ extern "C" {
#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e) #define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e)
#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) #define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY) #define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
#define HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY)
#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL) #define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL)
#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs) #define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs)
#define hwloc_topology_get_default_nodeset HWLOC_NAME(topology_get_default_nodeset)
#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name) #define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name)
#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags) #define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags)
@@ -599,7 +601,9 @@ extern "C" {
/* levelzero.h */ /* levelzero.h */
#define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset) #define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset)
#define hwloc_levelzero_get_sysman_device_cpuset HWLOC_NAME(levelzero_get_sysman_device_cpuset)
#define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev) #define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev)
#define hwloc_levelzero_get_sysman_device_osdev HWLOC_NAME(levelzero_get_sysman_device_osdev)
/* gl.h */ /* gl.h */
@@ -813,6 +817,8 @@ extern "C" {
#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults) #define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
#define hwloc_topology_clear HWLOC_NAME(topology_clear) #define hwloc_topology_clear HWLOC_NAME(topology_clear)
#define hwloc__reconnect HWLOC_NAME(_reconnect)
#define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object) #define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object)
#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index) #define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index)

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved. * Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2021 Inria. All rights reserved.
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@@ -17,10 +17,6 @@
#define HWLOC_HAVE_MSVC_CPUIDEX 1 #define HWLOC_HAVE_MSVC_CPUIDEX 1
/* #undef HAVE_MKSTEMP */
#define HWLOC_HAVE_X86_CPUID 1
/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */ /* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */
#define HAVE_CACHE_DESCRIPTOR 0 #define HAVE_CACHE_DESCRIPTOR 0
@@ -132,7 +128,8 @@
#define HAVE_DECL__SC_PAGE_SIZE 0 #define HAVE_DECL__SC_PAGE_SIZE 0
/* Define to 1 if you have the <dirent.h> header file. */ /* Define to 1 if you have the <dirent.h> header file. */
/* #undef HAVE_DIRENT_H */ /* #define HAVE_DIRENT_H 1 */
#undef HAVE_DIRENT_H
/* Define to 1 if you have the <dlfcn.h> header file. */ /* Define to 1 if you have the <dlfcn.h> header file. */
/* #undef HAVE_DLFCN_H */ /* #undef HAVE_DLFCN_H */
@@ -285,7 +282,7 @@
#define HAVE_STRING_H 1 #define HAVE_STRING_H 1
/* Define to 1 if you have the `strncasecmp' function. */ /* Define to 1 if you have the `strncasecmp' function. */
/* #undef HAVE_STRNCASECMP */ #define HAVE_STRNCASECMP 1
/* Define to '1' if sysctl is present and usable */ /* Define to '1' if sysctl is present and usable */
/* #undef HAVE_SYSCTL */ /* #undef HAVE_SYSCTL */
@@ -326,7 +323,8 @@
/* #undef HAVE_UNAME */ /* #undef HAVE_UNAME */
/* Define to 1 if you have the <unistd.h> header file. */ /* Define to 1 if you have the <unistd.h> header file. */
/* #undef HAVE_UNISTD_H */ /* #define HAVE_UNISTD_H 1 */
#undef HAVE_UNISTD_H
/* Define to 1 if you have the `uselocale' function. */ /* Define to 1 if you have the `uselocale' function. */
/* #undef HAVE_USELOCALE */ /* #undef HAVE_USELOCALE */
@@ -661,7 +659,7 @@
#define hwloc_pid_t HANDLE #define hwloc_pid_t HANDLE
/* Define this to either strncasecmp or strncmp */ /* Define this to either strncasecmp or strncmp */
/* #undef hwloc_strncasecmp */ #define hwloc_strncasecmp strncasecmp
/* Define this to the thread ID type */ /* Define this to the thread ID type */
#define hwloc_thread_t HANDLE #define hwloc_thread_t HANDLE

View File

@@ -186,7 +186,7 @@ hwloc_ffsl_from_ffs32(unsigned long x)
/** /**
* flsl helpers. * flsl helpers.
*/ */
#ifdef __GNUC_____ #ifdef __GNUC__
# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) # if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
# define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0) # define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0)

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2012, 2020 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* *
@@ -302,6 +302,9 @@ extern void hwloc__reorder_children(hwloc_obj_t parent);
extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology); extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
extern void hwloc_topology_clear(struct hwloc_topology *topology); extern void hwloc_topology_clear(struct hwloc_topology *topology);
#define _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE (1UL<<0)
extern int hwloc__reconnect(struct hwloc_topology *topology, unsigned long flags);
/* insert memory object as memory child of normal parent */ /* insert memory object as memory child of normal parent */
extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
hwloc_obj_t obj, const char *reason); hwloc_obj_t obj, const char *reason);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2024 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* Copyright © 2011-2012 Université Bordeaux * Copyright © 2011-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -699,7 +699,7 @@ hwloc_distances_add_commit(hwloc_topology_t topology,
} }
/* in case we added some groups, see if we need to reconnect */ /* in case we added some groups, see if we need to reconnect */
hwloc_topology_reconnect(topology, 0); hwloc__reconnect(topology, 0);
return 0; return 0;
@@ -1387,19 +1387,12 @@ static __hwloc_inline int is_nvswitch(hwloc_obj_t obj)
} }
static int static int
hwloc__distances_transform_merge_switch_ports(hwloc_topology_t topology, hwloc__distances_transform_merge_switch_ports(struct hwloc_distances_s *distances)
struct hwloc_distances_s *distances)
{ {
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
hwloc_obj_t *objs = distances->objs; hwloc_obj_t *objs = distances->objs;
hwloc_uint64_t *values = distances->values; hwloc_uint64_t *values = distances->values;
unsigned first, i, j, nbobjs = distances->nbobjs; unsigned first, i, j, nbobjs = distances->nbobjs;
if (strcmp(dist->name, "NVLinkBandwidth")) {
errno = EINVAL;
return -1;
}
/* find the first port */ /* find the first port */
first = (unsigned) -1; first = (unsigned) -1;
for(i=0; i<nbobjs; i++) for(i=0; i<nbobjs; i++)
@@ -1435,20 +1428,13 @@ hwloc__distances_transform_merge_switch_ports(hwloc_topology_t topology,
} }
static int static int
hwloc__distances_transform_transitive_closure(hwloc_topology_t topology, hwloc__distances_transform_transitive_closure(struct hwloc_distances_s *distances)
struct hwloc_distances_s *distances)
{ {
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
hwloc_obj_t *objs = distances->objs; hwloc_obj_t *objs = distances->objs;
hwloc_uint64_t *values = distances->values; hwloc_uint64_t *values = distances->values;
unsigned nbobjs = distances->nbobjs; unsigned nbobjs = distances->nbobjs;
unsigned i, j, k; unsigned i, j, k;
if (strcmp(dist->name, "NVLinkBandwidth")) {
errno = EINVAL;
return -1;
}
for(i=0; i<nbobjs; i++) { for(i=0; i<nbobjs; i++) {
hwloc_uint64_t bw_i2sw = 0; hwloc_uint64_t bw_i2sw = 0;
if (is_nvswitch(objs[i])) if (is_nvswitch(objs[i]))
@@ -1467,8 +1453,8 @@ hwloc__distances_transform_transitive_closure(hwloc_topology_t topology,
if (is_nvswitch(objs[k])) if (is_nvswitch(objs[k]))
bw_sw2j += values[k*nbobjs+j]; bw_sw2j += values[k*nbobjs+j];
/* bandwidth from i to j is now min(i2sw,sw2j) */ /* bandwidth from i to j now gets indirect bandwidth too, min(i2sw,sw2j) */
values[i*nbobjs+j] = bw_i2sw > bw_sw2j ? bw_sw2j : bw_i2sw; values[i*nbobjs+j] += bw_i2sw > bw_sw2j ? bw_sw2j : bw_i2sw;
} }
} }
@@ -1476,7 +1462,7 @@ hwloc__distances_transform_transitive_closure(hwloc_topology_t topology,
} }
int int
hwloc_distances_transform(hwloc_topology_t topology, hwloc_distances_transform(hwloc_topology_t topology __hwloc_attribute_unused,
struct hwloc_distances_s *distances, struct hwloc_distances_s *distances,
enum hwloc_distances_transform_e transform, enum hwloc_distances_transform_e transform,
void *transform_attr, void *transform_attr,
@@ -1495,13 +1481,13 @@ hwloc_distances_transform(hwloc_topology_t topology,
case HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS: case HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS:
{ {
int err; int err;
err = hwloc__distances_transform_merge_switch_ports(topology, distances); err = hwloc__distances_transform_merge_switch_ports(distances);
if (!err) if (!err)
err = hwloc__distances_transform_remove_null(distances); err = hwloc__distances_transform_remove_null(distances);
return err; return err;
} }
case HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE: case HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE:
return hwloc__distances_transform_transitive_closure(topology, distances); return hwloc__distances_transform_transitive_closure(distances);
default: default:
errno = EINVAL; errno = EINVAL;
return -1; return -1;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2020-2024 Inria. All rights reserved. * Copyright © 2020-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -1158,6 +1158,8 @@ match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long fl
{ {
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL) if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)
return 1; return 1;
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY)
return hwloc_bitmap_intersects(node->cpuset, cpuset);
if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
&& hwloc_bitmap_isincluded(cpuset, node->cpuset)) && hwloc_bitmap_isincluded(cpuset, node->cpuset))
return 1; return 1;
@@ -1180,6 +1182,7 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY
|HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY |HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
|HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY
| HWLOC_LOCAL_NUMANODE_FLAG_ALL)) { | HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
@@ -1226,6 +1229,93 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
return 0; return 0;
} }
static int compare_nodes_by_os_index(const void *_a, const void *_b)
{
const hwloc_obj_t * a = _a, * b = _b;
return (*a)->os_index - (*b)->os_index;
}
int
hwloc_topology_get_default_nodeset(hwloc_topology_t topology,
hwloc_nodeset_t nodeset,
unsigned long flags)
{
hwloc_obj_t *nodes;
hwloc_bitmap_t remainingcpuset;
unsigned nrnodes, i;
const char *first_subtype;
if (flags) {
errno = EINVAL;
goto out;
}
remainingcpuset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset);
if (!remainingcpuset)
goto out;
nrnodes = topology->slevels[HWLOC_SLEVEL_NUMANODE].nbobjs;
nodes = malloc(nrnodes * sizeof(*nodes));
if (!nodes)
goto out_with_remainingcpuset;
memcpy(nodes, topology->slevels[HWLOC_SLEVEL_NUMANODE].objs, nrnodes * sizeof(*nodes));
qsort(nodes, nrnodes, sizeof(*nodes), compare_nodes_by_os_index);
hwloc_bitmap_zero(nodeset);
/* always take the first node (FIXME: except if unexpected subtype?) */
first_subtype = nodes[0]->subtype;
hwloc_bitmap_set(nodeset, nodes[0]->os_index);
hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[0]->cpuset);
/* use all non-intersecting nodes with same subtype */
for(i=1; i<nrnodes; i++) {
/* check same or no subtype */
if (first_subtype) {
if (!nodes[i]->subtype || strcmp(first_subtype, nodes[i]->subtype))
continue;
} else if (nodes[i]->subtype) {
continue;
}
/* take non-overlapping nodes */
if (hwloc_bitmap_isincluded(nodes[i]->cpuset, remainingcpuset) /* can be empty */) {
hwloc_bitmap_set(nodeset, nodes[i]->os_index);
hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[i]->cpuset);
}
/* more needed? */
if (hwloc_bitmap_iszero(remainingcpuset))
goto done;
}
/* find more nodes to cover the entire topology cpuset.
* only take what's necessary: first nodes, non-empty */
for(i=1; i<nrnodes; i++) {
/* already taken? */
if (hwloc_bitmap_isset(nodeset, i))
continue;
/* take non-overlapping nodes, except empty */
if (hwloc_bitmap_isincluded(nodes[i]->cpuset, remainingcpuset)
&& !hwloc_bitmap_iszero(nodes[i]->cpuset)) {
hwloc_bitmap_set(nodeset, nodes[i]->os_index);
hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[i]->cpuset);
}
/* more needed? */
if (hwloc_bitmap_iszero(remainingcpuset))
goto done;
}
done:
free(nodes);
hwloc_bitmap_free(remainingcpuset);
return 0;
out_with_remainingcpuset:
hwloc_bitmap_free(remainingcpuset);
out:
return -1;
}
/************************************** /**************************************
* Using memattrs to identify HBM/DRAM * Using memattrs to identify HBM/DRAM
@@ -1433,10 +1523,15 @@ hwloc__group_memory_tiers(hwloc_topology_t topology,
} }
} }
/* Sort nodes. /* Sort nodes by tier type and bandwidth.
* We could also sort by the existing subtype. *
* KNL is the only case where subtypes are set in backends, but we set memattrs as well there. * We could also use the existing subtype but it's not clear it'd be better.
* Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes. * For NVIDIA GPU, "GPUMemory" is set in the Linux backend, and used above to set tier type anyway.
* For KNL, the Linux backend sets subtypes and memattrs, sorting by memattrs already works fine.
* Existing subtypes could have been imported from XML, usually mostly OK except maybe SPM (fallback for I don't know)?
* An envvar (or HWLOC_MEMTIERS_REFRESH special value?) could be passed to ignore existing subtypes,
* but "GPUMemory" wouldn't be available anymore, we'd have to use something else like "PCIBusId",
* but that one might not always be specific to GPU-backed NUMA nodes?
*/ */
hwloc_debug("Sorting memory node infos...\n"); hwloc_debug("Sorting memory node infos...\n");
qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw); qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2012, 2020 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -56,6 +56,9 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
RelationCache, RelationCache,
RelationProcessorPackage, RelationProcessorPackage,
RelationGroup, RelationGroup,
RelationProcessorDie,
RelationNumaNodeEx, /* only used to *request* extended numa info only, but included in RelationAll, never returned on output */
RelationProcessorModule,
RelationAll = 0xffff RelationAll = 0xffff
} LOGICAL_PROCESSOR_RELATIONSHIP; } LOGICAL_PROCESSOR_RELATIONSHIP;
#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ #else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
@@ -64,6 +67,11 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
# define RelationGroup 4 # define RelationGroup 4
# define RelationAll 0xffff # define RelationAll 0xffff
# endif /* HAVE_RELATIONPROCESSORPACKAGE */ # endif /* HAVE_RELATIONPROCESSORPACKAGE */
# ifndef HAVE_RELATIONPROCESSORDIE
# define RelationProcessorDie 5
# define RelationNumaNodeEx 6
# define RelationProcessorModule 7
# endif
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ #endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
#ifndef HAVE_GROUP_AFFINITY #ifndef HAVE_GROUP_AFFINITY
@@ -366,7 +374,7 @@ hwloc_win_get_processor_groups(void)
hwloc_debug("found %lu windows processor groups\n", nr_processor_groups); hwloc_debug("found %lu windows processor groups\n", nr_processor_groups);
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) { if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
if (HWLOC_SHOW_ALL_ERRORS()) if (HWLOC_SHOW_CRITICAL_ERRORS())
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n"); fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
} }
@@ -1068,6 +1076,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
id = HWLOC_UNKNOWN_INDEX; id = HWLOC_UNKNOWN_INDEX;
switch (procInfo->Relationship) { switch (procInfo->Relationship) {
case RelationNumaNodeEx: /* only used on input anyway */
case RelationNumaNode: case RelationNumaNode:
type = HWLOC_OBJ_NUMANODE; type = HWLOC_OBJ_NUMANODE;
/* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1 /* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1
@@ -1087,9 +1096,19 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
break; break;
case RelationProcessorPackage: case RelationProcessorPackage:
type = HWLOC_OBJ_PACKAGE; type = HWLOC_OBJ_PACKAGE;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationProcessorDie:
type = HWLOC_OBJ_DIE;
num = procInfo->Processor.GroupCount; num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask; GroupMask = procInfo->Processor.GroupMask;
break; break;
case RelationProcessorModule:
type = HWLOC_OBJ_GROUP;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationCache: case RelationCache:
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1; type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
/* GroupCount added approximately with NumaNode.GroupCount above */ /* GroupCount added approximately with NumaNode.GroupCount above */
@@ -1211,6 +1230,19 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
continue; continue;
} }
break; break;
case HWLOC_OBJ_GROUP:
switch (procInfo->Relationship) {
case RelationGroup:
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
break;
case RelationProcessorModule:
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
obj->subtype = strdup("Module");
break;
default:
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
}
break;
default: default:
break; break;
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2024 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2013 Université Bordeaux
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -653,7 +653,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
infos->apicid = ebx >> 24; infos->apicid = ebx >> 24;
if (edx & (1 << 28)) { if (edx & (1 << 28)) {
legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1); unsigned ebx_16_23 = (ebx >> 16) & 0xff;
if (ebx_16_23) {
legacy_max_log_proc = 1 << hwloc_flsl(ebx_16_23 - 1);
} else {
hwloc_debug("HTT bit set in CPUID 0x01.edx, but legacy_max_proc = 0 in ebx, assuming legacy_max_log_proc = 1\n");
legacy_max_log_proc = 1;
}
} else { } else {
hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n"); hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n");
legacy_max_log_proc = 1; legacy_max_log_proc = 1;
@@ -1742,7 +1748,7 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
if (topology->levels[0][0]->cpuset) { if (topology->levels[0][0]->cpuset) {
/* somebody else discovered things, reconnect levels so that we can look at them */ /* somebody else discovered things, reconnect levels so that we can look at them */
hwloc_topology_reconnect(topology, 0); hwloc__reconnect(topology, 0);
if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) { if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
/* only PUs were discovered, as much as we would, complete the topology with everything else */ /* only PUs were discovered, as much as we would, complete the topology with everything else */
alreadypus = 1; alreadypus = 1;

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2011, 2020 Université Bordeaux
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -415,6 +415,20 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
} }
} }
else if (!strcmp(name, "numanode_type")) {
switch (obj->type) {
case HWLOC_OBJ_NUMANODE: {
/* ignored for now, here for possible forward compat */
break;
}
default:
if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring numanode_type attribute for non-NUMA object\n",
state->global->msgprefix);
break;
}
}
else if (data->version_major < 2) { else if (data->version_major < 2) {
/************************ /************************
* deprecated from 1.x * deprecated from 1.x
@@ -876,14 +890,19 @@ hwloc__xml_import_object(hwloc_topology_t topology,
/* deal with possible future type */ /* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP; obj->type = HWLOC_OBJ_GROUP;
obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER; obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER;
} else if (!strcasecmp(attrvalue, "MemCache")) { }
#if 0
/* reenable if there's ever a future type that should be ignored without being an error */
else if (!strcasecmp(attrvalue, "MemCache")) {
/* ignore possible future type */ /* ignore possible future type */
obj->type = _HWLOC_OBJ_FUTURE; obj->type = _HWLOC_OBJ_FUTURE;
ignored = 1; ignored = 1;
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: %s object not-supported, will be ignored\n", fprintf(stderr, "%s: %s object not-supported, will be ignored\n",
state->global->msgprefix, attrvalue); state->global->msgprefix, attrvalue);
} else { }
#endif
else {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: unrecognized object type string %s\n", fprintf(stderr, "%s: unrecognized object type string %s\n",
state->global->msgprefix, attrvalue); state->global->msgprefix, attrvalue);
@@ -958,22 +977,22 @@ hwloc__xml_import_object(hwloc_topology_t topology,
if (hwloc__obj_type_is_normal(obj->type)) { if (hwloc__obj_type_is_normal(obj->type)) {
if (!hwloc__obj_type_is_normal(parent->type)) { if (!hwloc__obj_type_is_normal(parent->type)) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "normal object %s cannot be child of non-normal parent %s\n", fprintf(stderr, "%s: normal object %s cannot be child of non-normal parent %s\n",
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
goto error_with_object; goto error_with_object;
} }
} else if (hwloc__obj_type_is_memory(obj->type)) { } else if (hwloc__obj_type_is_memory(obj->type)) {
if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) { if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "Memory object %s cannot be child of non-normal-or-memory parent %s\n", fprintf(stderr, "%s: Memory object %s cannot be child of non-normal-or-memory parent %s\n",
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
goto error_with_object; goto error_with_object;
} }
} else if (hwloc__obj_type_is_io(obj->type)) { } else if (hwloc__obj_type_is_io(obj->type)) {
if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) { if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "I/O object %s cannot be child of non-normal-or-I/O parent %s\n", fprintf(stderr, "%s: I/O object %s cannot be child of non-normal-or-I/O parent %s\n",
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
goto error_with_object; goto error_with_object;
} }
} }

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2012, 2020 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* Copyright © 2022 IBM Corporation. All rights reserved. * Copyright © 2022 IBM Corporation. All rights reserved.
@@ -54,56 +54,6 @@
#endif #endif
#ifdef HWLOC_HAVE_LEVELZERO
/*
* Define ZES_ENABLE_SYSMAN=1 early so that the LevelZero backend gets Sysman enabled.
*
* Only if the levelzero was enabled in this build so that we don't enable sysman
* for external levelzero users when hwloc doesn't need it. If somebody ever loads
* an external levelzero plugin in a hwloc library built without levelzero (unlikely),
* he may have to manually set ZES_ENABLE_SYSMAN=1.
*
* Use the constructor if supported and/or the Windows DllMain callback.
* Do it in the main hwloc library instead of the levelzero component because
* the latter could be loaded later as a plugin.
*
* L0 seems to be using getenv() to check this variable on Windows
* (at least in the Intel Compute-Runtime of March 2021),
* but setenv() doesn't seem to exist on Windows, hence use putenv() to set the variable.
*
* For the record, Get/SetEnvironmentVariable() is not exactly the same as getenv/putenv():
* - getenv() doesn't see what was set with SetEnvironmentVariable()
* - GetEnvironmentVariable() doesn't see putenv() in cygwin (while it does in MSVC and MinGW).
* Hence, if L0 ever switches from getenv() to GetEnvironmentVariable(),
* it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable().
* Hopefully L0 will provide a way to enable Sysman without env vars before it happens.
*/
#if HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR
static void hwloc_constructor(void) __attribute__((constructor));
static void hwloc_constructor(void)
{
if (!getenv("ZES_ENABLE_SYSMAN"))
#ifdef HWLOC_WIN_SYS
putenv("ZES_ENABLE_SYSMAN=1");
#else
setenv("ZES_ENABLE_SYSMAN", "1", 1);
#endif
}
#endif
#ifdef HWLOC_WIN_SYS
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
{
if (fdwReason == DLL_PROCESS_ATTACH) {
if (!getenv("ZES_ENABLE_SYSMAN"))
/* Windows does not have a setenv, so use putenv. */
putenv((char *) "ZES_ENABLE_SYSMAN=1");
}
return TRUE;
}
#endif
#endif /* HWLOC_HAVE_LEVELZERO */
unsigned hwloc_get_api_version(void) unsigned hwloc_get_api_version(void)
{ {
return HWLOC_API_VERSION; return HWLOC_API_VERSION;
@@ -179,7 +129,7 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms
report_insert_error_format_obj(oldstr, sizeof(oldstr), old); report_insert_error_format_obj(oldstr, sizeof(oldstr), old);
fprintf(stderr, "****************************************************************************\n"); fprintf(stderr, "****************************************************************************\n");
fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION); fprintf(stderr, "* hwloc %s received invalid information.\n", HWLOC_VERSION);
fprintf(stderr, "*\n"); fprintf(stderr, "*\n");
fprintf(stderr, "* Failed with error: %s\n", msg); fprintf(stderr, "* Failed with error: %s\n", msg);
fprintf(stderr, "* while inserting %s\n", newstr); fprintf(stderr, "* while inserting %s\n", newstr);
@@ -1966,6 +1916,51 @@ static void hwloc_set_group_depth(hwloc_topology_t topology);
static void hwloc_connect_children(hwloc_obj_t parent); static void hwloc_connect_children(hwloc_obj_t parent);
static int hwloc_connect_levels(hwloc_topology_t topology); static int hwloc_connect_levels(hwloc_topology_t topology);
static int hwloc_connect_special_levels(hwloc_topology_t topology); static int hwloc_connect_special_levels(hwloc_topology_t topology);
static int hwloc_filter_levels_keep_structure(hwloc_topology_t topology);
/* reconnect children and levels,
* and optionnally merged identical levels while keeping structure.
*/
int
hwloc__reconnect(struct hwloc_topology *topology, unsigned long flags)
{
int merged_levels = 0;
if (topology->modified) {
hwloc_connect_children(topology->levels[0][0]);
if (hwloc_connect_levels(topology) < 0)
return -1;
}
if (flags & _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE) {
merged_levels = hwloc_filter_levels_keep_structure(topology);
/* If > 0, we merged some levels,
* some child+parent special children list may have been merged,
* hence specials level might need reordering,
* So reconnect special levels only here at the end.
*/
}
if (topology->modified || merged_levels) {
if (hwloc_connect_special_levels(topology) < 0)
return -1;
}
topology->modified = 0;
return 0;
}
int
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
{
if (flags) {
errno = EINVAL;
return -1;
}
return hwloc__reconnect(topology, 0);
}
hwloc_obj_t hwloc_obj_t
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj) hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
@@ -2058,7 +2053,10 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
/* properly inserted */ /* properly inserted */
hwloc_obj_add_children_sets(res); hwloc_obj_add_children_sets(res);
if (hwloc_topology_reconnect(topology, 0) < 0) /* reconnect levels.
* no need to filter levels keep_structure because groups are either auto-merged
* or have the dont_merge attribute */
if (hwloc__reconnect(topology, 0) < 0)
return NULL; return NULL;
/* Compute group total_memory. */ /* Compute group total_memory. */
@@ -2550,26 +2548,13 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i)
return 0; return 0;
} }
/* return > 0 if any level was removed. /* return > 0 if any level was removed. */
* performs its own reconnect internally if needed
*/
static int static int
hwloc_filter_levels_keep_structure(hwloc_topology_t topology) hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
{ {
unsigned i, j; unsigned i, j;
int res = 0; int res = 0;
if (topology->modified) {
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
* and at the end of this function:
* - we need normal levels before merging.
* - and we'll need to update special levels after merging.
*/
hwloc_connect_children(topology->levels[0][0]);
if (hwloc_connect_levels(topology) < 0)
return -1;
}
/* start from the bottom since we'll remove intermediate levels */ /* start from the bottom since we'll remove intermediate levels */
for(i=topology->nb_levels-1; i>0; i--) { for(i=topology->nb_levels-1; i>0; i--) {
int replacechild = 0, replaceparent = 0; int replacechild = 0, replaceparent = 0;
@@ -2591,9 +2576,15 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i)) if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i))
replacechild = 0; replacechild = 0;
} }
if (!replacechild && !replaceparent) if (!replacechild && !replaceparent) {
/* always merge Die into Package when levels are identical */
if (type1 == HWLOC_OBJ_PACKAGE && type2 == HWLOC_OBJ_DIE)
replacechild = 1;
}
if (!replacechild && !replaceparent) {
/* no ignoring */ /* no ignoring */
continue; continue;
}
/* Decide which one to actually replace */ /* Decide which one to actually replace */
if (replaceparent && replacechild) { if (replaceparent && replacechild) {
/* If both may be replaced, look at obj_type_priority */ /* If both may be replaced, look at obj_type_priority */
@@ -2736,20 +2727,6 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
} }
} }
if (res > 0 || topology-> modified) {
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
* and at the beginning of this function.
* If we merged some levels, some child+parent special children lisst
* may have been merged, hence specials level might need reordering,
* So reconnect special levels only here at the end
* (it's not needed at the beginning of this function).
*/
if (hwloc_connect_special_levels(topology) < 0)
return -1;
topology->modified = 0;
}
return 0; return 0;
} }
@@ -3278,33 +3255,6 @@ hwloc_connect_levels(hwloc_topology_t topology)
return 0; return 0;
} }
int
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
{
/* WARNING: when updating this function, the replicated code must
* also be updated inside hwloc_filter_levels_keep_structure()
*/
if (flags) {
errno = EINVAL;
return -1;
}
if (!topology->modified)
return 0;
hwloc_connect_children(topology->levels[0][0]);
if (hwloc_connect_levels(topology) < 0)
return -1;
if (hwloc_connect_special_levels(topology) < 0)
return -1;
topology->modified = 0;
return 0;
}
/* for regression testing, make sure the order of io devices /* for regression testing, make sure the order of io devices
* doesn't change with the dentry order in the filesystem * doesn't change with the dentry order in the filesystem
* *
@@ -3561,32 +3511,13 @@ hwloc_discover(struct hwloc_topology *topology,
hwloc_debug_print_objects(0, topology->levels[0][0]); hwloc_debug_print_objects(0, topology->levels[0][0]);
} }
/* see if we should ignore the root now that we know how many children it has */
if (!hwloc_filter_check_keep_object(topology, topology->levels[0][0])
&& topology->levels[0][0]->first_child && !topology->levels[0][0]->first_child->next_sibling) {
hwloc_obj_t oldroot = topology->levels[0][0];
hwloc_obj_t newroot = oldroot->first_child;
/* switch to the new root */
newroot->parent = NULL;
topology->levels[0][0] = newroot;
/* move oldroot memory/io/misc children before newroot children */
if (oldroot->memory_first_child)
prepend_siblings_list(&newroot->memory_first_child, oldroot->memory_first_child, newroot);
if (oldroot->io_first_child)
prepend_siblings_list(&newroot->io_first_child, oldroot->io_first_child, newroot);
if (oldroot->misc_first_child)
prepend_siblings_list(&newroot->misc_first_child, oldroot->misc_first_child, newroot);
/* destroy oldroot and use the new one */
hwloc_free_unlinked_object(oldroot);
}
/* /*
* All object cpusets and nodesets are properly set now. * All object cpusets and nodesets are properly set now.
*/ */
/* Now connect handy pointers to make remaining discovery easier. */ /* Now connect handy pointers to make remaining discovery easier. */
hwloc_debug("%s", "\nOk, finished tweaking, now connect\n"); hwloc_debug("%s", "\nOk, finished tweaking, now connect\n");
if (hwloc_topology_reconnect(topology, 0) < 0) if (hwloc__reconnect(topology, 0) < 0)
return -1; return -1;
hwloc_debug_print_objects(0, topology->levels[0][0]); hwloc_debug_print_objects(0, topology->levels[0][0]);
@@ -3642,12 +3573,12 @@ hwloc_discover(struct hwloc_topology *topology,
} }
hwloc_debug_print_objects(0, topology->levels[0][0]); hwloc_debug_print_objects(0, topology->levels[0][0]);
/* reconnect all (new groups might have appears, IO added, etc),
* and (now that everything was added) remove identical levels while keeping structure
*/
hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n"); hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
if (hwloc_filter_levels_keep_structure(topology) < 0) if (hwloc__reconnect(topology, _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE) < 0)
return -1; return -1;
/* takes care of reconnecting children/levels internally,
* because it needs normal levels.
* and it's often needed below because of Groups inserted for I/Os anyway */
hwloc_debug_print_objects(0, topology->levels[0][0]); hwloc_debug_print_objects(0, topology->levels[0][0]);
/* accumulate children memory in total_memory fields (only once parent is set) */ /* accumulate children memory in total_memory fields (only once parent is set) */
@@ -4494,7 +4425,7 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
hwloc_bitmap_free(droppedcpuset); hwloc_bitmap_free(droppedcpuset);
hwloc_bitmap_free(droppednodeset); hwloc_bitmap_free(droppednodeset);
if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */ if (hwloc__reconnect(topology, _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE) < 0)
goto out; goto out;
/* some objects may have disappeared and sets were modified, /* some objects may have disappeared and sets were modified,
@@ -5116,6 +5047,8 @@ hwloc_topology_check(struct hwloc_topology *topology)
unsigned i; unsigned i;
int j, depth; int j, depth;
assert(!topology->modified);
/* make sure we can use ranges to check types */ /* make sure we can use ranges to check types */
/* hwloc__obj_type_is_{,d,i}cache() want cache types to be ordered like this */ /* hwloc__obj_type_is_{,d,i}cache() want cache types to be ordered like this */

View File

@@ -31,7 +31,7 @@
#include <libkern/OSByteOrder.h> #include <libkern/OSByteOrder.h>
#define ethash_swap_u32(input_) OSSwapInt32(input_) #define ethash_swap_u32(input_) OSSwapInt32(input_)
#define ethash_swap_u64(input_) OSSwapInt64(input_) #define ethash_swap_u64(input_) OSSwapInt64(input_)
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) #elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) || defined(__HAIKU__)
#define ethash_swap_u32(input_) bswap32(input_) #define ethash_swap_u32(input_) bswap32(input_)
#define ethash_swap_u64(input_) bswap64(input_) #define ethash_swap_u64(input_) bswap64(input_)
#elif defined(__OpenBSD__) #elif defined(__OpenBSD__)

View File

@@ -46,7 +46,12 @@ else()
set(CPUID_LIB "") set(CPUID_LIB "")
endif() endif()
if (XMRIG_ARM) if (XMRIG_RISCV)
list(APPEND SOURCES_BACKEND_CPU
src/backend/cpu/platform/lscpu_riscv.cpp
src/backend/cpu/platform/BasicCpuInfo_riscv.cpp
)
elseif (XMRIG_ARM)
list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp) list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp)
if (XMRIG_OS_WIN) if (XMRIG_OS_WIN)

View File

@@ -91,7 +91,7 @@ public:
ICpuInfo() = default; ICpuInfo() = default;
virtual ~ICpuInfo() = default; virtual ~ICpuInfo() = default;
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) # if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) || defined(__riscv) && (__riscv_xlen == 64)
inline constexpr static bool is64bit() { return true; } inline constexpr static bool is64bit() { return true; }
# else # else
inline constexpr static bool is64bit() { return false; } inline constexpr static bool is64bit() { return false; }

View File

@@ -250,7 +250,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
break; break;
case 0x19: case 0x19:
if (m_model == 0x61) { if ((m_model == 0x61) || (m_model == 0x75)) {
m_arch = ARCH_ZEN4; m_arch = ARCH_ZEN4;
m_msrMod = MSR_MOD_RYZEN_19H_ZEN4; m_msrMod = MSR_MOD_RYZEN_19H_ZEN4;
} }

View File

@@ -65,7 +65,7 @@ protected:
inline Vendor vendor() const override { return m_vendor; } inline Vendor vendor() const override { return m_vendor; }
inline uint32_t model() const override inline uint32_t model() const override
{ {
# ifndef XMRIG_ARM # if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
return m_model; return m_model;
# else # else
return 0; return 0;
@@ -80,7 +80,7 @@ protected:
Vendor m_vendor = VENDOR_UNKNOWN; Vendor m_vendor = VENDOR_UNKNOWN;
private: private:
# ifndef XMRIG_ARM # if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
uint32_t m_procInfo = 0; uint32_t m_procInfo = 0;
uint32_t m_family = 0; uint32_t m_family = 0;
uint32_t m_model = 0; uint32_t m_model = 0;

View File

@@ -0,0 +1,116 @@
/* XMRig
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <array>
#include <cstring>
#include <fstream>
#include <thread>
#include "backend/cpu/platform/BasicCpuInfo.h"
#include "base/tools/String.h"
#include "3rdparty/rapidjson/document.h"
namespace xmrig {
extern String cpu_name_riscv();
extern bool has_riscv_vector();
extern bool has_riscv_crypto();
} // namespace xmrig
xmrig::BasicCpuInfo::BasicCpuInfo() :
m_threads(std::thread::hardware_concurrency())
{
m_units.resize(m_threads);
for (int32_t i = 0; i < static_cast<int32_t>(m_threads); ++i) {
m_units[i] = i;
}
memcpy(m_brand, "RISC-V", 6);
auto name = cpu_name_riscv();
if (!name.isNull()) {
strncpy(m_brand, name.data(), sizeof(m_brand) - 1);
}
// Check for crypto extensions (Zknd/Zkne/Zknh - AES and SHA)
m_flags.set(FLAG_AES, has_riscv_crypto());
// RISC-V typically supports 1GB huge pages
m_flags.set(FLAG_PDPE1GB, std::ifstream("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages").good());
}
const char *xmrig::BasicCpuInfo::backend() const
{
return "basic/1";
}
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
{
# ifdef XMRIG_ALGO_GHOSTRIDER
if (algorithm.family() == Algorithm::GHOSTRIDER) {
return CpuThreads(threads(), 8);
}
# endif
return CpuThreads(threads());
}
rapidjson::Value xmrig::BasicCpuInfo::toJSON(rapidjson::Document &doc) const
{
using namespace rapidjson;
auto &allocator = doc.GetAllocator();
Value out(kObjectType);
out.AddMember("brand", StringRef(brand()), allocator);
out.AddMember("aes", hasAES(), allocator);
out.AddMember("avx2", false, allocator);
out.AddMember("x64", is64bit(), allocator); // DEPRECATED will be removed in the next major release.
out.AddMember("64_bit", is64bit(), allocator);
out.AddMember("l2", static_cast<uint64_t>(L2()), allocator);
out.AddMember("l3", static_cast<uint64_t>(L3()), allocator);
out.AddMember("cores", static_cast<uint64_t>(cores()), allocator);
out.AddMember("threads", static_cast<uint64_t>(threads()), allocator);
out.AddMember("packages", static_cast<uint64_t>(packages()), allocator);
out.AddMember("nodes", static_cast<uint64_t>(nodes()), allocator);
out.AddMember("backend", StringRef(backend()), allocator);
out.AddMember("msr", "none", allocator);
out.AddMember("assembly", "none", allocator);
out.AddMember("arch", "riscv64", allocator);
Value flags(kArrayType);
if (hasAES()) {
flags.PushBack("aes", allocator);
}
out.AddMember("flags", flags, allocator);
return out;
}

View File

@@ -87,7 +87,7 @@ static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t typ
} }
#ifndef XMRIG_ARM #if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
static inline std::vector<hwloc_obj_t> findByType(hwloc_obj_t obj, hwloc_obj_type_t type) static inline std::vector<hwloc_obj_t> findByType(hwloc_obj_t obj, hwloc_obj_type_t type)
{ {
std::vector<hwloc_obj_t> out; std::vector<hwloc_obj_t> out;
@@ -207,7 +207,7 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset)
xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
{ {
# ifndef XMRIG_ARM # if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
if (L2() == 0 && L3() == 0) { if (L2() == 0 && L3() == 0) {
return BasicCpuInfo::threads(algorithm, limit); return BasicCpuInfo::threads(algorithm, limit);
} }
@@ -277,7 +277,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
{ {
# ifndef XMRIG_ARM # if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
constexpr size_t oneMiB = 1024U * 1024U; constexpr size_t oneMiB = 1024U * 1024U;
size_t PUs = countByType(cache, HWLOC_OBJ_PU); size_t PUs = countByType(cache, HWLOC_OBJ_PU);
@@ -311,17 +311,17 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1; uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1;
if (cache->attr->cache.depth == 3) { if (cache->attr->cache.depth == 3) {
for (size_t i = 0; i < cache->arity; ++i) { auto process_L2 = [&L2, &L2_associativity, L3_exclusive, this, &extra, scratchpad](hwloc_obj_t l2) {
hwloc_obj_t l2 = cache->children[i];
if (!hwloc_obj_type_is_cache(l2->type) || l2->attr == nullptr) { if (!hwloc_obj_type_is_cache(l2->type) || l2->attr == nullptr) {
continue; return;
} }
L2 += l2->attr->cache.size; L2 += l2->attr->cache.size;
L2_associativity = l2->attr->cache.associativity; L2_associativity = l2->attr->cache.associativity;
if (L3_exclusive) { if (L3_exclusive) {
if (vendor() == VENDOR_AMD) { if ((vendor() == VENDOR_AMD) && ((arch() == ARCH_ZEN4) || (arch() == ARCH_ZEN5))) {
// Use extra L2 only on newer CPUs because older CPUs (Zen 3 and older) don't benefit from it.
// For some reason, AMD CPUs can use only half of the exclusive L2/L3 cache combo efficiently // For some reason, AMD CPUs can use only half of the exclusive L2/L3 cache combo efficiently
extra += std::min<size_t>(l2->attr->cache.size / 2, scratchpad); extra += std::min<size_t>(l2->attr->cache.size / 2, scratchpad);
} }
@@ -329,6 +329,18 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
extra += scratchpad; extra += scratchpad;
} }
} }
};
for (size_t i = 0; i < cache->arity; ++i) {
hwloc_obj_t ch = cache->children[i];
if (ch->type == HWLOC_OBJ_GROUP) {
for (size_t j = 0; j < ch->arity; ++j) {
process_L2(ch->children[j]);
}
}
else {
process_L2(ch);
}
} }
} }

View File

@@ -1,7 +1,7 @@
/* XMRig /* XMRig
* Copyright (c) 2018 Riku Voipio <riku.voipio@iki.fi> * Copyright (c) 2018 Riku Voipio <riku.voipio@iki.fi>
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -98,8 +98,11 @@ static const id_part arm_part[] = {
{ 0xd0e, "Cortex-A76AE" }, { 0xd0e, "Cortex-A76AE" },
{ 0xd13, "Cortex-R52" }, { 0xd13, "Cortex-R52" },
{ 0xd15, "Cortex-R82" }, { 0xd15, "Cortex-R82" },
{ 0xd16, "Cortex-R52+" },
{ 0xd20, "Cortex-M23" }, { 0xd20, "Cortex-M23" },
{ 0xd21, "Cortex-M33" }, { 0xd21, "Cortex-M33" },
{ 0xd22, "Cortex-M55" },
{ 0xd23, "Cortex-M85" },
{ 0xd40, "Neoverse-V1" }, { 0xd40, "Neoverse-V1" },
{ 0xd41, "Cortex-A78" }, { 0xd41, "Cortex-A78" },
{ 0xd42, "Cortex-A78AE" }, { 0xd42, "Cortex-A78AE" },
@@ -115,6 +118,17 @@ static const id_part arm_part[] = {
{ 0xd4d, "Cortex-A715" }, { 0xd4d, "Cortex-A715" },
{ 0xd4e, "Cortex-X3" }, { 0xd4e, "Cortex-X3" },
{ 0xd4f, "Neoverse-V2" }, { 0xd4f, "Neoverse-V2" },
{ 0xd80, "Cortex-A520" },
{ 0xd81, "Cortex-A720" },
{ 0xd82, "Cortex-X4" },
{ 0xd83, "Neoverse-V3AE" },
{ 0xd84, "Neoverse-V3" },
{ 0xd85, "Cortex-X925" },
{ 0xd87, "Cortex-A725" },
{ 0xd88, "Cortex-A520AE" },
{ 0xd89, "Cortex-A720AE" },
{ 0xd8e, "Neoverse-N3" },
{ 0xd8f, "Cortex-A320" },
{ -1, nullptr } { -1, nullptr }
}; };
@@ -154,6 +168,7 @@ static const id_part apm_part[] = {
}; };
static const id_part qcom_part[] = { static const id_part qcom_part[] = {
{ 0x001, "Oryon" },
{ 0x00f, "Scorpion" }, { 0x00f, "Scorpion" },
{ 0x02d, "Scorpion" }, { 0x02d, "Scorpion" },
{ 0x04d, "Krait" }, { 0x04d, "Krait" },
@@ -194,6 +209,22 @@ static const id_part marvell_part[] = {
{ -1, nullptr } { -1, nullptr }
}; };
static const id_part apple_part[] = {
{ 0x022, "M1" },
{ 0x023, "M1" },
{ 0x024, "M1-Pro" },
{ 0x025, "M1-Pro" },
{ 0x028, "M1-Max" },
{ 0x029, "M1-Max" },
{ 0x032, "M2" },
{ 0x033, "M2" },
{ 0x034, "M2-Pro" },
{ 0x035, "M2-Pro" },
{ 0x038, "M2-Max" },
{ 0x039, "M2-Max" },
{ -1, nullptr }
};
static const id_part faraday_part[] = { static const id_part faraday_part[] = {
{ 0x526, "FA526" }, { 0x526, "FA526" },
{ 0x626, "FA626" }, { 0x626, "FA626" },
@@ -227,47 +258,40 @@ static const id_part intel_part[] = {
static const struct id_part fujitsu_part[] = { static const struct id_part fujitsu_part[] = {
{ 0x001, "A64FX" }, { 0x001, "A64FX" },
{ 0x003, "MONAKA" },
{ -1, nullptr } { -1, nullptr }
}; };
static const id_part hisi_part[] = { static const id_part hisi_part[] = {
{ 0xd01, "Kunpeng-920" }, /* aka tsv110 */ { 0xd01, "TaiShan-v110" }, /* used in Kunpeng-920 SoC */
{ 0xd40, "Cortex-A76" }, /* HiSilicon uses this ID though advertises A76 */ { 0xd02, "TaiShan-v120" }, /* used in Kirin 990A and 9000S SoCs */
{ 0xd40, "Cortex-A76" }, /* HiSilicon uses this ID though advertises A76 */
{ 0xd41, "Cortex-A77" }, /* HiSilicon uses this ID though advertises A77 */
{ -1, nullptr } { -1, nullptr }
}; };
static const id_part apple_part[] = {
{ 0x022, "M1" },
{ 0x023, "M1" },
{ 0x024, "M1-Pro" },
{ 0x025, "M1-Pro" },
{ 0x028, "M1-Max" },
{ 0x029, "M1-Max" },
{ 0x032, "M2" },
{ 0x033, "M2" },
{ 0x034, "M2-Pro" },
{ 0x035, "M2-Pro" },
{ 0x038, "M2-Max" },
{ 0x039, "M2-Max" },
{ -1, nullptr }
};
static const struct id_part ft_part[] = {
{ 0x660, "FTC660" },
{ 0x661, "FTC661" },
{ 0x662, "FTC662" },
{ 0x663, "FTC663" },
{ -1, nullptr }
};
static const struct id_part ampere_part[] = { static const struct id_part ampere_part[] = {
{ 0xac3, "Ampere-1" }, { 0xac3, "Ampere-1" },
{ 0xac4, "Ampere-1a" }, { 0xac4, "Ampere-1a" },
{ -1, nullptr } { -1, nullptr }
}; };
static const struct id_part ft_part[] = {
{ 0x303, "FTC310" },
{ 0x660, "FTC660" },
{ 0x661, "FTC661" },
{ 0x662, "FTC662" },
{ 0x663, "FTC663" },
{ 0x664, "FTC664" },
{ 0x862, "FTC862" },
{ -1, nullptr }
};
static const struct id_part ms_part[] = {
{ 0xd49, "Azure-Cobalt-100" },
{ -1, nullptr }
};
static const hw_impl hw_implementer[] = { static const hw_impl hw_implementer[] = {
{ 0x41, arm_part, "ARM" }, { 0x41, arm_part, "ARM" },
@@ -276,7 +300,7 @@ static const hw_impl hw_implementer[] = {
{ 0x44, dec_part, "DEC" }, { 0x44, dec_part, "DEC" },
{ 0x46, fujitsu_part, "FUJITSU" }, { 0x46, fujitsu_part, "FUJITSU" },
{ 0x48, hisi_part, "HiSilicon" }, { 0x48, hisi_part, "HiSilicon" },
{ 0x4e, nvidia_part, "Nvidia" }, { 0x4e, nvidia_part, "NVIDIA" },
{ 0x50, apm_part, "APM" }, { 0x50, apm_part, "APM" },
{ 0x51, qcom_part, "Qualcomm" }, { 0x51, qcom_part, "Qualcomm" },
{ 0x53, samsung_part, "Samsung" }, { 0x53, samsung_part, "Samsung" },
@@ -284,6 +308,7 @@ static const hw_impl hw_implementer[] = {
{ 0x61, apple_part, "Apple" }, { 0x61, apple_part, "Apple" },
{ 0x66, faraday_part, "Faraday" }, { 0x66, faraday_part, "Faraday" },
{ 0x69, intel_part, "Intel" }, { 0x69, intel_part, "Intel" },
{ 0x6d, ms_part, "Microsoft" },
{ 0x70, ft_part, "Phytium" }, { 0x70, ft_part, "Phytium" },
{ 0xc0, ampere_part, "Ampere" } { 0xc0, ampere_part, "Ampere" }
}; };

View File

@@ -0,0 +1,140 @@
/* XMRig
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "base/tools/String.h"
#include "3rdparty/fmt/core.h"
#include <cstdio>
#include <cstring>
#include <string>
namespace xmrig {
struct riscv_cpu_desc
{
String model;
String isa;
String uarch;
bool has_vector = false;
bool has_crypto = false;
inline bool isReady() const { return !model.isNull(); }
};
static bool lookup_riscv(char *line, const char *pattern, String &value)
{
char *p = strstr(line, pattern);
if (!p) {
return false;
}
p += strlen(pattern);
while (isspace(*p)) {
++p;
}
if (*p == ':') {
++p;
}
while (isspace(*p)) {
++p;
}
// Remove trailing newline
size_t len = strlen(p);
if (len > 0 && p[len - 1] == '\n') {
p[len - 1] = '\0';
}
// Ensure we call the const char* assignment (which performs a copy)
// instead of the char* overload (which would take ownership of the pointer)
value = (const char*)p;
return true;
}
// Returns true when the ISA string advertises vector support: either the
// single-letter 'v' extension in the base part (e.g. "rv64imafdcv") or one
// of the "zve*" embedded-vector subsets.
static bool isa_has_vector(const char *isa)
{
    if (strstr(isa, "zve")) {
        return true;
    }

    // Scan the single-letter extensions between the "rv32"/"rv64" prefix and
    // the first '_' (multi-letter extensions are underscore-separated). This
    // also covers strings ending in 'v' (e.g. "rv64gcv"), which the previous
    // "v_" substring check missed, while skipping the 'v' of the "rv" prefix.
    if (strncmp(isa, "rv", 2) == 0) {
        const char *p = isa + 2;
        while (*p >= '0' && *p <= '9') {
            ++p;
        }
        for (; *p && *p != '_'; ++p) {
            if (*p == 'v') {
                return true;
            }
        }
    }

    return false;
}

/**
 * Parses /proc/cpuinfo and fills @p desc with model, ISA, uarch and
 * vector/crypto capability flags.
 *
 * @return true when at least the model name was found
 */
static bool read_riscv_cpuinfo(riscv_cpu_desc *desc)
{
    auto fp = fopen("/proc/cpuinfo", "r");
    if (!fp) {
        return false;
    }

    char buf[2048]; // Larger buffer for long ISA strings

    while (fgets(buf, sizeof(buf), fp) != nullptr) {
        lookup_riscv(buf, "model name", desc->model);

        if (lookup_riscv(buf, "isa", desc->isa)) {
            // Check for vector extensions ('v' single-letter or zve* subsets)
            if (isa_has_vector(desc->isa.data())) {
                desc->has_vector = true;
            }
            // Check for crypto extensions (AES, SHA, etc.)
            // zkn* = NIST crypto suite, zks* = SM crypto suite
            // Note: zba/zbb/zbc/zbs are bit-manipulation, NOT crypto
            if (strstr(buf, "zknd") || strstr(buf, "zkne") || strstr(buf, "zknh") ||
                strstr(buf, "zksed") || strstr(buf, "zksh")) {
                desc->has_crypto = true;
            }
        }

        lookup_riscv(buf, "uarch", desc->uarch);

        // Stop early once both the model name and the ISA string are known
        if (desc->isReady() && !desc->isa.isNull()) {
            break;
        }
    }

    fclose(fp);

    return desc->isReady();
}
// Returns a human-readable CPU name, "model (uarch)" when the
// microarchitecture is known, or "RISC-V" when /proc/cpuinfo is unusable.
String cpu_name_riscv()
{
    riscv_cpu_desc desc;

    if (!read_riscv_cpuinfo(&desc)) {
        return "RISC-V";
    }

    if (desc.uarch.isNull()) {
        return desc.model;
    }

    return fmt::format("{} ({})", desc.model, desc.uarch).c_str();
}
// Returns true when /proc/cpuinfo reports a vector-capable ISA;
// false when the file cannot be parsed.
bool has_riscv_vector()
{
    riscv_cpu_desc desc;

    return read_riscv_cpuinfo(&desc) && desc.has_vector;
}
// Returns true when /proc/cpuinfo reports scalar crypto extensions
// (zkn*/zks*); false when the file cannot be parsed.
bool has_riscv_crypto()
{
    riscv_cpu_desc desc;

    return read_riscv_cpuinfo(&desc) && desc.has_crypto;
}
} // namespace xmrig

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -71,11 +71,11 @@ char *xmrig::Platform::createUserAgent()
#ifndef XMRIG_FEATURE_HWLOC #ifndef XMRIG_FEATURE_HWLOC
#ifdef __DragonFly__ #if defined(__DragonFly__) || defined(XMRIG_OS_OPENBSD) || defined(XMRIG_OS_HAIKU)
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
{ {
return true; return false;
} }
#else #else

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -53,13 +53,21 @@ char *xmrig::Platform::createUserAgent()
char *buf = new char[max](); char *buf = new char[max]();
int length = snprintf(buf, max, "%s/%s (Windows NT %lu.%lu", APP_NAME, APP_VERSION, osver.dwMajorVersion, osver.dwMinorVersion); int length = snprintf(buf, max, "%s/%s (Windows NT %lu.%lu", APP_NAME, APP_VERSION, osver.dwMajorVersion, osver.dwMinorVersion);
# if defined(__x86_64__) || defined(_M_AMD64) # if defined(XMRIG_64_BIT)
length += snprintf(buf + length, max - length, "; Win64; x64) libuv/%s", uv_version_string()); length += snprintf(buf + length, max - length, "; Win64; "
# if defined(XMRIG_ARM)
"arm64"
# else
"x64"
# endif
") libuv/%s", uv_version_string());
# else # else
length += snprintf(buf + length, max - length, ") libuv/%s", uv_version_string()); length += snprintf(buf + length, max - length, ") libuv/%s", uv_version_string());
# endif # endif
# ifdef __GNUC__ # ifdef __clang__
snprintf(buf + length, max - length, " clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__);
# elif defined(__GNUC__)
snprintf(buf + length, max - length, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); snprintf(buf + length, max - length, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
# elif _MSC_VER # elif _MSC_VER
snprintf(buf + length, max - length, " msvc/%d", MSVC_VERSION); snprintf(buf + length, max - length, " msvc/%d", MSVC_VERSION);

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -18,14 +18,12 @@
#include <cstdio> #include <cstdio>
#ifdef _MSC_VER #ifdef _MSC_VER
# include "getopt/getopt.h" # include "getopt/getopt.h"
#else #else
# include <getopt.h> # include <getopt.h>
#endif #endif
#include "base/kernel/config/BaseTransform.h" #include "base/kernel/config/BaseTransform.h"
#include "base/io/json/JsonChain.h" #include "base/io/json/JsonChain.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
@@ -37,7 +35,6 @@
#include "base/net/stratum/Pools.h" #include "base/net/stratum/Pools.h"
#include "core/config/Config_platform.h" #include "core/config/Config_platform.h"
#ifdef XMRIG_FEATURE_TLS #ifdef XMRIG_FEATURE_TLS
# include "base/net/tls/TlsConfig.h" # include "base/net/tls/TlsConfig.h"
#endif #endif
@@ -47,9 +44,9 @@ void xmrig::BaseTransform::load(JsonChain &chain, Process *process, IConfigTrans
{ {
using namespace rapidjson; using namespace rapidjson;
int key = 0; int key = 0;
int argc = process->arguments().argc(); const int argc = process->arguments().argc();
char **argv = process->arguments().argv(); char **argv = process->arguments().argv();
Document doc(kObjectType); Document doc(kObjectType);
@@ -262,7 +259,8 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch
case IConfig::DaemonKey: /* --daemon */ case IConfig::DaemonKey: /* --daemon */
case IConfig::SubmitToOriginKey: /* --submit-to-origin */ case IConfig::SubmitToOriginKey: /* --submit-to-origin */
case IConfig::VerboseKey: /* --verbose */ case IConfig::VerboseKey: /* --verbose */
case IConfig::DnsIPv6Key: /* --dns-ipv6 */ case IConfig::DnsIPv4Key: /* --ipv4 */
case IConfig::DnsIPv6Key: /* --ipv6 */
return transformBoolean(doc, key, true); return transformBoolean(doc, key, true);
case IConfig::ColorKey: /* --no-color */ case IConfig::ColorKey: /* --no-color */
@@ -323,8 +321,11 @@ void xmrig::BaseTransform::transformBoolean(rapidjson::Document &doc, int key, b
case IConfig::NoTitleKey: /* --no-title */ case IConfig::NoTitleKey: /* --no-title */
return set(doc, BaseConfig::kTitle, enable); return set(doc, BaseConfig::kTitle, enable);
case IConfig::DnsIPv6Key: /* --dns-ipv6 */ case IConfig::DnsIPv4Key: /* --ipv4 */
return set(doc, DnsConfig::kField, DnsConfig::kIPv6, enable); return set(doc, DnsConfig::kField, DnsConfig::kIPv, 4);
case IConfig::DnsIPv6Key: /* --ipv6 */
return set(doc, DnsConfig::kField, DnsConfig::kIPv, 6);
default: default:
break; break;

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,9 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_ICONFIG_H #pragma once
#define XMRIG_ICONFIG_H
#include "3rdparty/rapidjson/fwd.h" #include "3rdparty/rapidjson/fwd.h"
@@ -82,7 +80,8 @@ public:
HugePageSizeKey = 1050, HugePageSizeKey = 1050,
PauseOnActiveKey = 1051, PauseOnActiveKey = 1051,
SubmitToOriginKey = 1052, SubmitToOriginKey = 1052,
DnsIPv6Key = 1053, DnsIPv4Key = '4',
DnsIPv6Key = '6',
DnsTtlKey = 1054, DnsTtlKey = 1054,
SpendSecretKey = 1055, SpendSecretKey = 1055,
DaemonZMQPortKey = 1056, DaemonZMQPortKey = 1056,
@@ -177,7 +176,4 @@ public:
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif // XMRIG_ICONFIG_H

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,21 +16,16 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_IDNSBACKEND_H #pragma once
#define XMRIG_IDNSBACKEND_H
#include "base/tools/Object.h" #include "base/tools/Object.h"
#include <memory>
namespace xmrig { namespace xmrig {
class DnsConfig;
class DnsRecords; class DnsRecords;
class DnsRequest;
class IDnsListener; class IDnsListener;
class String; class String;
@@ -43,12 +38,8 @@ public:
IDnsBackend() = default; IDnsBackend() = default;
virtual ~IDnsBackend() = default; virtual ~IDnsBackend() = default;
virtual const DnsRecords &records() const = 0; virtual void resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config) = 0;
virtual std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl) = 0;
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif // XMRIG_IDNSBACKEND_H

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -18,6 +18,7 @@
#include "base/net/dns/Dns.h" #include "base/net/dns/Dns.h"
#include "base/net/dns/DnsRequest.h"
#include "base/net/dns/DnsUvBackend.h" #include "base/net/dns/DnsUvBackend.h"
@@ -25,17 +26,21 @@ namespace xmrig {
DnsConfig Dns::m_config; DnsConfig Dns::m_config;
std::map<String, std::shared_ptr<IDnsBackend> > Dns::m_backends; std::map<String, std::shared_ptr<IDnsBackend>> Dns::m_backends;
} // namespace xmrig } // namespace xmrig
std::shared_ptr<xmrig::DnsRequest> xmrig::Dns::resolve(const String &host, IDnsListener *listener, uint64_t ttl) std::shared_ptr<xmrig::DnsRequest> xmrig::Dns::resolve(const String &host, IDnsListener *listener)
{ {
auto req = std::make_shared<DnsRequest>(listener);
if (m_backends.find(host) == m_backends.end()) { if (m_backends.find(host) == m_backends.end()) {
m_backends.insert({ host, std::make_shared<DnsUvBackend>() }); m_backends.insert({ host, std::make_shared<DnsUvBackend>() });
} }
return m_backends.at(host)->resolve(host, listener, ttl == 0 ? m_config.ttl() : ttl); m_backends.at(host)->resolve(host, req, m_config);
return req;
} }

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -43,7 +43,7 @@ public:
inline static const DnsConfig &config() { return m_config; } inline static const DnsConfig &config() { return m_config; }
inline static void set(const DnsConfig &config) { m_config = config; } inline static void set(const DnsConfig &config) { m_config = config; }
static std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl = 0); static std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener);
private: private:
static DnsConfig m_config; static DnsConfig m_config;

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -20,15 +20,15 @@
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "base/io/json/Json.h" #include "base/io/json/Json.h"
#include <algorithm> #include <algorithm>
#include <uv.h>
namespace xmrig { namespace xmrig {
const char *DnsConfig::kField = "dns"; const char *DnsConfig::kField = "dns";
const char *DnsConfig::kIPv6 = "ipv6"; const char *DnsConfig::kIPv = "ip_version";
const char *DnsConfig::kTTL = "ttl"; const char *DnsConfig::kTTL = "ttl";
@@ -37,8 +37,26 @@ const char *DnsConfig::kTTL = "ttl";
xmrig::DnsConfig::DnsConfig(const rapidjson::Value &value) xmrig::DnsConfig::DnsConfig(const rapidjson::Value &value)
{ {
m_ipv6 = Json::getBool(value, kIPv6, m_ipv6); const uint32_t ipv = Json::getUint(value, kIPv, m_ipv);
m_ttl = std::max(Json::getUint(value, kTTL, m_ttl), 1U); if (ipv == 0 || ipv == 4 || ipv == 6) {
m_ipv = ipv;
}
m_ttl = std::max(Json::getUint(value, kTTL, m_ttl), 1U);
}
int xmrig::DnsConfig::ai_family() const
{
if (m_ipv == 4) {
return AF_INET;
}
if (m_ipv == 6) {
return AF_INET6;
}
return AF_UNSPEC;
} }
@@ -49,8 +67,8 @@ rapidjson::Value xmrig::DnsConfig::toJSON(rapidjson::Document &doc) const
auto &allocator = doc.GetAllocator(); auto &allocator = doc.GetAllocator();
Value obj(kObjectType); Value obj(kObjectType);
obj.AddMember(StringRef(kIPv6), m_ipv6, allocator); obj.AddMember(StringRef(kIPv), m_ipv, allocator);
obj.AddMember(StringRef(kTTL), m_ttl, allocator); obj.AddMember(StringRef(kTTL), m_ttl, allocator);
return obj; return obj;
} }

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,9 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_DNSCONFIG_H #pragma once
#define XMRIG_DNSCONFIG_H
#include "3rdparty/rapidjson/fwd.h" #include "3rdparty/rapidjson/fwd.h"
@@ -30,25 +28,22 @@ class DnsConfig
{ {
public: public:
static const char *kField; static const char *kField;
static const char *kIPv6; static const char *kIPv;
static const char *kTTL; static const char *kTTL;
DnsConfig() = default; DnsConfig() = default;
DnsConfig(const rapidjson::Value &value); DnsConfig(const rapidjson::Value &value);
inline bool isIPv6() const { return m_ipv6; } inline uint32_t ipv() const { return m_ipv; }
inline uint32_t ttl() const { return m_ttl * 1000U; } inline uint32_t ttl() const { return m_ttl * 1000U; }
int ai_family() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const; rapidjson::Value toJSON(rapidjson::Document &doc) const;
private: private:
bool m_ipv6 = false; uint32_t m_ttl = 30U;
uint32_t m_ttl = 30U; uint32_t m_ipv = 0U;
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_DNSCONFIG_H */

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,19 +16,16 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <uv.h> #include <uv.h>
#include "base/net/dns/DnsRecord.h" #include "base/net/dns/DnsRecord.h"
xmrig::DnsRecord::DnsRecord(const addrinfo *addr) : xmrig::DnsRecord::DnsRecord(const addrinfo *addr)
m_type(addr->ai_family == AF_INET6 ? AAAA : (addr->ai_family == AF_INET ? A : Unknown))
{ {
static_assert(sizeof(m_data) >= sizeof(sockaddr_in6), "Not enough storage for IPv6 address."); static_assert(sizeof(m_data) >= sizeof(sockaddr_in6), "Not enough storage for IPv6 address.");
memcpy(m_data, addr->ai_addr, m_type == AAAA ? sizeof(sockaddr_in6) : sizeof(sockaddr_in)); memcpy(m_data, addr->ai_addr, addr->ai_family == AF_INET6 ? sizeof(sockaddr_in6) : sizeof(sockaddr_in));
} }
@@ -44,7 +41,7 @@ xmrig::String xmrig::DnsRecord::ip() const
{ {
char *buf = nullptr; char *buf = nullptr;
if (m_type == AAAA) { if (reinterpret_cast<const sockaddr &>(m_data).sa_family == AF_INET6) {
buf = new char[45](); buf = new char[45]();
uv_ip6_name(reinterpret_cast<const sockaddr_in6*>(m_data), buf, 45); uv_ip6_name(reinterpret_cast<const sockaddr_in6*>(m_data), buf, 45);
} }

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,14 +16,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_DNSRECORD_H #pragma once
#define XMRIG_DNSRECORD_H
struct addrinfo; struct addrinfo;
struct sockaddr; struct sockaddr;
#include "base/tools/String.h" #include "base/tools/String.h"
@@ -33,28 +30,15 @@ namespace xmrig {
class DnsRecord class DnsRecord
{ {
public: public:
enum Type : uint32_t {
Unknown,
A,
AAAA
};
DnsRecord() {} DnsRecord() {}
DnsRecord(const addrinfo *addr); DnsRecord(const addrinfo *addr);
const sockaddr *addr(uint16_t port = 0) const; const sockaddr *addr(uint16_t port = 0) const;
String ip() const; String ip() const;
inline bool isValid() const { return m_type != Unknown; }
inline Type type() const { return m_type; }
private: private:
mutable uint8_t m_data[28]{}; mutable uint8_t m_data[28]{};
const Type m_type = Unknown;
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_DNSRECORD_H */

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -18,90 +18,96 @@
#include <uv.h> #include <uv.h>
#include "base/net/dns/DnsRecords.h" #include "base/net/dns/DnsRecords.h"
#include "base/net/dns/Dns.h"
const xmrig::DnsRecord &xmrig::DnsRecords::get(DnsRecord::Type prefered) const namespace {
static size_t dns_records_count(const addrinfo *res, int &ai_family)
{
size_t ipv4 = 0;
size_t ipv6 = 0;
while (res != nullptr) {
if (res->ai_family == AF_INET) {
++ipv4;
}
if (res->ai_family == AF_INET6) {
++ipv6;
}
res = res->ai_next;
}
if (ai_family == AF_INET6 && !ipv6) {
ai_family = AF_INET;
}
switch (ai_family) {
case AF_UNSPEC:
return ipv4 + ipv6;
case AF_INET:
return ipv4;
case AF_INET6:
return ipv6;
default:
break;
}
return 0;
}
} // namespace
xmrig::DnsRecords::DnsRecords(const addrinfo *res, int ai_family)
{
size_t size = dns_records_count(res, ai_family);
if (!size) {
return;
}
m_records.reserve(size);
if (ai_family == AF_UNSPEC) {
while (res != nullptr) {
if (res->ai_family == AF_INET || res->ai_family == AF_INET6) {
m_records.emplace_back(res);
}
res = res->ai_next;
};
} else {
while (res != nullptr) {
if (res->ai_family == ai_family) {
m_records.emplace_back(res);
}
res = res->ai_next;
};
}
size = m_records.size();
if (size > 1) {
m_index = static_cast<size_t>(rand()) % size; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
}
}
const xmrig::DnsRecord &xmrig::DnsRecords::get() const
{ {
static const DnsRecord defaultRecord; static const DnsRecord defaultRecord;
if (isEmpty()) { const size_t size = m_records.size();
return defaultRecord; if (size > 0) {
} return m_records[m_index++ % size];
const size_t ipv4 = m_ipv4.size();
const size_t ipv6 = m_ipv6.size();
if (ipv6 && (prefered == DnsRecord::AAAA || Dns::config().isIPv6() || !ipv4)) {
return m_ipv6[ipv6 == 1 ? 0 : static_cast<size_t>(rand()) % ipv6]; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
}
if (ipv4) {
return m_ipv4[ipv4 == 1 ? 0 : static_cast<size_t>(rand()) % ipv4]; // NOLINT(concurrency-mt-unsafe, cert-msc30-c, cert-msc50-cpp)
} }
return defaultRecord; return defaultRecord;
} }
size_t xmrig::DnsRecords::count(DnsRecord::Type type) const
{
if (type == DnsRecord::A) {
return m_ipv4.size();
}
if (type == DnsRecord::AAAA) {
return m_ipv6.size();
}
return m_ipv4.size() + m_ipv6.size();
}
void xmrig::DnsRecords::clear()
{
m_ipv4.clear();
m_ipv6.clear();
}
void xmrig::DnsRecords::parse(addrinfo *res)
{
clear();
addrinfo *ptr = res;
size_t ipv4 = 0;
size_t ipv6 = 0;
while (ptr != nullptr) {
if (ptr->ai_family == AF_INET) {
++ipv4;
}
else if (ptr->ai_family == AF_INET6) {
++ipv6;
}
ptr = ptr->ai_next;
}
if (ipv4 == 0 && ipv6 == 0) {
return;
}
m_ipv4.reserve(ipv4);
m_ipv6.reserve(ipv6);
ptr = res;
while (ptr != nullptr) {
if (ptr->ai_family == AF_INET) {
m_ipv4.emplace_back(ptr);
}
else if (ptr->ai_family == AF_INET6) {
m_ipv6.emplace_back(ptr);
}
ptr = ptr->ai_next;
}
}

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,9 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_DNSRECORDS_H #pragma once
#define XMRIG_DNSRECORDS_H
#include "base/net/dns/DnsRecord.h" #include "base/net/dns/DnsRecord.h"
@@ -29,20 +27,19 @@ namespace xmrig {
class DnsRecords class DnsRecords
{ {
public: public:
inline bool isEmpty() const { return m_ipv4.empty() && m_ipv6.empty(); } DnsRecords() = default;
DnsRecords(const addrinfo *res, int ai_family);
const DnsRecord &get(DnsRecord::Type prefered = DnsRecord::Unknown) const; inline bool isEmpty() const { return m_records.empty(); }
size_t count(DnsRecord::Type type = DnsRecord::Unknown) const; inline const std::vector<DnsRecord> &records() const { return m_records; }
void clear(); inline size_t size() const { return m_records.size(); }
void parse(addrinfo *res);
const DnsRecord &get() const;
private: private:
std::vector<DnsRecord> m_ipv4; mutable size_t m_index = 0;
std::vector<DnsRecord> m_ipv6; std::vector<DnsRecord> m_records;
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_DNSRECORDS_H */

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,35 +16,30 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_DNSREQUEST_H #pragma once
#define XMRIG_DNSREQUEST_H
#include "base/kernel/interfaces/IDnsListener.h"
#include "base/tools/Object.h"
#include <cstdint>
namespace xmrig { namespace xmrig {
class IDnsListener; class DnsRequest : public IDnsListener
class DnsRequest
{ {
public: public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(DnsRequest) XMRIG_DISABLE_COPY_MOVE_DEFAULT(DnsRequest)
DnsRequest(IDnsListener *listener) : listener(listener) {} inline DnsRequest(IDnsListener *listener) : m_listener(listener) {}
~DnsRequest() = default; ~DnsRequest() override = default;
IDnsListener *listener; protected:
inline void onResolved(const DnsRecords &records, int status, const char *error) override {
m_listener->onResolved(records, status, error);
}
private:
IDnsListener *m_listener;
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_DNSREQUEST_H */

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,13 +16,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <uv.h> #include <uv.h>
#include "base/net/dns/DnsUvBackend.h" #include "base/net/dns/DnsUvBackend.h"
#include "base/kernel/interfaces/IDnsListener.h" #include "base/kernel/interfaces/IDnsListener.h"
#include "base/net/dns/DnsRequest.h" #include "base/net/dns/DnsConfig.h"
#include "base/tools/Chrono.h" #include "base/tools/Chrono.h"
@@ -73,21 +71,23 @@ xmrig::DnsUvBackend::~DnsUvBackend()
} }
std::shared_ptr<xmrig::DnsRequest> xmrig::DnsUvBackend::resolve(const String &host, IDnsListener *listener, uint64_t ttl) void xmrig::DnsUvBackend::resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config)
{ {
auto req = std::make_shared<DnsRequest>(listener); m_queue.emplace_back(listener);
if (Chrono::currentMSecsSinceEpoch() - m_ts <= ttl && !m_records.isEmpty()) { if (Chrono::currentMSecsSinceEpoch() - m_ts <= config.ttl()) {
req->listener->onResolved(m_records, 0, nullptr); return notify();
} else {
m_queue.emplace(req);
} }
if (m_queue.size() == 1 && !resolve(host)) { if (m_req) {
done(); return;
} }
return req; m_ai_family = config.ai_family();
if (!resolve(host)) {
notify();
}
} }
@@ -102,44 +102,46 @@ bool xmrig::DnsUvBackend::resolve(const String &host)
} }
void xmrig::DnsUvBackend::done() void xmrig::DnsUvBackend::notify()
{ {
const char *error = m_status < 0 ? uv_strerror(m_status) : nullptr; const char *error = m_status < 0 ? uv_strerror(m_status) : nullptr;
while (!m_queue.empty()) { for (const auto &l : m_queue) {
auto req = std::move(m_queue.front()).lock(); auto listener = l.lock();
if (req) { if (listener) {
req->listener->onResolved(m_records, m_status, error); listener->onResolved(m_records, m_status, error);
} }
m_queue.pop();
} }
m_queue.clear();
m_req.reset(); m_req.reset();
} }
void xmrig::DnsUvBackend::onResolved(int status, addrinfo *res) void xmrig::DnsUvBackend::onResolved(int status, addrinfo *res)
{ {
m_ts = Chrono::currentMSecsSinceEpoch(); m_status = status;
m_ts = Chrono::currentMSecsSinceEpoch();
if ((m_status = status) < 0) { if (m_status < 0) {
return done(); m_records = {};
return notify();
} }
m_records.parse(res); m_records = { res, m_ai_family };
if (m_records.isEmpty()) { if (m_records.isEmpty()) {
m_status = UV_EAI_NONAME; m_status = UV_EAI_NONAME;
} }
done(); notify();
} }
void xmrig::DnsUvBackend::onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res) void xmrig::DnsUvBackend::onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res)
{ {
auto backend = getStorage().get(req->data); auto *backend = getStorage().get(req->data);
if (backend) { if (backend) {
backend->onResolved(status, res); backend->onResolved(status, res);
} }

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,16 +16,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_DNSUVBACKEND_H #pragma once
#define XMRIG_DNSUVBACKEND_H
#include "base/kernel/interfaces/IDnsBackend.h" #include "base/kernel/interfaces/IDnsBackend.h"
#include "base/net/dns/DnsRecords.h" #include "base/net/dns/DnsRecords.h"
#include "base/net/tools/Storage.h" #include "base/net/tools/Storage.h"
#include <deque>
#include <queue>
using uv_getaddrinfo_t = struct uv_getaddrinfo_s; using uv_getaddrinfo_t = struct uv_getaddrinfo_s;
@@ -43,20 +40,19 @@ public:
~DnsUvBackend() override; ~DnsUvBackend() override;
protected: protected:
inline const DnsRecords &records() const override { return m_records; } void resolve(const String &host, const std::weak_ptr<IDnsListener> &listener, const DnsConfig &config) override;
std::shared_ptr<DnsRequest> resolve(const String &host, IDnsListener *listener, uint64_t ttl) override;
private: private:
bool resolve(const String &host); bool resolve(const String &host);
void done(); void notify();
void onResolved(int status, addrinfo *res); void onResolved(int status, addrinfo *res);
static void onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res); static void onResolved(uv_getaddrinfo_t *req, int status, addrinfo *res);
DnsRecords m_records; DnsRecords m_records;
int m_ai_family = 0;
int m_status = 0; int m_status = 0;
std::queue<std::weak_ptr<DnsRequest> > m_queue; std::deque<std::weak_ptr<IDnsListener>> m_queue;
std::shared_ptr<uv_getaddrinfo_t> m_req; std::shared_ptr<uv_getaddrinfo_t> m_req;
uint64_t m_ts = 0; uint64_t m_ts = 0;
uintptr_t m_key; uintptr_t m_key;
@@ -66,7 +62,4 @@ private:
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_DNSUVBACKEND_H */

View File

@@ -1,7 +1,7 @@
/* XMRig /* XMRig
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd> * Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -45,7 +45,7 @@ namespace xmrig {
// https://wiki.openssl.org/index.php/Diffie-Hellman_parameters // https://wiki.openssl.org/index.php/Diffie-Hellman_parameters
#if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER) #if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3))
static DH *get_dh2048() static DH *get_dh2048()
{ {
static unsigned char dhp_2048[] = { static unsigned char dhp_2048[] = {
@@ -152,7 +152,7 @@ bool xmrig::TlsContext::load(const TlsConfig &config)
SSL_CTX_set_options(m_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3); SSL_CTX_set_options(m_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
SSL_CTX_set_options(m_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE); SSL_CTX_set_options(m_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER) # if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3)
SSL_CTX_set_max_early_data(m_ctx, 0); SSL_CTX_set_max_early_data(m_ctx, 0);
# endif # endif
@@ -180,7 +180,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites)
return true; return true;
} }
# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER) # if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3)
if (SSL_CTX_set_ciphersuites(m_ctx, ciphersuites) == 1) { if (SSL_CTX_set_ciphersuites(m_ctx, ciphersuites) == 1) {
return true; return true;
} }
@@ -194,7 +194,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites)
bool xmrig::TlsContext::setDH(const char *dhparam) bool xmrig::TlsContext::setDH(const char *dhparam)
{ {
# if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER) # if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3))
DH *dh = nullptr; DH *dh = nullptr;
if (dhparam != nullptr) { if (dhparam != nullptr) {

View File

@@ -241,8 +241,13 @@ bool xmrig::BlockTemplate::parse(bool hashes)
ar(m_amount); ar(m_amount);
ar(m_outputType); ar(m_outputType);
// output type must be txout_to_key (2) or txout_to_tagged_key (3) const bool is_fcmp_pp = (m_coin == Coin::MONERO) && (m_version.first >= 17);
if ((m_outputType != 2) && (m_outputType != 3)) {
// output type must be txout_to_key (2) or txout_to_tagged_key (3) for versions < 17, and txout_to_carrot_v1 (0) for version FCMP++
if (is_fcmp_pp && (m_outputType == 0)) {
// all good
}
else if ((m_outputType != 2) && (m_outputType != 3)) {
return false; return false;
} }
@@ -250,6 +255,11 @@ bool xmrig::BlockTemplate::parse(bool hashes)
ar(m_ephPublicKey, kKeySize); ar(m_ephPublicKey, kKeySize);
if (is_fcmp_pp) {
ar(m_carrotViewTag);
ar(m_janusAnchor);
}
if (m_coin == Coin::ZEPHYR) { if (m_coin == Coin::ZEPHYR) {
if (m_outputType != 2) { if (m_outputType != 2) {
return false; return false;

View File

@@ -148,6 +148,8 @@ private:
Buffer m_hashes; Buffer m_hashes;
Buffer m_minerTxMerkleTreeBranch; Buffer m_minerTxMerkleTreeBranch;
uint8_t m_rootHash[kHashSize]{}; uint8_t m_rootHash[kHashSize]{};
uint8_t m_carrotViewTag[3]{};
uint8_t m_janusAnchor[16]{};
}; };

View File

@@ -23,15 +23,22 @@
#pragma once #pragma once
#include <cstdint> #include <cstdint>
#ifdef XMRIG_64_BIT #if defined(XMRIG_64_BIT)
# ifdef _MSC_VER # if defined(_MSC_VER)
# include <intrin.h> # include <intrin.h>
# pragma intrinsic(_umul128) # if defined(XMRIG_ARM)
# define __umul128 _umul128 #pragma intrinsic(__umulh)
# elif defined __GNUC__ static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t *high) {
*high = __umulh(a, b);
return a * b;
}
# else
# pragma intrinsic(_umul128)
# define __umul128 _umul128
# endif
# elif defined(__GNUC__)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{ {
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;

View File

@@ -93,7 +93,7 @@
"dhparam": null "dhparam": null
}, },
"dns": { "dns": {
"ipv6": false, "ip_version": 0,
"ttl": 30 "ttl": 30
}, },
"user-agent": null, "user-agent": null,

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,9 +16,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_CONFIG_PLATFORM_H #pragma once
#define XMRIG_CONFIG_PLATFORM_H
#ifdef _MSC_VER #ifdef _MSC_VER
# include "getopt/getopt.h" # include "getopt/getopt.h"
@@ -28,13 +26,12 @@
#include "base/kernel/interfaces/IConfig.h" #include "base/kernel/interfaces/IConfig.h"
#include "version.h"
namespace xmrig { namespace xmrig {
static const char short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:Sx:"; static const char short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:Sx:46";
static const option options[] = { static const option options[] = {
@@ -99,7 +96,8 @@ static const option options[] = {
{ "no-title", 0, nullptr, IConfig::NoTitleKey }, { "no-title", 0, nullptr, IConfig::NoTitleKey },
{ "pause-on-battery", 0, nullptr, IConfig::PauseOnBatteryKey }, { "pause-on-battery", 0, nullptr, IConfig::PauseOnBatteryKey },
{ "pause-on-active", 1, nullptr, IConfig::PauseOnActiveKey }, { "pause-on-active", 1, nullptr, IConfig::PauseOnActiveKey },
{ "dns-ipv6", 0, nullptr, IConfig::DnsIPv6Key }, { "ipv4", 0, nullptr, IConfig::DnsIPv4Key },
{ "ipv6", 0, nullptr, IConfig::DnsIPv6Key },
{ "dns-ttl", 1, nullptr, IConfig::DnsTtlKey }, { "dns-ttl", 1, nullptr, IConfig::DnsTtlKey },
{ "spend-secret-key", 1, nullptr, IConfig::SpendSecretKey }, { "spend-secret-key", 1, nullptr, IConfig::SpendSecretKey },
# ifdef XMRIG_FEATURE_BENCHMARK # ifdef XMRIG_FEATURE_BENCHMARK
@@ -169,6 +167,3 @@ static const option options[] = {
} // namespace xmrig } // namespace xmrig
#endif /* XMRIG_CONFIG_PLATFORM_H */

View File

@@ -4,8 +4,8 @@
* Copyright (c) 2014 Lucas Jones <https://github.com/lucasjones> * Copyright (c) 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright (c) 2014-2016 Wolf9466 <https://github.com/OhGodAPet> * Copyright (c) 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright (c) 2016 Jay D Dee <jayddee246@gmail.com> * Copyright (c) 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -21,13 +21,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef XMRIG_USAGE_H #pragma once
#define XMRIG_USAGE_H
#include "version.h" #include "version.h"
#include <string> #include <string>
@@ -59,7 +56,8 @@ static inline const std::string &usage()
u += " --tls-fingerprint=HEX pool TLS certificate fingerprint for strict certificate pinning\n"; u += " --tls-fingerprint=HEX pool TLS certificate fingerprint for strict certificate pinning\n";
# endif # endif
u += " --dns-ipv6 prefer IPv6 records from DNS responses\n"; u += " -4, --ipv4 resolve names to IPv4 addresses\n";
u += " -6, --ipv6 resolve names to IPv6 addresses\n";
u += " --dns-ttl=N N seconds (default: 30) TTL for internal DNS cache\n"; u += " --dns-ttl=N N seconds (default: 30) TTL for internal DNS cache\n";
# ifdef XMRIG_FEATURE_HTTP # ifdef XMRIG_FEATURE_HTTP
@@ -205,6 +203,4 @@ static inline const std::string &usage()
} }
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_USAGE_H */

View File

@@ -23,7 +23,7 @@
#include "crypto/common/VirtualMemory.h" #include "crypto/common/VirtualMemory.h"
#if defined(XMRIG_ARM) #if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
# include "crypto/cn/CryptoNight_arm.h" # include "crypto/cn/CryptoNight_arm.h"
#else #else
# include "crypto/cn/CryptoNight_x86.h" # include "crypto/cn/CryptoNight_x86.h"

View File

@@ -30,7 +30,7 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#if defined _MSC_VER || defined XMRIG_ARM #if defined _MSC_VER || defined XMRIG_ARM || defined XMRIG_RISCV
# define ABI_ATTRIBUTE # define ABI_ATTRIBUTE
#else #else
# define ABI_ATTRIBUTE __attribute__((ms_abi)) # define ABI_ATTRIBUTE __attribute__((ms_abi))

View File

@@ -27,6 +27,9 @@
#ifndef XMRIG_CRYPTONIGHT_ARM_H #ifndef XMRIG_CRYPTONIGHT_ARM_H
#define XMRIG_CRYPTONIGHT_ARM_H #define XMRIG_CRYPTONIGHT_ARM_H
#ifdef XMRIG_RISCV
# include "crypto/cn/sse2rvv.h"
#endif
#include "base/crypto/keccak.h" #include "base/crypto/keccak.h"
#include "crypto/cn/CnAlgo.h" #include "crypto/cn/CnAlgo.h"

View File

@@ -30,7 +30,7 @@
#include <math.h> #include <math.h>
// VARIANT ALTERATIONS // VARIANT ALTERATIONS
#ifndef XMRIG_ARM #if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
# define VARIANT1_INIT(part) \ # define VARIANT1_INIT(part) \
uint64_t tweak1_2_##part = 0; \ uint64_t tweak1_2_##part = 0; \
if (BASE == Algorithm::CN_1) { \ if (BASE == Algorithm::CN_1) { \
@@ -60,7 +60,7 @@
} }
#ifndef XMRIG_ARM #if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
# define VARIANT2_INIT(part) \ # define VARIANT2_INIT(part) \
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[12])); \ __m128i division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[12])); \
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[13])); __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(h##part[13]));

View File

@@ -29,6 +29,8 @@
#if defined(XMRIG_ARM) #if defined(XMRIG_ARM)
# include "crypto/cn/sse2neon.h" # include "crypto/cn/sse2neon.h"
#elif defined(XMRIG_RISCV)
# include "crypto/cn/sse2rvv.h"
#elif defined(__GNUC__) #elif defined(__GNUC__)
# include <x86intrin.h> # include <x86intrin.h>
#else #else

748
src/crypto/cn/sse2rvv.h Normal file
View File

@@ -0,0 +1,748 @@
/* XMRig
* Copyright (c) 2025 Slayingripper <https://github.com/Slayingripper>
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2025 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* SSE to RISC-V Vector (RVV) optimized compatibility header
* Provides both scalar fallback and vectorized implementations using RVV intrinsics
*
* Based on sse2neon.h concepts, adapted for RISC-V architecture with RVV extensions
* Original sse2neon.h: https://github.com/DLTcollab/sse2neon
*/
#ifndef XMRIG_SSE2RVV_OPTIMIZED_H
#define XMRIG_SSE2RVV_OPTIMIZED_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
/* Check if RVV is available */
#if defined(__riscv_vector)
#include <riscv_vector.h>
#define USE_RVV_INTRINSICS 1
#else
#define USE_RVV_INTRINSICS 0
#endif
/* 128-bit vector type */
/* 128-bit SSE register emulation: a plain 16-byte union giving byte, word,
 * dword and qword views (signed and unsigned) of the same storage. All the
 * emulated intrinsics below pass it by value and read/write whichever view
 * they need. Deliberately contains no RVV vector members: RVV intrinsic
 * types are sizeless and may not appear inside aggregates. */
typedef union {
    uint8_t u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    int8_t i8[16];
    int16_t i16[8];
    int32_t i32[4];
    int64_t i64[2];
} __m128i_union;
typedef __m128i_union __m128i;
/* Set operations */
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
{
    /* SSE2 argument order: e0 lands in lane 0 (least significant dword). */
    __m128i r;
    r.i32[0] = e0; r.i32[1] = e1;
    r.i32[2] = e2; r.i32[3] = e3;
    return r;
}
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
{
    /* e0 is the low qword, e1 the high qword (SSE2 order). */
    __m128i r;
    r.i64[1] = e1;
    r.i64[0] = e0;
    return r;
}
static inline __m128i _mm_setzero_si128(void)
{
    /* Zero-initializing the first union member clears all 16 bytes. */
    __m128i r = { {0} };
    return r;
}
/* Extract/insert operations */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    /* MOVD r32 <- xmm: return the low 32-bit lane. */
    const int lo = a.i32[0];
    return lo;
}
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    /* MOVQ r64 <- xmm: return the low 64-bit lane. */
    const int64_t lo = a.i64[0];
    return lo;
}
static inline __m128i _mm_cvtsi32_si128(int a)
{
    /* MOVD xmm <- r32: a in lane 0, upper 96 bits cleared. */
    __m128i r = _mm_setzero_si128();
    r.i32[0] = a;
    return r;
}
static inline __m128i _mm_cvtsi64_si128(int64_t a)
{
    /* MOVQ xmm <- r64: a in the low qword, high qword cleared. */
    __m128i r = _mm_setzero_si128();
    r.i64[0] = a;
    return r;
}
/* Shuffle operations */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    /* PSHUFD: each destination dword selects the source lane encoded in
       the corresponding 2-bit field of imm8. */
    __m128i r;
    for (int lane = 0; lane < 4; ++lane) {
        r.u32[lane] = a.u32[(imm8 >> (2 * lane)) & 0x3];
    }
    return r;
}
/* Logical operations - optimized with RVV when available */
/* Bitwise XOR of two 128-bit values (PXOR). The RVV path handles both
 * 64-bit lanes with a single vector op; the scalar path is the fallback
 * when the toolchain lacks RVV intrinsics. */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);  /* request two 64-bit lanes */
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    vuint64m1_t vr = __riscv_vxor_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = a.u64[0] ^ b.u64[0];
    result.u64[1] = a.u64[1] ^ b.u64[1];
    return result;
#endif
}
/* Bitwise OR of two 128-bit values (POR); vectorized on RVV builds. */
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    vuint64m1_t vr = __riscv_vor_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = a.u64[0] | b.u64[0];
    result.u64[1] = a.u64[1] | b.u64[1];
    return result;
#endif
}
/* Bitwise AND of two 128-bit values (PAND); vectorized on RVV builds. */
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    vuint64m1_t vr = __riscv_vand_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = a.u64[0] & b.u64[0];
    result.u64[1] = a.u64[1] & b.u64[1];
    return result;
#endif
}
/* PANDN semantics: (~a) & b — note the first operand is the one inverted. */
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    vuint64m1_t vnot_a = __riscv_vnot_v_u64m1(va, vl);
    vuint64m1_t vr = __riscv_vand_vv_u64m1(vnot_a, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = (~a.u64[0]) & b.u64[0];
    result.u64[1] = (~a.u64[1]) & b.u64[1];
    return result;
#endif
}
/* Shift operations */
/* PSLLDQ: shift the whole 128-bit value left by imm8 BYTES (toward higher
 * byte indices), filling with zeros. As in SSE, a count above 15 clears
 * the register. The RVV path uses a slideup into a zeroed vector. */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    vuint8m1_t vr = __riscv_vslideup_vx_u8m1(__riscv_vmv_v_x_u8m1(0, vl), va, count, vl);
    __riscv_vse8_v_u8m1(result.u8, vr, vl);
    return result;
#else
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    for (int i = 0; i < 16 - count; i++) {
        result.u8[i + count] = a.u8[i];
    }
    return result;
#endif
}
/* PSRLDQ: shift the whole 128-bit value right by imm8 BYTES (toward lower
 * byte indices), zero-filling; counts above 15 clear the register. */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, count, vl);
    __riscv_vse8_v_u8m1(result.u8, vr, vl);
    return result;
#else
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    for (int i = count; i < 16; i++) {
        result.u8[i - count] = a.u8[i];
    }
    return result;
#endif
}
/* PSLLQ: independent left shift of both 64-bit lanes by imm8 BITS.
 * SSE semantics: counts above 63 produce zero (handled explicitly).
 * NOTE(review): negative imm8 is not expected here — SSE treats the count
 * as unsigned. */
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    if (imm8 > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
    } else {
        size_t vl = __riscv_vsetvl_e64m1(2);
        vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
        vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, imm8, vl);
        __riscv_vse64_v_u64m1(result.u64, vr, vl);
    }
    return result;
#else
    __m128i result;
    if (imm8 > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
    } else {
        result.u64[0] = a.u64[0] << imm8;
        result.u64[1] = a.u64[1] << imm8;
    }
    return result;
#endif
}
/* PSRLQ: independent logical right shift of both 64-bit lanes by imm8
 * BITS; counts above 63 produce zero, matching SSE. */
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    if (imm8 > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
    } else {
        size_t vl = __riscv_vsetvl_e64m1(2);
        vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
        vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, imm8, vl);
        __riscv_vse64_v_u64m1(result.u64, vr, vl);
    }
    return result;
#else
    __m128i result;
    if (imm8 > 63) {
        result.u64[0] = 0;
        result.u64[1] = 0;
    } else {
        result.u64[0] = a.u64[0] >> imm8;
        result.u64[1] = a.u64[1] >> imm8;
    }
    return result;
#endif
}
/* Load/store operations - optimized with RVV */
/* Aligned 16-byte load (MOVDQA equivalent). Unlike real SSE the scalar
 * fallback goes through memcpy and therefore tolerates unaligned pointers;
 * callers should still pass aligned data for the RVV path's sake. */
static inline __m128i _mm_load_si128(const __m128i* p)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t v = __riscv_vle64_v_u64m1((const uint64_t*)p, vl);
    __riscv_vse64_v_u64m1(result.u64, v, vl);
    return result;
#else
    __m128i result;
    memcpy(&result, p, sizeof(__m128i));
    return result;
#endif
}
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
    /* Unaligned 16-byte load (MOVDQU); memcpy avoids alignment UB. */
    __m128i r;
    memcpy(&r, p, sizeof r);
    return r;
}
/* Aligned 16-byte store (MOVDQA equivalent); see _mm_load_si128 for the
 * alignment caveat on the scalar fallback. */
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
#if USE_RVV_INTRINSICS
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t v = __riscv_vle64_v_u64m1(a.u64, vl);
    __riscv_vse64_v_u64m1((uint64_t*)p, v, vl);
#else
    memcpy(p, &a, sizeof(__m128i));
#endif
}
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
    /* Unaligned 16-byte store (MOVDQU). */
    memcpy(p, &a, sizeof a);
}
/* Arithmetic operations - optimized with RVV */
/* PADDQ: lane-wise 64-bit addition (unsigned wraparound). */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    vuint64m1_t vr = __riscv_vadd_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = a.u64[0] + b.u64[0];
    result.u64[1] = a.u64[1] + b.u64[1];
    return result;
#endif
}
/* PADDD: lane-wise 32-bit addition with wraparound.
 *
 * Fix: the scalar fallback previously added the SIGNED views
 * (a.i32[i] + b.i32[i]), which is undefined behavior on overflow in C,
 * whereas PADDD (and the RVV path, which already used the unsigned view)
 * must wrap modulo 2^32. Adding through u32 gives the identical bit
 * pattern with defined semantics. */
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e32m1(4);
    vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
    vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl);
    vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl);
    __riscv_vse32_v_u32m1(result.u32, vr, vl);
    return result;
#else
    __m128i result;
    for (int i = 0; i < 4; i++) {
        result.u32[i] = a.u32[i] + b.u32[i];  /* defined wraparound */
    }
    return result;
#endif
}
/* PSUBQ: lane-wise 64-bit subtraction (unsigned wraparound). */
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
    vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
    vuint64m1_t vr = __riscv_vsub_vv_u64m1(va, vb, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = a.u64[0] - b.u64[0];
    result.u64[1] = a.u64[1] - b.u64[1];
    return result;
#endif
}
/* PMULUDQ (_mm_mul_epu32): multiply the EVEN unsigned 32-bit lanes —
 * lane 0 and lane 2 — of a and b, producing two full 64-bit products.
 *
 * Fix: the previous RVV path loaded the two CONTIGUOUS low lanes (0 and 1)
 * with a unit-stride vle32, so the upper product used lane 1 instead of
 * lane 2 and returned a wrong result on RVV builds. For just two
 * multiplies the scalar form is both correct and as cheap as a strided
 * vector load, so both build configurations now share it. */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i result;
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
    return result;
}
/* Unpack operations */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
    /* PUNPCKLQDQ: interleave the LOW qwords -> { a.lo, b.lo }. */
    __m128i r;
    r.u64[1] = b.u64[0];
    r.u64[0] = a.u64[0];
    return r;
}
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
    /* PUNPCKHQDQ: interleave the HIGH qwords -> { a.hi, b.hi }. */
    __m128i r;
    r.u64[1] = b.u64[1];
    r.u64[0] = a.u64[1];
    return r;
}
/* Pause instruction for spin-wait loops */
/* Spin-wait hint (SSE PAUSE equivalent). Emits the RISC-V `pause` hint when
 * the Zihintpause extension is available at compile time; otherwise a plain
 * nop so the spin loop still contains an instruction. */
static inline void _mm_pause(void)
{
    /* RISC-V pause hint if available (requires Zihintpause extension) */
#if defined(__riscv_zihintpause)
    __asm__ __volatile__("pause");
#else
    __asm__ __volatile__("nop");
#endif
}
/* Memory fence - optimized for RISC-V */
/* Full memory fence (MFENCE equivalent): orders all prior loads/stores
 * before all subsequent ones; the "memory" clobber also stops compiler
 * reordering. */
static inline void _mm_mfence(void)
{
    __asm__ __volatile__("fence rw,rw" ::: "memory");
}
/* Load fence: orders loads against loads. NOTE(review): x86 LFENCE is also
 * used as a speculation barrier; `fence r,r` only provides load ordering —
 * confirm callers rely solely on ordering. */
static inline void _mm_lfence(void)
{
    __asm__ __volatile__("fence r,r" ::: "memory");
}
/* Store fence (SFENCE equivalent): orders stores against stores. */
static inline void _mm_sfence(void)
{
    __asm__ __volatile__("fence w,w" ::: "memory");
}
/* Comparison operations */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
    /* PCMPEQD: per-lane equality; all-ones on match, zero otherwise. */
    __m128i r;
    int lane = 0;
    while (lane < 4) {
        r.u32[lane] = (a.u32[lane] == b.u32[lane]) ? 0xFFFFFFFF : 0;
        ++lane;
    }
    return r;
}
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
    /* PCMPEQQ: per-lane 64-bit equality mask. */
    __m128i r;
    for (int lane = 0; lane < 2; ++lane) {
        r.u64[lane] = (a.u64[lane] != b.u64[lane]) ? 0 : 0xFFFFFFFFFFFFFFFFULL;
    }
    return r;
}
/* Additional shift operations */
/* PSLLD: independent left shift of all four 32-bit lanes by imm8 BITS;
 * counts above 31 produce zero, matching SSE. */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    if (imm8 > 31) {
        memset(&result, 0, sizeof(result));
    } else {
        size_t vl = __riscv_vsetvl_e32m1(4);
        vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
        vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, imm8, vl);
        __riscv_vse32_v_u32m1(result.u32, vr, vl);
    }
    return result;
#else
    __m128i result;
    if (imm8 > 31) {
        for (int i = 0; i < 4; i++) result.u32[i] = 0;
    } else {
        for (int i = 0; i < 4; i++) {
            result.u32[i] = a.u32[i] << imm8;
        }
    }
    return result;
#endif
}
/* PSRLD: independent logical right shift of all four 32-bit lanes by imm8
 * BITS; counts above 31 produce zero, matching SSE. */
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    if (imm8 > 31) {
        memset(&result, 0, sizeof(result));
    } else {
        size_t vl = __riscv_vsetvl_e32m1(4);
        vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
        vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, imm8, vl);
        __riscv_vse32_v_u32m1(result.u32, vr, vl);
    }
    return result;
#else
    __m128i result;
    if (imm8 > 31) {
        for (int i = 0; i < 4; i++) result.u32[i] = 0;
    } else {
        for (int i = 0; i < 4; i++) {
            result.u32[i] = a.u32[i] >> imm8;
        }
    }
    return result;
#endif
}
/* 64-bit integer operations */
static inline __m128i _mm_set1_epi64x(int64_t a)
{
    /* Broadcast a to both 64-bit lanes. */
    __m128i r;
    r.i64[0] = r.i64[1] = a;
    return r;
}
/* Float type for compatibility */
typedef __m128i __m128;
/* Float operations - simplified scalar implementations */
static inline __m128 _mm_set1_ps(float a)
{
    /* Broadcast one float's bit pattern to all four lanes; the bits are
       moved via memcpy because the storage type is an integer union. */
    uint32_t bits;
    memcpy(&bits, &a, sizeof(float));
    __m128 r;
    r.u32[0] = r.u32[1] = r.u32[2] = r.u32[3] = bits;
    return r;
}
static inline __m128 _mm_setzero_ps(void)
{
    /* All lanes +0.0f (all bits clear). */
    __m128 r = { {0} };
    return r;
}
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
    /* Lane-wise single-precision add (ADDPS), scalar implementation. */
    float lhs[4], rhs[4], sum[4];
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));
    for (int lane = 0; lane < 4; ++lane) {
        sum[lane] = lhs[lane] + rhs[lane];
    }
    __m128 r;
    memcpy(&r, sum, sizeof(__m128));
    return r;
}
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
    /* Lane-wise single-precision multiply (MULPS), scalar implementation. */
    float lhs[4], rhs[4], prod[4];
    memcpy(lhs, &a, sizeof(__m128));
    memcpy(rhs, &b, sizeof(__m128));
    for (int lane = 0; lane < 4; ++lane) {
        prod[lane] = lhs[lane] * rhs[lane];
    }
    __m128 r;
    memcpy(&r, prod, sizeof(__m128));
    return r;
}
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
    /* ANDPS: bitwise AND of the raw 128 bits. */
    __m128 r;
    r.u64[1] = a.u64[1] & b.u64[1];
    r.u64[0] = a.u64[0] & b.u64[0];
    return r;
}
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
    /* ORPS: bitwise OR of the raw 128 bits. */
    __m128 r;
    r.u64[1] = a.u64[1] | b.u64[1];
    r.u64[0] = a.u64[0] | b.u64[0];
    return r;
}
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
    /* CVTDQ2PS: convert four signed 32-bit lanes to single precision. */
    float converted[4];
    for (int lane = 0; lane < 4; ++lane) {
        converted[lane] = (float)a.i32[lane];
    }
    __m128 r;
    memcpy(&r, converted, sizeof(__m128));
    return r;
}
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
    /* CVTTPS2DQ: truncating float->int conversion.
       NOTE(review): out-of-range inputs are UB here, unlike SSE's
       0x80000000 "integer indefinite" result — confirm callers stay
       in range. */
    float lanes[4];
    memcpy(lanes, &a, sizeof(__m128));
    __m128i r;
    for (int lane = 0; lane < 4; ++lane) {
        r.i32[lane] = (int32_t)lanes[lane];
    }
    return r;
}
/* Casting operations */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
    /* Pure bit reinterpretation — no value conversion. */
    __m128 r;
    memcpy(&r, &a, sizeof r);
    return r;
}
static inline __m128i _mm_castps_si128(__m128 a)
{
    /* Pure bit reinterpretation — no value conversion. */
    __m128i r;
    memcpy(&r, &a, sizeof r);
    return r;
}
/* Additional set operations */
static inline __m128i _mm_set1_epi32(int a)
{
    /* Broadcast a to all four 32-bit lanes. */
    __m128i r;
    r.i32[0] = r.i32[1] = r.i32[2] = r.i32[3] = a;
    return r;
}
/* AES instructions - placeholders for soft_aes compatibility */
/* WARNING: NOT a real AES round. The SSE _mm_aesenc_si128 performs
 * ShiftRows + SubBytes + MixColumns + AddRoundKey; this placeholder only
 * XORs the round key. It is only safe if every caller on RISC-V is
 * compiled to use the soft-AES table path instead of this intrinsic —
 * TODO(review): confirm no hard-AES code path is reachable on RISC-V
 * builds. */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    return _mm_xor_si128(a, roundkey);
}
/* WARNING: placeholder only — returns the input unchanged and ignores rcon,
 * unlike the real AESKEYGENASSIST (SubWord/RotWord/rcon). Only valid if the
 * soft-AES key schedule is used instead — TODO(review): confirm. */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    return a;
}
/* Rotate right operation for soft_aes.h */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    /* Rotate right by count (mod 32). Reducing count first and special-
       casing zero avoids shifting a 32-bit value by 32, which is UB. */
    count %= 32u;
    if (count == 0) {
        return value;
    }
    return (value >> count) | (value << (32u - count));
}
/* ARM NEON compatibility types and intrinsics for RISC-V */
typedef __m128i_union uint64x2_t;
typedef __m128i_union uint8x16_t;
typedef __m128i_union int64x2_t;
typedef __m128i_union int32x4_t;
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
{
    /* NEON vld1q_u64: load two consecutive 64-bit lanes. */
    uint64x2_t v;
    memcpy(&v, ptr, sizeof v);
    return v;
}
static inline int64x2_t vld1q_s64(const int64_t *ptr)
{
    /* NEON vld1q_s64: load two consecutive signed 64-bit lanes. */
    int64x2_t v;
    memcpy(&v, ptr, sizeof v);
    return v;
}
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
    /* NEON vst1q_u64: store both 64-bit lanes back to memory. */
    memcpy(ptr, &val, sizeof val);
}
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
    /* NEON veorq_u64: lane-wise XOR, delegated to the SSE-shim XOR. */
    const uint64x2_t r = _mm_xor_si128(a, b);
    return r;
}
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
    /* NEON vaddq_u64: lane-wise 64-bit add, delegated to the SSE shim. */
    const uint64x2_t r = _mm_add_epi64(a, b);
    return r;
}
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
    /* Bit reinterpretation between identical union types — plain copy. */
    uint64x2_t r;
    memcpy(&r, &a, sizeof r);
    return r;
}
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
    /* Extract lane 0 or 1; lane must be in range (no bounds check). */
    const uint64_t x = v.u64[lane];
    return x;
}
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
    /* Extract signed 64-bit lane 0 or 1 (no bounds check). */
    const int64_t x = v.i64[lane];
    return x;
}
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
    /* Extract signed 32-bit lane 0..3 (no bounds check). */
    const int32_t x = v.i32[lane];
    return x;
}
typedef struct { uint64_t val[1]; } uint64x1_t;
static inline uint64x1_t vcreate_u64(uint64_t a)
{
    /* NEON vcreate_u64: wrap a scalar into a 1-lane 64-bit vector. */
    uint64x1_t v;
    v.val[0] = a;
    return v;
}
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
    /* NEON vcombine_u64: low half into lane 0, high half into lane 1. */
    uint64x2_t r;
    r.u64[1] = high.val[0];
    r.u64[0] = low.val[0];
    return r;
}
#ifdef __cplusplus
}
#endif
#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */

View File

@@ -0,0 +1,748 @@
/* XMRig
* Copyright (c) 2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* SSE to RISC-V Vector (RVV) optimized compatibility header
* Provides both scalar fallback and vectorized implementations using RVV intrinsics
*/
#ifndef XMRIG_SSE2RVV_OPTIMIZED_H
#define XMRIG_SSE2RVV_OPTIMIZED_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
/* Check if RVV is available */
#if defined(__riscv_vector)
#include <riscv_vector.h>
#define USE_RVV_INTRINSICS 1
#else
#define USE_RVV_INTRINSICS 0
#endif
/* 128-bit vector type */
/* 128-bit SSE register emulation: a plain 16-byte union giving byte, word,
 * dword and qword views of the same storage.
 *
 * Fix: the RVV intrinsic members (vuint64m1_t/vuint32m1_t/vuint8m1_t) were
 * removed. RVV intrinsic types are sizeless and may not be members of a
 * union or struct, so this declaration failed to compile precisely when
 * __riscv_vector was enabled — the only case the members targeted. None of
 * the code reads them: every RVV path loads and stores through the plain
 * arrays. This also matches the other rendering of this union in the file. */
typedef union {
    uint8_t u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
    int8_t i8[16];
    int16_t i16[8];
    int32_t i32[4];
    int64_t i64[2];
} __m128i_union;
typedef __m128i_union __m128i;
/* Set operations */
/* Build a vector from four dwords; e0 lands in lane 0 (SSE2 order). */
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
{
    __m128i result;
    result.i32[0] = e0;
    result.i32[1] = e1;
    result.i32[2] = e2;
    result.i32[3] = e3;
    return result;
}
/* Build a vector from two qwords; e0 is the low lane (SSE2 order). */
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
{
    __m128i result;
    result.i64[0] = e0;
    result.i64[1] = e1;
    return result;
}
/* All 128 bits cleared (PXOR xmm,xmm equivalent). */
static inline __m128i _mm_setzero_si128(void)
{
    __m128i result;
    memset(&result, 0, sizeof(result));
    return result;
}
/* Extract/insert operations */
/* MOVD r32 <- xmm: return the low 32-bit lane. */
static inline int _mm_cvtsi128_si32(__m128i a)
{
    return a.i32[0];
}
/* MOVQ r64 <- xmm: return the low 64-bit lane. */
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
    return a.i64[0];
}
/* MOVD xmm <- r32: a in lane 0, upper 96 bits cleared. */
static inline __m128i _mm_cvtsi32_si128(int a)
{
    __m128i result = _mm_setzero_si128();
    result.i32[0] = a;
    return result;
}
/* MOVQ xmm <- r64: a in the low qword, high qword cleared. */
static inline __m128i _mm_cvtsi64_si128(int64_t a)
{
    __m128i result = _mm_setzero_si128();
    result.i64[0] = a;
    return result;
}
/* Shuffle operations */
/* PSHUFD: each destination 32-bit lane is picked from a source lane,
 * selected by consecutive 2-bit fields of imm8 (lane 0 = lowest bits). */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i result;
    for (int lane = 0; lane < 4; ++lane) {
        const int sel = (imm8 >> (2 * lane)) & 0x3;
        result.u32[lane] = a.u32[sel];
    }
    return result;
}
/* Logical operations - optimized with RVV when available */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
vuint64m1_t vr = __riscv_vxor_vv_u64m1(va, vb, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
return result;
#else
__m128i result;
result.u64[0] = a.u64[0] ^ b.u64[0];
result.u64[1] = a.u64[1] ^ b.u64[1];
return result;
#endif
}
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
vuint64m1_t vr = __riscv_vor_vv_u64m1(va, vb, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
return result;
#else
__m128i result;
result.u64[0] = a.u64[0] | b.u64[0];
result.u64[1] = a.u64[1] | b.u64[1];
return result;
#endif
}
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
vuint64m1_t vr = __riscv_vand_vv_u64m1(va, vb, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
return result;
#else
__m128i result;
result.u64[0] = a.u64[0] & b.u64[0];
result.u64[1] = a.u64[1] & b.u64[1];
return result;
#endif
}
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
vuint64m1_t vnot_a = __riscv_vnot_v_u64m1(va, vl);
vuint64m1_t vr = __riscv_vand_vv_u64m1(vnot_a, vb, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
return result;
#else
__m128i result;
result.u64[0] = (~a.u64[0]) & b.u64[0];
result.u64[1] = (~a.u64[1]) & b.u64[1];
return result;
#endif
}
/* Shift operations */
/* PSLLDQ: byte-wise logical shift left of the whole 128-bit value.
 * imm8 is a byte count; counts above 15 produce all zeros. */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    /* vslideup keeps dest lanes [0, count) from the zero splat and fills
     * lanes [count, vl) with va[i - count], matching the scalar loop. */
    vuint8m1_t vr = __riscv_vslideup_vx_u8m1(__riscv_vmv_v_x_u8m1(0, vl), va, count, vl);
    __riscv_vse8_v_u8m1(result.u8, vr, vl);
    return result;
#else
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    for (int i = 0; i < 16 - count; i++) {
        result.u8[i + count] = a.u8[i];
    }
    return result;
#endif
}
/* PSRLDQ: byte-wise logical shift right of the whole 128-bit value.
 * imm8 is a byte count; counts above 15 produce all zeros. */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    size_t vl = __riscv_vsetvl_e8m1(16);
    vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl);
    vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, count, vl);
    /* Only the low (16 - count) lanes of vr are defined: on VLEN > 128
     * hardware vslidedown reads tail elements of va (past vl = 16), whose
     * values are unspecified after the 16-byte load. Store just the valid
     * lanes; the upper bytes of result are already zero. */
    __riscv_vse8_v_u8m1(result.u8, vr, (size_t)(16 - count));
    return result;
#else
    __m128i result = _mm_setzero_si128();
    int count = imm8 & 0xFF;
    if (count > 15) return result;
    for (int i = count; i < 16; i++) {
        result.u8[i - count] = a.u8[i];
    }
    return result;
#endif
}
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
__m128i result;
if (imm8 > 63) {
result.u64[0] = 0;
result.u64[1] = 0;
} else {
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, imm8, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
}
return result;
#else
__m128i result;
if (imm8 > 63) {
result.u64[0] = 0;
result.u64[1] = 0;
} else {
result.u64[0] = a.u64[0] << imm8;
result.u64[1] = a.u64[1] << imm8;
}
return result;
#endif
}
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
__m128i result;
if (imm8 > 63) {
result.u64[0] = 0;
result.u64[1] = 0;
} else {
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, imm8, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
}
return result;
#else
__m128i result;
if (imm8 > 63) {
result.u64[0] = 0;
result.u64[1] = 0;
} else {
result.u64[0] = a.u64[0] >> imm8;
result.u64[1] = a.u64[1] >> imm8;
}
return result;
#endif
}
/* Load/store operations - optimized with RVV */
static inline __m128i _mm_load_si128(const __m128i* p)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t v = __riscv_vle64_v_u64m1((const uint64_t*)p, vl);
__riscv_vse64_v_u64m1(result.u64, v, vl);
return result;
#else
__m128i result;
memcpy(&result, p, sizeof(__m128i));
return result;
#endif
}
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
__m128i result;
memcpy(&result, p, sizeof(__m128i));
return result;
}
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
#if USE_RVV_INTRINSICS
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t v = __riscv_vle64_v_u64m1(a.u64, vl);
__riscv_vse64_v_u64m1((uint64_t*)p, v, vl);
#else
memcpy(p, &a, sizeof(__m128i));
#endif
}
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
memcpy(p, &a, sizeof(__m128i));
}
/* Arithmetic operations - optimized with RVV */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
vuint64m1_t vr = __riscv_vadd_vv_u64m1(va, vb, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
return result;
#else
__m128i result;
result.u64[0] = a.u64[0] + b.u64[0];
result.u64[1] = a.u64[1] + b.u64[1];
return result;
#endif
}
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e32m1(4);
vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl);
vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl);
__riscv_vse32_v_u32m1(result.u32, vr, vl);
return result;
#else
__m128i result;
for (int i = 0; i < 4; i++) {
result.i32[i] = a.i32[i] + b.i32[i];
}
return result;
#endif
}
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
__m128i result;
size_t vl = __riscv_vsetvl_e64m1(2);
vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl);
vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl);
vuint64m1_t vr = __riscv_vsub_vv_u64m1(va, vb, vl);
__riscv_vse64_v_u64m1(result.u64, vr, vl);
return result;
#else
__m128i result;
result.u64[0] = a.u64[0] - b.u64[0];
result.u64[1] = a.u64[1] - b.u64[1];
return result;
#endif
}
/* PMULUDQ: multiply the EVEN 32-bit lanes (indices 0 and 2) of a and b,
 * producing two full unsigned 64-bit products. Odd lanes are ignored. */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
#if USE_RVV_INTRINSICS
    __m128i result;
    size_t vl = __riscv_vsetvl_e64m1(2);
    /* Gather lanes 0 and 2 with an 8-byte stride; a unit-stride load
     * would wrongly pick lanes 0 and 1 and break SSE semantics. */
    vuint64m1_t va_ev = __riscv_vzext_vf2_u64m1(__riscv_vlse32_v_u32mf2(&a.u32[0], 8, 2), vl);
    vuint64m1_t vb_ev = __riscv_vzext_vf2_u64m1(__riscv_vlse32_v_u32mf2(&b.u32[0], 8, 2), vl);
    vuint64m1_t vr = __riscv_vmul_vv_u64m1(va_ev, vb_ev, vl);
    __riscv_vse64_v_u64m1(result.u64, vr, vl);
    return result;
#else
    __m128i result;
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
    return result;
#endif
}
/* Unpack operations */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0];
result.u64[1] = b.u64[0];
return result;
}
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[1];
result.u64[1] = b.u64[1];
return result;
}
/* Pause instruction for spin-wait loops */
/* Spin-wait hint (x86 PAUSE equivalent).
 * Emits the Zihintpause "pause" hint when the extension is enabled at
 * compile time; otherwise degrades to a plain nop. */
static inline void _mm_pause(void)
{
    /* RISC-V pause hint if available (requires Zihintpause extension) */
#if defined(__riscv_zihintpause)
    __asm__ __volatile__("pause");
#else
    __asm__ __volatile__("nop");
#endif
}
/* Memory fence - optimized for RISC-V */
static inline void _mm_mfence(void)
{
__asm__ __volatile__("fence rw,rw" ::: "memory");
}
static inline void _mm_lfence(void)
{
__asm__ __volatile__("fence r,r" ::: "memory");
}
static inline void _mm_sfence(void)
{
__asm__ __volatile__("fence w,w" ::: "memory");
}
/* Comparison operations */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
__m128i result;
for (int i = 0; i < 4; i++) {
result.u32[i] = (a.u32[i] == b.u32[i]) ? 0xFFFFFFFF : 0;
}
return result;
}
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
__m128i result;
for (int i = 0; i < 2; i++) {
result.u64[i] = (a.u64[i] == b.u64[i]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
}
return result;
}
/* Additional shift operations */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
__m128i result;
if (imm8 > 31) {
memset(&result, 0, sizeof(result));
} else {
size_t vl = __riscv_vsetvl_e32m1(4);
vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, imm8, vl);
__riscv_vse32_v_u32m1(result.u32, vr, vl);
}
return result;
#else
__m128i result;
if (imm8 > 31) {
for (int i = 0; i < 4; i++) result.u32[i] = 0;
} else {
for (int i = 0; i < 4; i++) {
result.u32[i] = a.u32[i] << imm8;
}
}
return result;
#endif
}
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
#if USE_RVV_INTRINSICS
__m128i result;
if (imm8 > 31) {
memset(&result, 0, sizeof(result));
} else {
size_t vl = __riscv_vsetvl_e32m1(4);
vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl);
vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, imm8, vl);
__riscv_vse32_v_u32m1(result.u32, vr, vl);
}
return result;
#else
__m128i result;
if (imm8 > 31) {
for (int i = 0; i < 4; i++) result.u32[i] = 0;
} else {
for (int i = 0; i < 4; i++) {
result.u32[i] = a.u32[i] >> imm8;
}
}
return result;
#endif
}
/* 64-bit integer operations */
static inline __m128i _mm_set1_epi64x(int64_t a)
{
__m128i result;
result.i64[0] = a;
result.i64[1] = a;
return result;
}
/* Float type for compatibility */
typedef __m128i __m128;
/* Float operations - simplified scalar implementations */
static inline __m128 _mm_set1_ps(float a)
{
__m128 result;
uint32_t val;
memcpy(&val, &a, sizeof(float));
for (int i = 0; i < 4; i++) {
result.u32[i] = val;
}
return result;
}
static inline __m128 _mm_setzero_ps(void)
{
__m128 result;
memset(&result, 0, sizeof(result));
return result;
}
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
__m128 result;
float fa[4], fb[4], fr[4];
memcpy(fa, &a, sizeof(__m128));
memcpy(fb, &b, sizeof(__m128));
for (int i = 0; i < 4; i++) {
fr[i] = fa[i] + fb[i];
}
memcpy(&result, fr, sizeof(__m128));
return result;
}
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
__m128 result;
float fa[4], fb[4], fr[4];
memcpy(fa, &a, sizeof(__m128));
memcpy(fb, &b, sizeof(__m128));
for (int i = 0; i < 4; i++) {
fr[i] = fa[i] * fb[i];
}
memcpy(&result, fr, sizeof(__m128));
return result;
}
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
__m128 result;
result.u64[0] = a.u64[0] & b.u64[0];
result.u64[1] = a.u64[1] & b.u64[1];
return result;
}
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
__m128 result;
result.u64[0] = a.u64[0] | b.u64[0];
result.u64[1] = a.u64[1] | b.u64[1];
return result;
}
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
__m128 result;
float fr[4];
for (int i = 0; i < 4; i++) {
fr[i] = (float)a.i32[i];
}
memcpy(&result, fr, sizeof(__m128));
return result;
}
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
__m128i result;
float fa[4];
memcpy(fa, &a, sizeof(__m128));
for (int i = 0; i < 4; i++) {
result.i32[i] = (int32_t)fa[i];
}
return result;
}
/* Casting operations */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
__m128 result;
memcpy(&result, &a, sizeof(__m128));
return result;
}
static inline __m128i _mm_castps_si128(__m128 a)
{
__m128i result;
memcpy(&result, &a, sizeof(__m128));
return result;
}
/* Additional set operations */
static inline __m128i _mm_set1_epi32(int a)
{
__m128i result;
for (int i = 0; i < 4; i++) {
result.i32[i] = a;
}
return result;
}
/* AES instructions - placeholders for soft_aes compatibility */
/* Placeholder for AESENC: performs ONLY the AddRoundKey step (XOR).
 * It does NOT implement ShiftRows/SubBytes/MixColumns — correct AES on
 * RISC-V must go through the soft_aes (SOFT_AES) code path; this stub
 * exists only so non-SOFT_AES template instantiations compile. */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
    return _mm_xor_si128(a, roundkey);
}
/* Placeholder for AESKEYGENASSIST: returns its input unchanged and ignores
 * rcon. NOT a real key-schedule step — key expansion on RISC-V must use the
 * soft_aes implementation; this stub exists only for compilation. */
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    return a;
}
/* Rotate right operation for soft_aes.h */
/* Rotate a 32-bit value right by count bits (count is taken modulo 32). */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    count &= 31u;
    if (count == 0) {
        return value;
    }
    return (value >> count) | (value << (32u - count));
}
/* ARM NEON compatibility types and intrinsics for RISC-V */
typedef __m128i_union uint64x2_t;
typedef __m128i_union uint8x16_t;
typedef __m128i_union int64x2_t;
typedef __m128i_union int32x4_t;
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
{
uint64x2_t result;
result.u64[0] = ptr[0];
result.u64[1] = ptr[1];
return result;
}
static inline int64x2_t vld1q_s64(const int64_t *ptr)
{
int64x2_t result;
result.i64[0] = ptr[0];
result.i64[1] = ptr[1];
return result;
}
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
ptr[0] = val.u64[0];
ptr[1] = val.u64[1];
}
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
return _mm_xor_si128(a, b);
}
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
return _mm_add_epi64(a, b);
}
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
uint64x2_t result;
memcpy(&result, &a, sizeof(uint64x2_t));
return result;
}
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
return v.u64[lane];
}
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
return v.i64[lane];
}
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
return v.i32[lane];
}
typedef struct { uint64_t val[1]; } uint64x1_t;
static inline uint64x1_t vcreate_u64(uint64_t a)
{
uint64x1_t result;
result.val[0] = a;
return result;
}
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
uint64x2_t result;
result.u64[0] = low.val[0];
result.u64[1] = high.val[0];
return result;
}
#ifdef __cplusplus
}
#endif
#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */

View File

@@ -0,0 +1,571 @@
/* XMRig
* Copyright (c) 2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* SSE to RISC-V compatibility header
* Provides scalar implementations of SSE intrinsics for RISC-V architecture
*/
#ifndef XMRIG_SSE2RVV_H
#define XMRIG_SSE2RVV_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
/* 128-bit vector type */
typedef union {
uint8_t u8[16];
uint16_t u16[8];
uint32_t u32[4];
uint64_t u64[2];
int8_t i8[16];
int16_t i16[8];
int32_t i32[4];
int64_t i64[2];
} __m128i_union;
typedef __m128i_union __m128i;
/* Set operations */
static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
{
__m128i result;
result.i32[0] = e0;
result.i32[1] = e1;
result.i32[2] = e2;
result.i32[3] = e3;
return result;
}
static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0)
{
__m128i result;
result.i64[0] = e0;
result.i64[1] = e1;
return result;
}
static inline __m128i _mm_setzero_si128(void)
{
__m128i result;
memset(&result, 0, sizeof(result));
return result;
}
/* Extract/insert operations */
static inline int _mm_cvtsi128_si32(__m128i a)
{
return a.i32[0];
}
static inline int64_t _mm_cvtsi128_si64(__m128i a)
{
return a.i64[0];
}
static inline __m128i _mm_cvtsi32_si128(int a)
{
__m128i result = _mm_setzero_si128();
result.i32[0] = a;
return result;
}
static inline __m128i _mm_cvtsi64_si128(int64_t a)
{
__m128i result = _mm_setzero_si128();
result.i64[0] = a;
return result;
}
/* Shuffle operations */
/* PSHUFD: pick each destination 32-bit lane from a source lane selected by
 * a 2-bit field of imm8 (field for lane k at bits 2k..2k+1). */
static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8)
{
    __m128i result;
    for (int lane = 0; lane < 4; ++lane) {
        result.u32[lane] = a.u32[(imm8 >> (2 * lane)) & 0x3];
    }
    return result;
}
/* Logical operations */
static inline __m128i _mm_xor_si128(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0] ^ b.u64[0];
result.u64[1] = a.u64[1] ^ b.u64[1];
return result;
}
static inline __m128i _mm_or_si128(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0] | b.u64[0];
result.u64[1] = a.u64[1] | b.u64[1];
return result;
}
static inline __m128i _mm_and_si128(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0] & b.u64[0];
result.u64[1] = a.u64[1] & b.u64[1];
return result;
}
static inline __m128i _mm_andnot_si128(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = (~a.u64[0]) & b.u64[0];
result.u64[1] = (~a.u64[1]) & b.u64[1];
return result;
}
/* Shift operations */
/* PSLLDQ: shift the whole 128-bit value left by imm8 bytes, zero-filling
 * from the bottom. Counts above 15 yield all zeros. */
static inline __m128i _mm_slli_si128(__m128i a, int imm8)
{
    __m128i result = _mm_setzero_si128();
    const int shift = imm8 & 0xFF;
    if (shift <= 15) {
        for (int i = 15; i >= shift; --i) {
            result.u8[i] = a.u8[i - shift];
        }
    }
    return result;
}
/* PSRLDQ: shift the whole 128-bit value right by imm8 bytes, zero-filling
 * from the top. Counts above 15 yield all zeros. */
static inline __m128i _mm_srli_si128(__m128i a, int imm8)
{
    __m128i result = _mm_setzero_si128();
    const int shift = imm8 & 0xFF;
    if (shift <= 15) {
        const int kept = 16 - shift;
        for (int i = 0; i < kept; ++i) {
            result.u8[i] = a.u8[i + shift];
        }
    }
    return result;
}
static inline __m128i _mm_slli_epi64(__m128i a, int imm8)
{
__m128i result;
if (imm8 > 63) {
result.u64[0] = 0;
result.u64[1] = 0;
} else {
result.u64[0] = a.u64[0] << imm8;
result.u64[1] = a.u64[1] << imm8;
}
return result;
}
static inline __m128i _mm_srli_epi64(__m128i a, int imm8)
{
__m128i result;
if (imm8 > 63) {
result.u64[0] = 0;
result.u64[1] = 0;
} else {
result.u64[0] = a.u64[0] >> imm8;
result.u64[1] = a.u64[1] >> imm8;
}
return result;
}
/* Load/store operations */
static inline __m128i _mm_load_si128(const __m128i* p)
{
__m128i result;
memcpy(&result, p, sizeof(__m128i));
return result;
}
static inline __m128i _mm_loadu_si128(const __m128i* p)
{
__m128i result;
memcpy(&result, p, sizeof(__m128i));
return result;
}
static inline void _mm_store_si128(__m128i* p, __m128i a)
{
memcpy(p, &a, sizeof(__m128i));
}
static inline void _mm_storeu_si128(__m128i* p, __m128i a)
{
memcpy(p, &a, sizeof(__m128i));
}
/* Arithmetic operations */
static inline __m128i _mm_add_epi64(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0] + b.u64[0];
result.u64[1] = a.u64[1] + b.u64[1];
return result;
}
static inline __m128i _mm_add_epi32(__m128i a, __m128i b)
{
__m128i result;
for (int i = 0; i < 4; i++) {
result.i32[i] = a.i32[i] + b.i32[i];
}
return result;
}
static inline __m128i _mm_sub_epi64(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0] - b.u64[0];
result.u64[1] = a.u64[1] - b.u64[1];
return result;
}
/* PMULUDQ: multiply the EVEN 32-bit lanes (indices 0 and 2) of a and b
 * into two full unsigned 64-bit products; odd lanes are ignored. */
static inline __m128i _mm_mul_epu32(__m128i a, __m128i b)
{
    __m128i result;
    result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0];
    result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2];
    return result;
}
/* Unpack operations */
static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[0];
result.u64[1] = b.u64[0];
return result;
}
static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
{
__m128i result;
result.u64[0] = a.u64[1];
result.u64[1] = b.u64[1];
return result;
}
/* Pause instruction for spin-wait loops */
static inline void _mm_pause(void)
{
/* RISC-V doesn't have a direct equivalent to x86 PAUSE
* Use a simple NOP or yield hint */
__asm__ __volatile__("nop");
}
/* Memory fence */
static inline void _mm_mfence(void)
{
__asm__ __volatile__("fence" ::: "memory");
}
static inline void _mm_lfence(void)
{
__asm__ __volatile__("fence r,r" ::: "memory");
}
static inline void _mm_sfence(void)
{
__asm__ __volatile__("fence w,w" ::: "memory");
}
/* Comparison operations */
static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
{
__m128i result;
for (int i = 0; i < 4; i++) {
result.u32[i] = (a.u32[i] == b.u32[i]) ? 0xFFFFFFFF : 0;
}
return result;
}
static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
{
__m128i result;
for (int i = 0; i < 2; i++) {
result.u64[i] = (a.u64[i] == b.u64[i]) ? 0xFFFFFFFFFFFFFFFFULL : 0;
}
return result;
}
/* Additional shift operations */
static inline __m128i _mm_slli_epi32(__m128i a, int imm8)
{
__m128i result;
if (imm8 > 31) {
for (int i = 0; i < 4; i++) result.u32[i] = 0;
} else {
for (int i = 0; i < 4; i++) {
result.u32[i] = a.u32[i] << imm8;
}
}
return result;
}
static inline __m128i _mm_srli_epi32(__m128i a, int imm8)
{
__m128i result;
if (imm8 > 31) {
for (int i = 0; i < 4; i++) result.u32[i] = 0;
} else {
for (int i = 0; i < 4; i++) {
result.u32[i] = a.u32[i] >> imm8;
}
}
return result;
}
/* 64-bit integer operations */
static inline __m128i _mm_set1_epi64x(int64_t a)
{
__m128i result;
result.i64[0] = a;
result.i64[1] = a;
return result;
}
/* Float type for compatibility - we'll treat it as int for simplicity */
typedef __m128i __m128;
/* Float operations - simplified scalar implementations */
static inline __m128 _mm_set1_ps(float a)
{
__m128 result;
uint32_t val;
memcpy(&val, &a, sizeof(float));
for (int i = 0; i < 4; i++) {
result.u32[i] = val;
}
return result;
}
static inline __m128 _mm_setzero_ps(void)
{
__m128 result;
memset(&result, 0, sizeof(result));
return result;
}
static inline __m128 _mm_add_ps(__m128 a, __m128 b)
{
__m128 result;
float fa[4], fb[4], fr[4];
memcpy(fa, &a, sizeof(__m128));
memcpy(fb, &b, sizeof(__m128));
for (int i = 0; i < 4; i++) {
fr[i] = fa[i] + fb[i];
}
memcpy(&result, fr, sizeof(__m128));
return result;
}
static inline __m128 _mm_mul_ps(__m128 a, __m128 b)
{
__m128 result;
float fa[4], fb[4], fr[4];
memcpy(fa, &a, sizeof(__m128));
memcpy(fb, &b, sizeof(__m128));
for (int i = 0; i < 4; i++) {
fr[i] = fa[i] * fb[i];
}
memcpy(&result, fr, sizeof(__m128));
return result;
}
static inline __m128 _mm_and_ps(__m128 a, __m128 b)
{
__m128 result;
result.u64[0] = a.u64[0] & b.u64[0];
result.u64[1] = a.u64[1] & b.u64[1];
return result;
}
static inline __m128 _mm_or_ps(__m128 a, __m128 b)
{
__m128 result;
result.u64[0] = a.u64[0] | b.u64[0];
result.u64[1] = a.u64[1] | b.u64[1];
return result;
}
static inline __m128 _mm_cvtepi32_ps(__m128i a)
{
__m128 result;
float fr[4];
for (int i = 0; i < 4; i++) {
fr[i] = (float)a.i32[i];
}
memcpy(&result, fr, sizeof(__m128));
return result;
}
static inline __m128i _mm_cvttps_epi32(__m128 a)
{
__m128i result;
float fa[4];
memcpy(fa, &a, sizeof(__m128));
for (int i = 0; i < 4; i++) {
result.i32[i] = (int32_t)fa[i];
}
return result;
}
/* Casting operations */
static inline __m128 _mm_castsi128_ps(__m128i a)
{
__m128 result;
memcpy(&result, &a, sizeof(__m128));
return result;
}
static inline __m128i _mm_castps_si128(__m128 a)
{
__m128i result;
memcpy(&result, &a, sizeof(__m128));
return result;
}
/* Additional set operations */
static inline __m128i _mm_set1_epi32(int a)
{
__m128i result;
for (int i = 0; i < 4; i++) {
result.i32[i] = a;
}
return result;
}
/* AES instructions - these are placeholders, actual AES is done via soft_aes.h */
/* On RISC-V without crypto extensions, these should never be called directly */
/* They are only here for compilation compatibility */
static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey)
{
/* This is a placeholder - actual implementation should use soft_aes */
/* If this function is called, it means SOFT_AES template parameter wasn't used */
/* We return a XOR as a minimal fallback, but proper code should use soft_aesenc */
return _mm_xor_si128(a, roundkey);
}
static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
/* Placeholder for AES key generation - should use soft_aeskeygenassist */
return a;
}
/* Rotate right operation for soft_aes.h */
/* Rotate a 32-bit value right by count bits (count is taken modulo 32). */
static inline uint32_t _rotr(uint32_t value, unsigned int count)
{
    count &= 31u;
    if (count == 0) {
        return value;
    }
    return (value >> count) | (value << (32u - count));
}
/* ARM NEON compatibility types and intrinsics for RISC-V */
typedef __m128i_union uint64x2_t;
typedef __m128i_union uint8x16_t;
typedef __m128i_union int64x2_t;
typedef __m128i_union int32x4_t;
static inline uint64x2_t vld1q_u64(const uint64_t *ptr)
{
uint64x2_t result;
result.u64[0] = ptr[0];
result.u64[1] = ptr[1];
return result;
}
static inline int64x2_t vld1q_s64(const int64_t *ptr)
{
int64x2_t result;
result.i64[0] = ptr[0];
result.i64[1] = ptr[1];
return result;
}
static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val)
{
ptr[0] = val.u64[0];
ptr[1] = val.u64[1];
}
static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)
{
uint64x2_t result;
result.u64[0] = a.u64[0] ^ b.u64[0];
result.u64[1] = a.u64[1] ^ b.u64[1];
return result;
}
static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)
{
uint64x2_t result;
result.u64[0] = a.u64[0] + b.u64[0];
result.u64[1] = a.u64[1] + b.u64[1];
return result;
}
static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)
{
uint64x2_t result;
memcpy(&result, &a, sizeof(uint64x2_t));
return result;
}
static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane)
{
return v.u64[lane];
}
static inline int64_t vgetq_lane_s64(int64x2_t v, int lane)
{
return v.i64[lane];
}
static inline int32_t vgetq_lane_s32(int32x4_t v, int lane)
{
return v.i32[lane];
}
typedef struct { uint64_t val[1]; } uint64x1_t;
static inline uint64x1_t vcreate_u64(uint64_t a)
{
uint64x1_t result;
result.val[0] = a;
return result;
}
static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)
{
uint64x2_t result;
result.u64[0] = low.val[0];
result.u64[1] = high.val[0];
return result;
}
#ifdef __cplusplus
}
#endif
#endif /* XMRIG_SSE2RVV_H */

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -35,15 +35,69 @@ constexpr size_t twoMiB = 2U * 1024U * 1024U;
constexpr size_t oneGiB = 1024U * 1024U * 1024U; constexpr size_t oneGiB = 1024U * 1024U * 1024U;
static inline std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr) static bool sysfs_write(const std::string &path, uint64_t value)
{
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
if (!file.is_open()) {
return false;
}
file << value;
file.flush();
return true;
}
static int64_t sysfs_read(const std::string &path)
{
std::ifstream file(path);
if (!file.is_open()) {
return -1;
}
uint64_t value = 0;
file >> value;
return value;
}
static std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
{ {
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free"); return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
} }
static inline bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) { return LinuxMemory::write(sysfs_path(node, hugePageSize, true).c_str(), count); } static std::string sysfs_path(size_t hugePageSize, bool nr)
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, false).c_str()); } {
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, true).c_str()); } return fmt::format("/sys/kernel/mm/hugepages/hugepages-{}kB/{}_hugepages", hugePageSize / 1024, nr ? "nr" : "free");
}
static bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count)
{
if (sysfs_write(sysfs_path(node, hugePageSize, true), count)) {
return true;
}
return sysfs_write(sysfs_path(hugePageSize, true), count);
}
static int64_t sysfs_read_hugepages(uint32_t node, size_t hugePageSize, bool nr)
{
const int64_t value = sysfs_read(sysfs_path(node, hugePageSize, nr));
if (value >= 0) {
return value;
}
return sysfs_read(sysfs_path(hugePageSize, nr));
}
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, false); }
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, true); }
} // namespace xmrig } // namespace xmrig
@@ -62,31 +116,3 @@ bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, size_t hugePageSize
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available)); return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
} }
bool xmrig::LinuxMemory::write(const char *path, uint64_t value)
{
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
if (!file.is_open()) {
return false;
}
file << value;
file.flush();
return true;
}
int64_t xmrig::LinuxMemory::read(const char *path)
{
std::ifstream file(path);
if (!file.is_open()) {
return -1;
}
uint64_t value = 0;
file >> value;
return value;
}

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -31,13 +31,10 @@ class LinuxMemory
{ {
public: public:
static bool reserve(size_t size, uint32_t node, size_t hugePageSize); static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
static bool write(const char *path, uint64_t value);
static int64_t read(const char *path);
}; };
} /* namespace xmrig */ } // namespace xmrig
#endif /* XMRIG_LINUXMEMORY_H */ #endif // XMRIG_LINUXMEMORY_H

View File

@@ -86,7 +86,7 @@ bool xmrig::VirtualMemory::isHugepagesAvailable()
{ {
# ifdef XMRIG_OS_LINUX # ifdef XMRIG_OS_LINUX
return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good(); return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good();
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) # elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) || defined(XMRIG_OS_HAIKU)
return false; return false;
# else # else
return true; return true;
@@ -156,7 +156,8 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
if (!mem) { if (!mem) {
mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
} }
# elif defined(XMRIG_OS_HAIKU)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
# else # else
void *mem = nullptr; void *mem = nullptr;
@@ -181,6 +182,8 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
# elif defined(XMRIG_OS_FREEBSD) # elif defined(XMRIG_OS_FREEBSD)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
# elif defined(XMRIG_OS_HAIKU)
void *mem = nullptr;
# else # else
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0); void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
# endif # endif

View File

@@ -26,7 +26,7 @@
#define XMRIG_MM_MALLOC_PORTABLE_H #define XMRIG_MM_MALLOC_PORTABLE_H
#if defined(XMRIG_ARM) && !defined(__clang__) #if (defined(XMRIG_ARM) || defined(XMRIG_RISCV)) && !defined(__clang__)
#include <stdlib.h> #include <stdlib.h>

View File

@@ -57,6 +57,9 @@
#if defined(XMRIG_ARM) #if defined(XMRIG_ARM)
# include "crypto/cn/sse2neon.h" # include "crypto/cn/sse2neon.h"
#elif defined(XMRIG_RISCV)
// RISC-V doesn't have SSE/NEON, provide minimal compatibility
# define _mm_pause() __asm__ __volatile__("nop")
#elif defined(__GNUC__) #elif defined(__GNUC__)
# include <x86intrin.h> # include <x86intrin.h>
#else #else
@@ -286,7 +289,7 @@ struct HelperThread
void benchmark() void benchmark()
{ {
#ifndef XMRIG_ARM #if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
static std::atomic<int> done{ 0 }; static std::atomic<int> done{ 0 };
if (done.exchange(1)) { if (done.exchange(1)) {
return; return;
@@ -478,7 +481,7 @@ static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambd
HelperThread* create_helper_thread(int64_t cpu_index, int priority, const std::vector<int64_t>& affinities) HelperThread* create_helper_thread(int64_t cpu_index, int priority, const std::vector<int64_t>& affinities)
{ {
#ifndef XMRIG_ARM #if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV)
hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc(); hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc(); hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();
@@ -807,7 +810,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
uint32_t cn_indices[6]; uint32_t cn_indices[6];
select_indices(cn_indices, seed); select_indices(cn_indices, seed);
#ifdef XMRIG_ARM #if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
uint32_t step[6] = { 1, 1, 1, 1, 1, 1 }; uint32_t step[6] = { 1, 1, 1, 1, 1, 1 };
#else #else
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 }; uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };

View File

@@ -235,6 +235,131 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
static constexpr uint32_t AES_HASH_STRIDE[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
template<int softAes, int unroll>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
PROFILE_SCOPE(RandomX_AES);
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE, 8);
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
const vuint8m1_t& lutdec_index0 = lutenc_index0;
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
const vuint8m1_t& lutdec_index2 = lutenc_index2;
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
#define HASH_STATE(k) \
hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
#define FILL_STATE(k) \
fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \
fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
switch (softAes) {
case 0:
HASH_STATE(0);
HASH_STATE(1);
FILL_STATE(0);
FILL_STATE(1);
scratchpadPtr += 128;
break;
default:
switch (unroll) {
case 4:
HASH_STATE(0);
FILL_STATE(0);
HASH_STATE(1);
FILL_STATE(1);
HASH_STATE(2);
FILL_STATE(2);
HASH_STATE(3);
FILL_STATE(3);
scratchpadPtr += 64 * 4;
break;
case 2:
HASH_STATE(0);
FILL_STATE(0);
HASH_STATE(1);
FILL_STATE(1);
scratchpadPtr += 64 * 2;
break;
default:
HASH_STATE(0);
FILL_STATE(0);
scratchpadPtr += 64;
break;
}
break;
}
}
#undef HASH_STATE
#undef FILL_STATE
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
//output hash
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
}
#else // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
template<int softAes, int unroll> template<int softAes, int unroll>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
PROFILE_SCOPE(RandomX_AES); PROFILE_SCOPE(RandomX_AES);
@@ -375,6 +500,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2); rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3); rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
} }
#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);

View File

@@ -111,6 +111,10 @@ namespace randomx {
#define RANDOMX_HAVE_COMPILER 1 #define RANDOMX_HAVE_COMPILER 1
class JitCompilerA64; class JitCompilerA64;
using JitCompiler = JitCompilerA64; using JitCompiler = JitCompilerA64;
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
#define RANDOMX_HAVE_COMPILER 1
class JitCompilerRV64;
using JitCompiler = JitCompilerRV64;
#else #else
#define RANDOMX_HAVE_COMPILER 0 #define RANDOMX_HAVE_COMPILER 0
class JitCompilerFallback; class JitCompilerFallback;

View File

@@ -200,7 +200,18 @@ typedef union{
int i32[4]; int i32[4];
} vec_u; } vec_u;
#define rx_aligned_alloc(a, b) malloc(a) #ifdef HAVE_POSIX_MEMALIGN
inline void* rx_aligned_alloc(size_t size, size_t align) {
void* p;
if (posix_memalign(&p, align, size) == 0)
return p;
return 0;
};
#else
# define rx_aligned_alloc(a, b) malloc(a)
#endif
#define rx_aligned_free(a) free(a) #define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x) #define rx_prefetch_nta(x)
#define rx_prefetch_t0(x) #define rx_prefetch_t0(x)
@@ -392,7 +403,7 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
typedef uint8x16_t rx_vec_i128; typedef uint8x16_t rx_vec_i128;
typedef float64x2_t rx_vec_f128; typedef float64x2_t rx_vec_f128;
#if !defined(XMRIG_OS_WIN) // FIXME #ifdef HAVE_POSIX_MEMALIGN
inline void* rx_aligned_alloc(size_t size, size_t align) { inline void* rx_aligned_alloc(size_t size, size_t align) {
void* p; void* p;
if (posix_memalign(&p, align, size) == 0) if (posix_memalign(&p, align, size) == 0)
@@ -400,12 +411,15 @@ inline void* rx_aligned_alloc(size_t size, size_t align) {
return 0; return 0;
}; };
# define rx_aligned_free(a) free(a)
#elif defined(HAVE_ALIGNED_MALLOC)
# define rx_aligned_alloc(a, b) _aligned_malloc(a, b)
# define rx_aligned_free(a) _aligned_free(a)
#else #else
# define rx_aligned_alloc(a, b) malloc(a) # define rx_aligned_alloc(a, b) malloc(a)
# define rx_aligned_free(a) free(a)
#endif #endif
#define rx_aligned_free(a) free(a)
inline void rx_prefetch_nta(void* ptr) { inline void rx_prefetch_nta(void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
} }
@@ -546,8 +560,23 @@ typedef union {
rx_vec_i128 i; rx_vec_i128 i;
} rx_vec_f128; } rx_vec_f128;
#define rx_aligned_alloc(a, b) malloc(a) #ifdef HAVE_POSIX_MEMALIGN
#define rx_aligned_free(a) free(a) inline void* rx_aligned_alloc(size_t size, size_t align) {
void* p;
if (posix_memalign(&p, align, size) == 0)
return p;
return 0;
};
# define rx_aligned_free(a) free(a)
#elif defined(HAVE_ALIGNED_MALLOC)
# define rx_aligned_alloc(a, b) _aligned_malloc(a, b)
# define rx_aligned_free(a) _aligned_free(a)
#else
# define rx_aligned_alloc(a, b) malloc(a)
# define rx_aligned_free(a) free(a)
#endif
#define rx_prefetch_nta(x) #define rx_prefetch_nta(x)
#define rx_prefetch_t0(x) #define rx_prefetch_t0(x)

View File

@@ -32,6 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/jit_compiler_x86.hpp" #include "crypto/randomx/jit_compiler_x86.hpp"
#elif defined(__aarch64__) #elif defined(__aarch64__)
#include "crypto/randomx/jit_compiler_a64.hpp" #include "crypto/randomx/jit_compiler_a64.hpp"
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
#include "crypto/randomx/jit_compiler_rv64.hpp"
#else #else
#include "crypto/randomx/jit_compiler_fallback.hpp" #include "crypto/randomx/jit_compiler_fallback.hpp"
#endif #endif

View File

@@ -67,7 +67,6 @@ constexpr uint32_t LDR_LITERAL = 0x58000000;
constexpr uint32_t ROR = 0x9AC02C00; constexpr uint32_t ROR = 0x9AC02C00;
constexpr uint32_t ROR_IMM = 0x93C00000; constexpr uint32_t ROR_IMM = 0x93C00000;
constexpr uint32_t MOV_REG = 0xAA0003E0; constexpr uint32_t MOV_REG = 0xAA0003E0;
constexpr uint32_t MOV_VREG_EL = 0x6E080400;
constexpr uint32_t FADD = 0x4E60D400; constexpr uint32_t FADD = 0x4E60D400;
constexpr uint32_t FSUB = 0x4EE0D400; constexpr uint32_t FSUB = 0x4EE0D400;
constexpr uint32_t FEOR = 0x6E201C00; constexpr uint32_t FEOR = 0x6E201C00;
@@ -102,7 +101,7 @@ static size_t CalcDatasetItemSize()
((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result); ((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
} }
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; constexpr uint8_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) : JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) :
hugePages(hugePagesJIT && hugePagesEnable), hugePages(hugePagesJIT && hugePagesEnable),
@@ -128,11 +127,12 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
uint32_t codePos = MainLoopBegin + 4; uint32_t codePos = MainLoopBegin + 4;
uint32_t mask = ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10);
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | mask, code, codePos);
// and w17, w20, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | mask, code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -155,13 +155,14 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos); emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w20, w20, CacheLineAlignMask mask = ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10);
// and w20, w9, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); emit32(0x121A0000 | 20 | (9 << 5) | mask, code, codePos);
// and w10, w10, CacheLineAlignMask // and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 10 | (10 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); emit32(0x121A0000 | 10 | (10 << 5) | mask, code, codePos);
// Update spMix1 // Update spMix1
// eor x10, config.readReg0, config.readReg1 // eor x10, config.readReg0, config.readReg1
@@ -497,9 +498,12 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
if (src != dst) if (src != dst)
{ {
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k); uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
if (imm)
emitAddImmediate(tmp_reg, src, imm, code, k);
else
t = 0x927d0000 | tmp_reg | (src << 5);
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10); const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10); const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
@@ -511,10 +515,18 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr,
else else
{ {
imm = (imm & ScratchpadL3Mask) >> 3; imm = (imm & ScratchpadL3Mask) >> 3;
emitMovImmediate(tmp_reg, imm, code, k); if (imm)
{
emitMovImmediate(tmp_reg, imm, code, k);
// ldr tmp_reg, [x2, tmp_reg, lsl 3] // ldr tmp_reg, [x2, tmp_reg, lsl 3]
emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k); emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k);
}
else
{
// ldr tmp_reg, [x2]
emit32(0xf9400040 | tmp_reg, code, k);
}
} }
codePos = k; codePos = k;
@@ -529,25 +541,22 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
constexpr uint32_t tmp_reg = 19; constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k); uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
if (imm)
emitAddImmediate(tmp_reg, src, imm, code, k);
else
t = 0x927d0000 | tmp_reg | (src << 5);
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10); const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10); const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k); emit32(instr.getModMem() ? andInstrL1 : andInstrL2, code, k);
// add tmp_reg, x2, tmp_reg // ldr tmp_reg_fp, [x2, tmp_reg]
emit32(ARMV8A::ADD | tmp_reg | (2 << 5) | (tmp_reg << 16), code, k); emit32(0x3ce06800 | tmp_reg_fp | (2 << 5) | (tmp_reg << 16), code, k);
// ldpsw tmp_reg, tmp_reg + 1, [tmp_reg] // sxtl.2d tmp_reg_fp, tmp_reg_fp
emit32(0x69400000 | tmp_reg | (tmp_reg << 5) | ((tmp_reg + 1) << 10), code, k); emit32(0x0f20a400 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
// ins tmp_reg_fp.d[0], tmp_reg
emit32(0x4E081C00 | tmp_reg_fp | (tmp_reg << 5), code, k);
// ins tmp_reg_fp.d[1], tmp_reg + 1
emit32(0x4E181C00 | tmp_reg_fp | ((tmp_reg + 1) << 5), code, k);
// scvtf tmp_reg_fp.2d, tmp_reg_fp.2d // scvtf tmp_reg_fp.2d, tmp_reg_fp.2d
emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k); emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k);
@@ -835,7 +844,8 @@ void JitCompilerA64::h_IROR_R(Instruction& instr, uint32_t& codePos)
else else
{ {
// ror dst, dst, imm // ror dst, dst, imm
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos); if ((instr.getImm32() & 63))
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos);
} }
reg_changed_offset[instr.dst] = codePos; reg_changed_offset[instr.dst] = codePos;
@@ -861,7 +871,8 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
else else
{ {
// ror dst, dst, imm // ror dst, dst, imm
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k); if ((instr.getImm32() & 63))
emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k);
} }
reg_changed_offset[instr.dst] = k; reg_changed_offset[instr.dst] = k;
@@ -894,13 +905,8 @@ void JitCompilerA64::h_FSWAP_R(Instruction& instr, uint32_t& codePos)
const uint32_t dst = instr.dst + 16; const uint32_t dst = instr.dst + 16;
constexpr uint32_t tmp_reg_fp = 28; // ext dst.16b, dst.16b, dst.16b, #0x8
constexpr uint32_t src_index1 = 1 << 14; emit32(0x6e004000 | dst | (dst << 5) | (dst << 16), code, k);
constexpr uint32_t dst_index1 = 1 << 20;
emit32(ARMV8A::MOV_VREG_EL | tmp_reg_fp | (dst << 5) | src_index1, code, k);
emit32(ARMV8A::MOV_VREG_EL | dst | (dst << 5) | dst_index1, code, k);
emit32(ARMV8A::MOV_VREG_EL | dst | (tmp_reg_fp << 5), code, k);
codePos = k; codePos = k;
} }
@@ -1029,11 +1035,19 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
constexpr uint32_t tmp_reg = 20; constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8; constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm if (instr.getImm32() & 63)
emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k); {
// ror tmp_reg, src, imm
emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k);
// bfi fpcr_tmp_reg, tmp_reg, 40, 2 // bfi fpcr_tmp_reg, tmp_reg, 40, 2
emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k); emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k);
}
else // no rotation
{
// bfi fpcr_tmp_reg, src, 40, 2
emit32(0xB3580400 | fpcr_tmp_reg | (src << 5), code, k);
}
// rbit tmp_reg, fpcr_tmp_reg // rbit tmp_reg, fpcr_tmp_reg
emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k); emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k);
@@ -1059,9 +1073,12 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
else else
imm &= RandomX_CurrentConfig.ScratchpadL3_Size - 1; imm &= RandomX_CurrentConfig.ScratchpadL3_Size - 1;
emitAddImmediate(tmp_reg, dst, imm, code, k); uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
if (imm)
emitAddImmediate(tmp_reg, dst, imm, code, k);
else
t = 0x927d0000 | tmp_reg | (dst << 5);
constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5);
const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10); const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10);
const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10); const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10);
const uint32_t andInstrL3 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 4) << 10); const uint32_t andInstrL3 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 4) << 10);

View File

@@ -100,9 +100,9 @@
# v26 -> "a2" # v26 -> "a2"
# v27 -> "a3" # v27 -> "a3"
# v28 -> temporary # v28 -> temporary
# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff # v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
# v30 -> E 'or' mask = 0x3*00000000******3*00000000****** # v30 -> E 'or' mask = 0x3*00000000******'3*00000000******
# v31 -> scale mask = 0x81f000000000000081f0000000000000 # v31 -> scale mask = 0x80f0000000000000'80f0000000000000
.balign 4 .balign 4
DECL(randomx_program_aarch64): DECL(randomx_program_aarch64):
@@ -142,17 +142,14 @@ DECL(randomx_program_aarch64):
ldp q26, q27, [x0, 224] ldp q26, q27, [x0, 224]
# Load E 'and' mask # Load E 'and' mask
mov x16, 0x00FFFFFFFFFFFFFF movi v29.2d, #0x00FFFFFFFFFFFFFF
ins v29.d[0], x16
ins v29.d[1], x16
# Load E 'or' mask (stored in reg.f[0]) # Load E 'or' mask (stored in reg.f[0])
ldr q30, [x0, 64] ldr q30, [x0, 64]
# Load scale mask # Load scale mask
mov x16, 0x80f0000000000000 mov x16, 0x80f0000000000000
ins v31.d[0], x16 dup v31.2d, x16
ins v31.d[1], x16
# Read fpcr # Read fpcr
mrs x8, fpcr mrs x8, fpcr
@@ -162,35 +159,22 @@ DECL(randomx_program_aarch64):
str x0, [sp, -16]! str x0, [sp, -16]!
# Read literals # Read literals
ldr x0, literal_x0 adr x30, literal_v0
ldr x11, literal_x11 ldp q0, q1, [x30]
ldr x21, literal_x21 ldp q2, q3, [x30, 32]
ldr x22, literal_x22 ldp q4, q5, [x30, 64]
ldr x23, literal_x23 ldp q6, q7, [x30, 96]
ldr x24, literal_x24 ldp q8, q9, [x30, 128]
ldr x25, literal_x25 ldp q10, q11, [x30, 160]
ldr x26, literal_x26 ldp q12, q13, [x30, 192]
ldr x27, literal_x27 ldp q14, q15, [x30, 224]
ldr x28, literal_x28
ldr x29, literal_x29
ldr x30, literal_x30
ldr q0, literal_v0 ldp x0, x11, [x30, -96] // literal_x0
ldr q1, literal_v1 ldp x21, x22, [x30, -80] // literal_x21
ldr q2, literal_v2 ldp x23, x24, [x30, -64] // literal_x23
ldr q3, literal_v3 ldp x25, x26, [x30, -48] // literal_x25
ldr q4, literal_v4 ldp x27, x28, [x30, -32] // literal_x27
ldr q5, literal_v5 ldp x29, x30, [x30, -16] // literal_x29
ldr q6, literal_v6
ldr q7, literal_v7
ldr q8, literal_v8
ldr q9, literal_v9
ldr q10, literal_v10
ldr q11, literal_v11
ldr q12, literal_v12
ldr q13, literal_v13
ldr q14, literal_v14
ldr q15, literal_v15
DECL(randomx_program_aarch64_main_loop): DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr0 = spMix1 & ScratchpadL3Mask64;
@@ -221,40 +205,31 @@ DECL(randomx_program_aarch64_main_loop):
eor x15, x15, x19 eor x15, x15, x19
# Load group F registers (spAddr1) # Load group F registers (spAddr1)
ldpsw x20, x19, [x17] ldr q17, [x17]
ins v16.d[0], x20 sxtl v16.2d, v17.2s
ins v16.d[1], x19
ldpsw x20, x19, [x17, 8]
ins v17.d[0], x20
ins v17.d[1], x19
ldpsw x20, x19, [x17, 16]
ins v18.d[0], x20
ins v18.d[1], x19
ldpsw x20, x19, [x17, 24]
ins v19.d[0], x20
ins v19.d[1], x19
scvtf v16.2d, v16.2d scvtf v16.2d, v16.2d
sxtl2 v17.2d, v17.4s
scvtf v17.2d, v17.2d scvtf v17.2d, v17.2d
ldr q19, [x17, 16]
sxtl v18.2d, v19.2s
scvtf v18.2d, v18.2d scvtf v18.2d, v18.2d
sxtl2 v19.2d, v19.4s
scvtf v19.2d, v19.2d scvtf v19.2d, v19.2d
# Load group E registers (spAddr1) # Load group E registers (spAddr1)
ldpsw x20, x19, [x17, 32] ldr q21, [x17, 32]
ins v20.d[0], x20 sxtl v20.2d, v21.2s
ins v20.d[1], x19
ldpsw x20, x19, [x17, 40]
ins v21.d[0], x20
ins v21.d[1], x19
ldpsw x20, x19, [x17, 48]
ins v22.d[0], x20
ins v22.d[1], x19
ldpsw x20, x19, [x17, 56]
ins v23.d[0], x20
ins v23.d[1], x19
scvtf v20.2d, v20.2d scvtf v20.2d, v20.2d
sxtl2 v21.2d, v21.4s
scvtf v21.2d, v21.2d scvtf v21.2d, v21.2d
ldr q23, [x17, 48]
sxtl v22.2d, v23.2s
scvtf v22.2d, v22.2d scvtf v22.2d, v22.2d
sxtl2 v23.2d, v23.4s
scvtf v23.2d, v23.2d scvtf v23.2d, v23.2d
and v20.16b, v20.16b, v29.16b and v20.16b, v20.16b, v29.16b
and v21.16b, v21.16b, v29.16b and v21.16b, v21.16b, v29.16b
and v22.16b, v22.16b, v29.16b and v22.16b, v22.16b, v29.16b
@@ -310,10 +285,9 @@ DECL(randomx_program_aarch64_vm_instructions_end):
eor x9, x9, x20 eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch # Calculate dataset pointer for dataset prefetch
mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1): DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and x20, x20, 1 and x20, x9, 1
add x20, x20, x1 add x20, x20, x1
# Prefetch dataset data # Prefetch dataset data
@@ -491,42 +465,39 @@ DECL(randomx_calc_dataset_item_aarch64):
stp x10, x11, [sp, 80] stp x10, x11, [sp, 80]
stp x12, x13, [sp, 96] stp x12, x13, [sp, 96]
ldr x12, superscalarMul0 adr x7, superscalarMul0
# superscalarMul0, superscalarAdd1
ldp x12, x13, [x7]
mov x8, x0 ldp x8, x9, [sp]
mov x9, x1
mov x10, x2 mov x10, x2
# rl[0] = (itemNumber + 1) * superscalarMul0; # rl[0] = (itemNumber + 1) * superscalarMul0;
madd x0, x2, x12, x12 madd x0, x2, x12, x12
# rl[1] = rl[0] ^ superscalarAdd1; # rl[1] = rl[0] ^ superscalarAdd1;
ldr x12, superscalarAdd1 eor x1, x0, x13
eor x1, x0, x12
# rl[2] = rl[0] ^ superscalarAdd2; # rl[2] = rl[0] ^ superscalarAdd2;
ldr x12, superscalarAdd2 ldp x12, x13, [x7, 16]
eor x2, x0, x12 eor x2, x0, x12
# rl[3] = rl[0] ^ superscalarAdd3; # rl[3] = rl[0] ^ superscalarAdd3;
ldr x12, superscalarAdd3 eor x3, x0, x13
eor x3, x0, x12
# rl[4] = rl[0] ^ superscalarAdd4; # rl[4] = rl[0] ^ superscalarAdd4;
ldr x12, superscalarAdd4 ldp x12, x13, [x7, 32]
eor x4, x0, x12 eor x4, x0, x12
# rl[5] = rl[0] ^ superscalarAdd5; # rl[5] = rl[0] ^ superscalarAdd5;
ldr x12, superscalarAdd5 eor x5, x0, x13
eor x5, x0, x12
# rl[6] = rl[0] ^ superscalarAdd6; # rl[6] = rl[0] ^ superscalarAdd6;
ldr x12, superscalarAdd6 ldp x12, x13, [x7, 48]
eor x6, x0, x12 eor x6, x0, x12
# rl[7] = rl[0] ^ superscalarAdd7; # rl[7] = rl[0] ^ superscalarAdd7;
ldr x12, superscalarAdd7 eor x7, x0, x13
eor x7, x0, x12
b DECL(randomx_calc_dataset_item_aarch64_prefetch) b DECL(randomx_calc_dataset_item_aarch64_prefetch)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,147 @@
/*
Copyright (c) 2023 tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <cstring>
#include <vector>
#include "crypto/randomx/common.hpp"
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
namespace randomx {
struct CodeBuffer {
uint8_t* code;
int32_t codePos;
int32_t rcpCount;
void emit(const uint8_t* src, int32_t len) {
memcpy(&code[codePos], src, len);
codePos += len;
}
template<typename T>
void emit(T src) {
memcpy(&code[codePos], &src, sizeof(src));
codePos += sizeof(src);
}
void emitAt(int32_t codePos, const uint8_t* src, int32_t len) {
memcpy(&code[codePos], src, len);
}
template<typename T>
void emitAt(int32_t codePos, T src) {
memcpy(&code[codePos], &src, sizeof(src));
}
};
// Per-program compilation state: the output code buffer plus the bookkeeping
// needed while translating a RandomX program.
struct CompilerState : public CodeBuffer {
// Byte offset of each generated instruction within `code`
// (presumably used as CBRANCH back-edge targets — confirm in the .cpp).
int32_t instructionOffsets[RANDOMX_PROGRAM_MAX_SIZE];
// Per-register usage tracking across the program's instructions.
int registerUsage[RegistersCount];
};
// Forward declarations; full definitions live elsewhere in the RandomX sources.
class Program;
struct ProgramConfiguration;
class SuperscalarProgram;
class Instruction;
// Common parameter list shared by every instruction-handler function:
// the mutable compiler state, the instruction (by value), and its index.
#define HANDLER_ARGS randomx::CompilerState& state, randomx::Instruction isn, int i
typedef void(*InstructionGeneratorRV64)(HANDLER_ARGS);
// JIT compiler producing RV64 machine code for RandomX programs and for
// dataset initialization. The public interface mirrors the x86/ARM JIT
// compilers so callers can use the backends interchangeably.
class JitCompilerRV64 {
public:
JitCompilerRV64(bool hugePagesEnable, bool optimizedInitDatasetEnable);
~JitCompilerRV64();
// No per-program preparation needed on this backend.
void prepare() {}
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
// Compiles the SuperscalarHash programs used for dataset initialization.
template<size_t N>
void generateSuperscalarHash(SuperscalarProgram(&programs)[N]);
// No-op here; dataset-init code is presumably produced inside
// generateSuperscalarHash — confirm against the .cpp.
void generateDatasetInitCode() {}
// Entry point of the most recently generated program.
ProgramFunc* getProgramFunc() {
return (ProgramFunc*)entryProgram;
}
DatasetInitFunc* getDatasetInitFunc();
// Raw pointer to the generated code buffer.
uint8_t* getCode() {
return state.code;
}
size_t getCodeSize();
// W^X protection toggles for the code buffer.
void enableWriting() const;
void enableExecution() const;
// Dispatch table: one handler per RandomX opcode byte.
static InstructionGeneratorRV64 engine[256];
private:
CompilerState state;
uint8_t* vectorCode;            // buffer for the vectorized (RVV) dataset-init code
size_t vectorCodeSize;
void* entryDataInit;            // scalar dataset-init entry
void* entryDataInitOptimized;   // vectorized dataset-init entry
void* entryProgram;             // generated program entry
public:
// Instruction handlers, one per RandomX instruction, installed in `engine`.
static void v1_IADD_RS(HANDLER_ARGS);
static void v1_IADD_M(HANDLER_ARGS);
static void v1_ISUB_R(HANDLER_ARGS);
static void v1_ISUB_M(HANDLER_ARGS);
static void v1_IMUL_R(HANDLER_ARGS);
static void v1_IMUL_M(HANDLER_ARGS);
static void v1_IMULH_R(HANDLER_ARGS);
static void v1_IMULH_M(HANDLER_ARGS);
static void v1_ISMULH_R(HANDLER_ARGS);
static void v1_ISMULH_M(HANDLER_ARGS);
static void v1_IMUL_RCP(HANDLER_ARGS);
static void v1_INEG_R(HANDLER_ARGS);
static void v1_IXOR_R(HANDLER_ARGS);
static void v1_IXOR_M(HANDLER_ARGS);
static void v1_IROR_R(HANDLER_ARGS);
static void v1_IROL_R(HANDLER_ARGS);
static void v1_ISWAP_R(HANDLER_ARGS);
static void v1_FSWAP_R(HANDLER_ARGS);
static void v1_FADD_R(HANDLER_ARGS);
static void v1_FADD_M(HANDLER_ARGS);
static void v1_FSUB_R(HANDLER_ARGS);
static void v1_FSUB_M(HANDLER_ARGS);
static void v1_FSCAL_R(HANDLER_ARGS);
static void v1_FMUL_R(HANDLER_ARGS);
static void v1_FDIV_M(HANDLER_ARGS);
static void v1_FSQRT_R(HANDLER_ARGS);
static void v1_CBRANCH(HANDLER_ARGS);
static void v1_CFROUND(HANDLER_ARGS);
static void v1_ISTORE(HANDLER_ARGS);
static void v1_NOP(HANDLER_ARGS);
};
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,53 @@
/*
Copyright (c) 2023 tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// Symbols defined in the hand-written RV64 assembly (jit_compiler_rv64_static).
// They are declared as functions only so their addresses can be taken from
// C++; most are labels delimiting code fragments that the JIT compiler copies
// or patches rather than calls directly.
extern "C" {
void randomx_riscv64_literals();
void randomx_riscv64_literals_end();
void randomx_riscv64_data_init();
void randomx_riscv64_fix_data_call();
void randomx_riscv64_prologue();
void randomx_riscv64_loop_begin();
void randomx_riscv64_data_read();
void randomx_riscv64_data_read_light();
void randomx_riscv64_fix_loop_call();
void randomx_riscv64_spad_store();
void randomx_riscv64_spad_store_hardaes();
void randomx_riscv64_spad_store_softaes();
void randomx_riscv64_loop_end();
void randomx_riscv64_fix_continue_loop();
void randomx_riscv64_epilogue();
void randomx_riscv64_softaes();
void randomx_riscv64_program_end();
void randomx_riscv64_ssh_init();
void randomx_riscv64_ssh_load();
void randomx_riscv64_ssh_prefetch();
void randomx_riscv64_ssh_end();
}

View File

@@ -0,0 +1,207 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/configuration.h"
#include "crypto/randomx/jit_compiler_rv64_vector.h"
#include "crypto/randomx/jit_compiler_rv64_vector_static.h"
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/superscalar.hpp"
namespace randomx {

#define ADDR(x) ((uint8_t*) &(x))
#define DIST(x, y) (ADDR(y) - ADDR(x))

// Builds the vectorized (RVV) dataset-initialization routine for RV64.
//
// The pre-assembled template (randomx_riscv64_vector_sshash_begin ..
// randomx_riscv64_vector_sshash_end) is copied into `buf`, then the
// placeholder region at randomx_riscv64_vector_sshash_generated_instructions
// is filled with machine code translated from the SuperscalarHash programs:
// steps 4-7 of the dataset block generation algorithm, once per cache access.
// IMUL_RCP constants are stored into the literal pool inside the buffer and
// loaded via x15 at run time.
//
// Returns the entry point of the finished dataset-init function
// (randomx_riscv64_vector_sshash_dataset_init, relocated into `buf`).
void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs)
{
	memcpy(buf, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_begin), buf_size);

	uint8_t* p = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions);
	uint8_t* literals = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_imul_rcp_literals);
	uint8_t* cur_literal = literals;

	for (size_t i = 0; i < num_programs; ++i) {
		// Step 4: prefetch the cache lines that step 6 will read
		size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor);
		memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_cache_prefetch), k);
		p += k;

		// Step 5: translate each SuperscalarHash instruction into RVV code
		for (uint32_t j = 0; j < programs[i].size; ++j) {
			const uint32_t dst = programs[i].programBuffer[j].dst & 7;
			const uint32_t src = programs[i].programBuffer[j].src & 7;
			const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3;
			const uint32_t imm32 = programs[i].programBuffer[j].imm32;

			uint32_t inst;

// Emit one 32-bit instruction word and advance the write pointer
#define EMIT(data) do { inst = (data); memcpy(p, &inst, 4); p += 4; } while (0)

			switch (static_cast<SuperscalarInstructionType>(programs[i].programBuffer[j].opcode)) {
			case SuperscalarInstructionType::ISUB_R:
				// 57 00 00 0A	vsub.vv v0, v0, v0
				EMIT(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20));
				break;

			case SuperscalarInstructionType::IXOR_R:
				// 57 00 00 2E	vxor.vv v0, v0, v0
				EMIT(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20));
				break;

			case SuperscalarInstructionType::IADD_RS:
				// 57 39 00 96	vsll.vi v18, v0, 0
				// 57 00 09 02	vadd.vv v0, v0, v18
				EMIT(0x96003957 | (modShift << 15) | (src << 20));
				EMIT(0x02090057 | (dst << 7) | (dst << 20));
				break;

			case SuperscalarInstructionType::IMUL_R:
				// 57 20 00 96	vmul.vv v0, v0, v0
				EMIT(0x96002057 | (dst << 7) | (src << 15) | (dst << 20));
				break;

			case SuperscalarInstructionType::IROR_C:
			{
				// Rotate right = shift right | shift left; vsll/vsrl .vi forms
				// only encode shift amounts 0-31, so larger amounts go through x5.
				const uint32_t shift_right = imm32 & 63;
				const uint32_t shift_left = 64 - shift_right;

				if (shift_right < 32) {
					// 57 39 00 A2	vsrl.vi v18, v0, 0
					EMIT(0xA2003957 | (shift_right << 15) | (dst << 20));
				}
				else {
					// 93 02 00 00	li x5, 0
					// 57 C9 02 A2	vsrl.vx v18, v0, x5
					EMIT(0x00000293 | (shift_right << 20));
					EMIT(0xA202C957 | (dst << 20));
				}

				if (shift_left < 32) {
					// 57 30 00 96	vsll.vi v0, v0, 0
					EMIT(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20));
				}
				else {
					// 93 02 00 00	li x5, 0
					// 57 C0 02 96	vsll.vx v0, v0, x5
					EMIT(0x00000293 | (shift_left << 20));
					EMIT(0x9602C057 | (dst << 7) | (dst << 20));
				}

				// 57 00 20 2B	vor.vv v0, v18, v0
				EMIT(0x2B200057 | (dst << 7) | (dst << 15));
			}
			break;

			case SuperscalarInstructionType::IADD_C7:
			case SuperscalarInstructionType::IADD_C8:
			case SuperscalarInstructionType::IADD_C9:
				// Materialize sign-extended imm32 in x5 (lui compensates for the
				// sign of the addiw low 12 bits), then add it to every element.
				// B7 02 00 00	lui x5, 0
				// 9B 82 02 00	addiw x5, x5, 0
				// 57 C0 02 02	vadd.vx v0, v0, x5
				EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
				EMIT(0x0002829B | ((imm32 & 0x00000FFF) << 20));
				EMIT(0x0202C057 | (dst << 7) | (dst << 20));
				break;

			case SuperscalarInstructionType::IXOR_C7:
			case SuperscalarInstructionType::IXOR_C8:
			case SuperscalarInstructionType::IXOR_C9:
				// B7 02 00 00	lui x5, 0
				// 9B 82 02 00	addiw x5, x5, 0
				// 57 C0 02 2E	vxor.vx v0, v0, x5
				EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
				EMIT(0x0002829B | ((imm32 & 0x00000FFF) << 20));
				EMIT(0x2E02C057 | (dst << 7) | (dst << 20));
				break;

			case SuperscalarInstructionType::IMULH_R:
				// 57 20 00 92	vmulhu.vv v0, v0, v0
				EMIT(0x92002057 | (dst << 7) | (src << 15) | (dst << 20));
				break;

			case SuperscalarInstructionType::ISMULH_R:
				// 57 20 00 9E	vmulh.vv v0, v0, v0
				EMIT(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20));
				break;

			case SuperscalarInstructionType::IMUL_RCP:
			{
				uint32_t offset = static_cast<uint32_t>(cur_literal - literals);
				if (offset == 2040) {
					// The 12-bit "ld" offset maxes out at 2040 for 8-byte
					// literals, so rebase x15 and restart the offset at 0.
					literals += 2040;
					offset = 0;
					// 93 87 87 7F	add x15, x15, 2040
					EMIT(0x7F878793);
				}

				const uint64_t r = randomx_reciprocal_fast(imm32);
				memcpy(cur_literal, &r, 8);
				cur_literal += 8;

				// 83 B2 07 00	ld x5, (x15)
				// 57 E0 02 96	vmul.vx v0, v0, x5
				EMIT(0x0007B283 | (offset << 20));
				EMIT(0x9602E057 | (dst << 7) | (dst << 20));
			}
			break;

			default:
				break;
			}
#undef EMIT
		}

		// Step 6: XOR registers with the cache item prefetched in step 4
		k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_set_cache_index);
		memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_xor), k);
		p += k;

		// Step 7: cacheIndex = the register with the longest dependency chain.
		// Skipped after the last program: the template then stores the
		// final register values instead.
		if (i + 1 < num_programs) {
			memcpy(p, reinterpret_cast<uint8_t*>(randomx_riscv64_vector_sshash_set_cache_index) + programs[i].getAddressRegister() * 4, 4);
			p += 4;
		}
	}

	// Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction
	// (JAL x0 encoding; the jump distance here is far below the +-1 MiB range,
	// so imm[20] is always 0)
	const uint8_t* e = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions_end);
	const uint32_t k = static_cast<uint32_t>(e - p);
	const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000);
	memcpy(p, &j, 4);

#ifdef __GNUC__
	// The buffer will be executed: flush the instruction cache
	__builtin___clear_cache((char*) buf, (char*)(buf + buf_size));
#endif

	return buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_dataset_init);
}

} // namespace randomx

View File

@@ -0,0 +1,42 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <cstdlib>
namespace randomx {
class SuperscalarProgram;
// Emits the vectorized (RVV) dataset-initialization routine into `buf`
// (which must be at least `buf_size` bytes and writable/executable) from the
// given SuperscalarHash programs, and returns the entry point of the
// generated function.
void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs);
} // namespace randomx

View File

@@ -0,0 +1,296 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "configuration.h"

// Compatibility macros: map the cache-geometry constants exported by
// different configuration.h variants onto the names used below.
#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES)
#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES
#endif

// Mask turning a 64-bit cacheIndex into a 64-byte item index within the cache.
// NOTE(review): the expansion is unparenthesized; this is only safe because the
// macro is used below as the complete operand of a single "li" instruction.
#if defined(RANDOMX_ARGON_MEMORY)
#define RANDOMX_CACHE_MASK RANDOMX_ARGON_MEMORY * 1024 / 64 - 1
#elif defined(RANDOMX_CACHE_MAX_SIZE)
#define RANDOMX_CACHE_MASK RANDOMX_CACHE_MAX_SIZE / 64 - 1
#endif

#define DECL(x) x

.text
.option arch, rv64gcv_zicbop
.option pic

.global DECL(randomx_riscv64_vector_sshash_begin)
.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals)
.global DECL(randomx_riscv64_vector_sshash_dataset_init)
.global DECL(randomx_riscv64_vector_sshash_generated_instructions)
.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end)
.global DECL(randomx_riscv64_vector_sshash_cache_prefetch)
.global DECL(randomx_riscv64_vector_sshash_xor)
.global DECL(randomx_riscv64_vector_sshash_set_cache_index)
.global DECL(randomx_riscv64_vector_sshash_end)

.balign 8
DECL(randomx_riscv64_vector_sshash_begin):
// SuperscalarHash initialization constants (RandomX spec, section 7.3)
sshash_constant_0: .dword 6364136223846793005
sshash_constant_1: .dword 9298411001130361340
sshash_constant_2: .dword 12065312585734608966
sshash_constant_3: .dword 9306329213124626780
sshash_constant_4: .dword 5281919268842080866
sshash_constant_5: .dword 10536153434571861004
sshash_constant_6: .dword 3398623926847679864
sshash_constant_7: .dword 9549104520008361294
sshash_offsets: .dword 0,1,2,3
store_offsets: .dword 0,64,128,192
// 4 KiB literal pool; the JIT compiler fills it with IMUL_RCP reciprocals
DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0
/*
Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation

Register layout
---------------
x5 = temporary
x10 = randomx cache
x11 = output buffer
x12 = startBlock
x13 = endBlock
x14 = cache mask
x15 = imul_rcp literal pointer

v0-v7 = r0-r7
v8 = itemNumber
v9 = cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory
v10-v17 = sshash constants
v18 = temporary
v19 = dataset item store offsets
*/
DECL(randomx_riscv64_vector_sshash_dataset_init):
// Process 4 64-bit values at a time
li x5, 4
vsetvli x5, x5, e64, m1, ta, ma
// Load cache->memory pointer
ld x10, (x10)
// Init cache mask
li x14, RANDOMX_CACHE_MASK
// Init dataset item store offsets
lla x5, store_offsets
vle64.v v19, (x5)
// Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3)
lla x5, sshash_offsets
vle64.v v8, (x5)
vadd.vx v8, v8, x12
// Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector)
lla x5, sshash_constant_0
vlse64.v v10, (x5), x0
lla x5, sshash_constant_1
vlse64.v v11, (x5), x0
lla x5, sshash_constant_2
vlse64.v v12, (x5), x0
lla x5, sshash_constant_3
vlse64.v v13, (x5), x0
lla x5, sshash_constant_4
vlse64.v v14, (x5), x0
lla x5, sshash_constant_5
vlse64.v v15, (x5), x0
lla x5, sshash_constant_6
vlse64.v v16, (x5), x0
lla x5, sshash_constant_7
vlse64.v v17, (x5), x0
// Calculate the end pointer for dataset init
sub x13, x13, x12
slli x13, x13, 6
add x13, x13, x11
init_item:
// Step 1. Init r0-r7
// r0 = (itemNumber + 1) * 6364136223846793005
vmv.v.v v0, v8
vmadd.vv v0, v10, v10
// r_i = r0 ^ c_i for i = 1..7
vxor.vv v1, v0, v11
vxor.vv v2, v0, v12
vxor.vv v3, v0, v13
vxor.vv v4, v0, v14
vxor.vv v5, v0, v15
vxor.vv v6, v0, v16
vxor.vv v7, v0, v17
// Step 2. Let cacheIndex = itemNumber
vmv.v.v v9, v8
// Step 3 is implicit (all iterations are inlined, there is no "i")
// Init imul_rcp literal pointer
lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals
DECL(randomx_riscv64_vector_sshash_generated_instructions):
// Generated by JIT compiler
//
// Step 4. randomx_riscv64_vector_sshash_cache_prefetch
// Step 5. SuperscalarHash[i]
// Step 6. randomx_riscv64_vector_sshash_xor
// Step 7. randomx_riscv64_vector_sshash_set_cache_index
//
// Above steps will be repeated RANDOMX_CACHE_ACCESSES times
// (placeholder space: up to 2048 instruction words per cache access;
// the JIT ends the generated code with a jump to the label below)
.fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0
DECL(randomx_riscv64_vector_sshash_generated_instructions_end):
// Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data.
// (v19 holds the byte offsets of the 4 dataset items being produced)
vsuxei64.v v0, (x11), v19
add x5, x11, 8
vsuxei64.v v1, (x5), v19
add x5, x11, 16
vsuxei64.v v2, (x5), v19
add x5, x11, 24
vsuxei64.v v3, (x5), v19
add x5, x11, 32
vsuxei64.v v4, (x5), v19
add x5, x11, 40
vsuxei64.v v5, (x5), v19
add x5, x11, 48
vsuxei64.v v6, (x5), v19
add x5, x11, 56
vsuxei64.v v7, (x5), v19
// Iterate to the next 4 items
vadd.vi v8, v8, 4
add x11, x11, 256
bltu x11, x13, init_item
ret

// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache.
// (template fragment: copied into the generated-instructions region by the JIT)
DECL(randomx_riscv64_vector_sshash_cache_prefetch):
// v9 = convert from cacheIndex to a direct pointer into cache->memory
vand.vx v9, v9, x14
vsll.vi v9, v9, 6
vadd.vx v9, v9, x10
// Prefetch element 0
vmv.x.s x5, v9
prefetch.r (x5)
// Prefetch element 1
vslidedown.vi v18, v9, 1
vmv.x.s x5, v18
prefetch.r (x5)
// Prefetch element 2
vslidedown.vi v18, v9, 2
vmv.x.s x5, v18
prefetch.r (x5)
// Prefetch element 3
vslidedown.vi v18, v9, 3
vmv.x.s x5, v18
prefetch.r (x5)
// v9 = byte offset into cache->memory
vsub.vx v9, v9, x10

// Step 6. XOR all registers with data loaded from randomx cache
// (template fragment: copied into the generated-instructions region by the JIT)
DECL(randomx_riscv64_vector_sshash_xor):
vluxei64.v v18, (x10), v9
vxor.vv v0, v0, v18
add x5, x10, 8
vluxei64.v v18, (x5), v9
vxor.vv v1, v1, v18
add x5, x10, 16
vluxei64.v v18, (x5), v9
vxor.vv v2, v2, v18
add x5, x10, 24
vluxei64.v v18, (x5), v9
vxor.vv v3, v3, v18
add x5, x10, 32
vluxei64.v v18, (x5), v9
vxor.vv v4, v4, v18
add x5, x10, 40
vluxei64.v v18, (x5), v9
vxor.vv v5, v5, v18
add x5, x10, 48
vluxei64.v v18, (x5), v9
vxor.vv v6, v6, v18
add x5, x10, 56
vluxei64.v v18, (x5), v9
vxor.vv v7, v7, v18

// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5.
DECL(randomx_riscv64_vector_sshash_set_cache_index):
// JIT compiler will pick a single instruction reading from the required register
vmv.v.v v9, v0
vmv.v.v v9, v1
vmv.v.v v9, v2
vmv.v.v v9, v3
vmv.v.v v9, v4
vmv.v.v v9, v5
vmv.v.v v9, v6
vmv.v.v v9, v7
DECL(randomx_riscv64_vector_sshash_end):

View File

@@ -0,0 +1,58 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#if defined(__cplusplus)
#include <cstdint>
#else
#include <stdint.h>
#endif

#if defined(__cplusplus)
extern "C" {
#endif

struct randomx_cache;

// Labels/entry points defined in the RV64 vector dataset-init assembly
// template. Only randomx_riscv64_vector_sshash_dataset_init is a real
// function; the rest are labels whose addresses delimit fragments that the
// JIT compiler copies into its code buffer.
void randomx_riscv64_vector_sshash_begin();
void randomx_riscv64_vector_sshash_imul_rcp_literals();
void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock);
void randomx_riscv64_vector_sshash_generated_instructions();
void randomx_riscv64_vector_sshash_generated_instructions_end();
// Fixed: this declaration previously appeared twice; the duplicate was removed.
void randomx_riscv64_vector_sshash_cache_prefetch();
void randomx_riscv64_vector_sshash_xor();
void randomx_riscv64_vector_sshash_set_cache_index();
void randomx_riscv64_vector_sshash_end();

#if defined(__cplusplus)
}
#endif

View File

@@ -39,6 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/jit_compiler_x86_static.hpp" #include "crypto/randomx/jit_compiler_x86_static.hpp"
#elif (XMRIG_ARM == 8) #elif (XMRIG_ARM == 8)
#include "crypto/randomx/jit_compiler_a64_static.hpp" #include "crypto/randomx/jit_compiler_a64_static.hpp"
#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64)
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
#endif #endif
#include "backend/cpu/Cpu.h" #include "backend/cpu/Cpu.h"
@@ -190,7 +192,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
# endif # endif
} }
#if (XMRIG_ARM == 8) #if (XMRIG_ARM == 8) || defined(XMRIG_RISCV)
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
#endif #endif
@@ -274,6 +276,14 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x #define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x
#elif defined(XMRIG_RISCV)
Log2_ScratchpadL1 = Log2(ScratchpadL1_Size);
Log2_ScratchpadL2 = Log2(ScratchpadL2_Size);
Log2_ScratchpadL3 = Log2(ScratchpadL3_Size);
#define JIT_HANDLE(x, prev) randomx::JitCompilerRV64::engine[k] = &randomx::JitCompilerRV64::v1_##x
#else #else
#define JIT_HANDLE(x, prev) #define JIT_HANDLE(x, prev)
#endif #endif

View File

@@ -133,7 +133,7 @@ struct RandomX_ConfigurationBase
uint32_t ScratchpadL3Mask_Calculated; uint32_t ScratchpadL3Mask_Calculated;
uint32_t ScratchpadL3Mask64_Calculated; uint32_t ScratchpadL3Mask64_Calculated;
# if (XMRIG_ARM == 8) # if (XMRIG_ARM == 8) || defined(XMRIG_RISCV)
uint32_t Log2_ScratchpadL1; uint32_t Log2_ScratchpadL1;
uint32_t Log2_ScratchpadL2; uint32_t Log2_ScratchpadL2;
uint32_t Log2_ScratchpadL3; uint32_t Log2_ScratchpadL3;

View File

@@ -73,8 +73,20 @@ uint64_t randomx_reciprocal(uint64_t divisor) {
#if !RANDOMX_HAVE_FAST_RECIPROCAL #if !RANDOMX_HAVE_FAST_RECIPROCAL
#ifdef __GNUC__
uint64_t randomx_reciprocal_fast(uint64_t divisor)
{
	/* Same contract as randomx_reciprocal(): returns floor(2^x / divisor)
	 * for the largest x that keeps the result below 2^64, but without
	 * 128-bit arithmetic. 2^63 is split into quotient and remainder, and
	 * both parts are scaled up by shift = bit_width(divisor).
	 * Requires divisor != 0 (__builtin_clzll(0) is undefined). */
	const uint64_t half_pow = 1ULL << 63;
	const uint64_t quotient = half_pow / divisor;
	const uint64_t remainder = half_pow % divisor;
	const unsigned shift = 64u - (unsigned)__builtin_clzll(divisor);

	return (quotient << shift) + ((remainder << shift) / divisor);
}
#else
uint64_t randomx_reciprocal_fast(uint64_t divisor) { uint64_t randomx_reciprocal_fast(uint64_t divisor) {
return randomx_reciprocal(divisor); return randomx_reciprocal(divisor);
} }
#endif
#endif #endif

View File

@@ -39,6 +39,9 @@ alignas(64) uint32_t lutDec1[256];
alignas(64) uint32_t lutDec2[256]; alignas(64) uint32_t lutDec2[256];
alignas(64) uint32_t lutDec3[256]; alignas(64) uint32_t lutDec3[256];
alignas(64) uint8_t lutEncIndex[4][32];
alignas(64) uint8_t lutDecIndex[4][32];
static uint32_t mul_gf2(uint32_t b, uint32_t c) static uint32_t mul_gf2(uint32_t b, uint32_t c)
{ {
uint32_t s = 0; uint32_t s = 0;
@@ -115,5 +118,49 @@ static struct SAESInitializer
lutDec2[i] = w; w = (w << 8) | (w >> 24); lutDec2[i] = w; w = (w << 8) | (w >> 24);
lutDec3[i] = w; lutDec3[i] = w;
} }
memset(lutEncIndex, -1, sizeof(lutEncIndex));
memset(lutDecIndex, -1, sizeof(lutDecIndex));
lutEncIndex[0][ 0] = 0;
lutEncIndex[0][ 4] = 4;
lutEncIndex[0][ 8] = 8;
lutEncIndex[0][12] = 12;
lutEncIndex[1][ 0] = 5;
lutEncIndex[1][ 4] = 9;
lutEncIndex[1][ 8] = 13;
lutEncIndex[1][12] = 1;
lutEncIndex[2][ 0] = 10;
lutEncIndex[2][ 4] = 14;
lutEncIndex[2][ 8] = 2;
lutEncIndex[2][12] = 6;
lutEncIndex[3][ 0] = 15;
lutEncIndex[3][ 4] = 3;
lutEncIndex[3][ 8] = 7;
lutEncIndex[3][12] = 11;
lutDecIndex[0][ 0] = 0;
lutDecIndex[0][ 4] = 4;
lutDecIndex[0][ 8] = 8;
lutDecIndex[0][12] = 12;
lutDecIndex[1][ 0] = 13;
lutDecIndex[1][ 4] = 1;
lutDecIndex[1][ 8] = 5;
lutDecIndex[1][12] = 9;
lutDecIndex[2][ 0] = 10;
lutDecIndex[2][ 4] = 14;
lutDecIndex[2][ 8] = 2;
lutDecIndex[2][12] = 6;
lutDecIndex[3][ 0] = 7;
lutDecIndex[3][ 4] = 11;
lutDecIndex[3][ 8] = 15;
lutDecIndex[3][12] = 3;
for (uint32_t i = 0; i < 4; ++i) {
for (uint32_t j = 0; j < 16; j += 4) {
lutEncIndex[i][j + 16] = lutEncIndex[i][j] + 16;
lutDecIndex[i][j + 16] = lutDecIndex[i][j] + 16;
}
}
} }
} aes_initializer; } aes_initializer;

View File

@@ -41,6 +41,9 @@ extern uint32_t lutDec1[256];
extern uint32_t lutDec2[256]; extern uint32_t lutDec2[256];
extern uint32_t lutDec3[256]; extern uint32_t lutDec3[256];
extern uint8_t lutEncIndex[4][32];
extern uint8_t lutDecIndex[4][32];
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key); template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key); template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
@@ -147,3 +150,32 @@ template<>
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) { FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
return rx_aesdec_vec_i128(in, key); return rx_aesdec_vec_i128(in, key);
} }
#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
#include <riscv_vector.h>
// Software AES round using RVV: processes two 128-bit AES states at once
// (the byte gather runs with vl = 32 bytes; the 32-bit operations with vl = 8
// lanes). `i0`-`i3` are byte-permutation index vectors (see lutEncIndex /
// lutDecIndex) that implement the ShiftRows byte selection for each T-table
// column; `lut0`-`lut3` are the 1 KiB T-tables combining SubBytes and
// MixColumns. Presumably the 0xFF entries in the index vectors rely on
// out-of-range vrgather indices producing zero — confirm against the
// lutEncIndex/lutDecIndex initialization.
FORCE_INLINE vuint32m1_t softaes_vector_double(
vuint32m1_t in,
vuint32m1_t key,
vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3,
const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3)
{
// Reinterpret the state as bytes, then gather one state byte per 32-bit lane
// for each of the four T-tables.
const vuint8m1_t in8 = __riscv_vreinterpret_v_u32m1_u8m1(in);
const vuint32m1_t index0 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i0, 32));
const vuint32m1_t index1 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i1, 32));
const vuint32m1_t index2 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i2, 32));
const vuint32m1_t index3 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i3, 32));
// Indexed T-table loads: scale each byte index by 4 (vsll by 2) to get byte
// offsets into the 32-bit lookup tables.
vuint32m1_t s0 = __riscv_vluxei32_v_u32m1(lut0, __riscv_vsll_vx_u32m1(index0, 2, 8), 8);
vuint32m1_t s1 = __riscv_vluxei32_v_u32m1(lut1, __riscv_vsll_vx_u32m1(index1, 2, 8), 8);
vuint32m1_t s2 = __riscv_vluxei32_v_u32m1(lut2, __riscv_vsll_vx_u32m1(index2, 2, 8), 8);
vuint32m1_t s3 = __riscv_vluxei32_v_u32m1(lut3, __riscv_vsll_vx_u32m1(index3, 2, 8), 8);
// Combine the four table outputs and XOR in the round key.
s0 = __riscv_vxor_vv_u32m1(s0, s1, 8);
s2 = __riscv_vxor_vv_u32m1(s2, s3, 8);
s0 = __riscv_vxor_vv_u32m1(s0, s2, 8);
return __riscv_vxor_vv_u32m1(s0, key, 8);
}
#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)

View File

@@ -0,0 +1,14 @@
/* RISC-V - test if the vector extension and prefetch instruction are present */
/* Presumably a build-time probe: it must execute without trapping (SIGILL)
   on the target CPU for the feature to be considered available. */
.text
.option arch, rv64gcv_zicbop
.global main
main:
/* Exercise the Zicbop prefetch instruction */
lla x5, main
prefetch.r (x5)
/* Request vl = 4 elements of e64; x6 receives the granted vl */
li x5, 4
vsetvli x6, x5, e64, m1, ta, ma
vxor.vv v0, v0, v0
/* Exit status a0 = 4 - granted vl: zero only when the CPU grants all
   4 x 64-bit elements in one vector register */
sub x10, x5, x6
ret

View File

@@ -0,0 +1,9 @@
/* RISC-V - test if the Zba extension is present */
/* Presumably a build-time probe: executing sh1add without trapping (SIGILL)
   indicates Zba support; main then returns 0. */
.text
.global main
main:
/* sh1add: x6 = (x6 << 1) + x7 (Zba address-generation instruction) */
sh1add x6, x6, x7
li x10, 0
ret

View File

@@ -0,0 +1,9 @@
/* RISC-V - test if the Zbb extension is present */
/* Presumably a build-time probe: executing ror without trapping (SIGILL)
   indicates Zbb support; main then returns 0. */
.text
.global main
main:
/* ror: rotate x6 right by x7 bits (Zbb basic bit-manipulation) */
ror x6, x6, x7
li x10, 0
ret

View File

@@ -43,6 +43,12 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache,
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5)); randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5));
randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5); randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5);
} }
#ifdef XMRIG_RISCV
else if (itemCount % 4) {
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 4));
randomx_init_dataset(dataset, cache, startItem + itemCount - 4, 4);
}
#endif
else { else {
randomx_init_dataset(dataset, cache, startItem, itemCount); randomx_init_dataset(dataset, cache, startItem, itemCount);
} }

Some files were not shown because too many files have changed in this diff Show More