diff --git a/.gitignore b/.gitignore index 3db117d49..9687ec69d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ scripts/deps /CMakeLists.txt.user /.idea /src/backend/opencl/cl/cn/cryptonight_gen.cl +.vscode +/.qtcreator diff --git a/CHANGELOG.md b/CHANGELOG.md index f2e477dd2..dc25c1b97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,17 @@ +# v6.25.0 +- [#3680](https://github.com/xmrig/xmrig/pull/3680) Added `armv8l` to the list of 32-bit ARM targets. +- [#3708](https://github.com/xmrig/xmrig/pull/3708) Minor Aarch64 JIT changes (better instruction selection, don't emit instructions that add 0, etc). +- [#3718](https://github.com/xmrig/xmrig/pull/3718) Solo mining: added support for FCMP++ hardfork. +- [#3722](https://github.com/xmrig/xmrig/pull/3722) Added Zen4 (Hawk Point) CPUs detection. +- [#3725](https://github.com/xmrig/xmrig/pull/3725) Added **RISC-V** support with JIT compiler. +- [#3731](https://github.com/xmrig/xmrig/pull/3731) Added initial Haiku OS support. +- [#3733](https://github.com/xmrig/xmrig/pull/3733) Added detection for MSVC/2026. +- [#3736](https://github.com/xmrig/xmrig/pull/3736) RISC-V: added vectorized dataset init. +- [#3740](https://github.com/xmrig/xmrig/pull/3740) RISC-V: added vectorized soft AES. +- [#3743](https://github.com/xmrig/xmrig/pull/3743) Linux: added support for transparent huge pages. +- Improved LibreSSL support. +- Improved compatibility for automatically enabling huge pages on Linux systems without NUMA support. + # v6.24.0 - [#3671](https://github.com/xmrig/xmrig/pull/3671) Fixed detection of L2 cache size for some complex NUMA topologies. - [#3674](https://github.com/xmrig/xmrig/pull/3674) Fixed ARMv7 build. diff --git a/CMakeLists.txt b/CMakeLists.txt index 313923226..4b36a8dfb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ set(HEADERS_CRYPTO src/crypto/common/VirtualMemory.h ) -if (XMRIG_ARM) +if (XMRIG_ARM OR XMRIG_RISCV) set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_arm.h) else() set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86.h) diff --git a/README.md b/README.md index b4d40751c..7b6e66c54 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD. ## Mining backends -- **CPU** (x86/x64/ARMv7/ARMv8) +- **CPU** (x86/x64/ARMv7/ARMv8/RISC-V) - **OpenCL** for AMD GPUs. - **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda). 
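Note on the "Linux: added support for transparent huge pages" changelog entry above: the patch hunks for that feature are not shown in this excerpt, but on Linux the standard kernel interface for opting a mapping into transparent huge pages is `madvise(MADV_HUGEPAGE)`. The following standalone sketch only illustrates that interface; it is not taken from XMRig and does not claim to match the PR's implementation.

```c
/* Minimal standalone sketch (not from the patch): opt a large anonymous
 * mapping into Linux transparent huge pages via madvise(MADV_HUGEPAGE).
 * Assumes /sys/kernel/mm/transparent_hugepage/enabled is "madvise" or "always". */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
    const size_t size = (size_t)1 << 30; /* 1 GiB, roughly RandomX-dataset sized */

    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* Ask the kernel to back this mapping with transparent huge pages. */
    if (madvise(p, size, MADV_HUGEPAGE) != 0) {
        perror("madvise(MADV_HUGEPAGE)"); /* non-fatal: region falls back to 4 KiB pages */
    }

    munmap(p, size);
    return 0;
}
```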
diff --git a/cmake/asm.cmake b/cmake/asm.cmake index e445defde..30a119c30 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -1,4 +1,4 @@ -if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) +if (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8) set(XMRIG_ASM_LIBRARY "xmrig-asm") if (CMAKE_C_COMPILER_ID MATCHES MSVC) diff --git a/cmake/cpu.cmake b/cmake/cpu.cmake index 12dbe9b1b..515c2ccbb 100644 --- a/cmake/cpu.cmake +++ b/cmake/cpu.cmake @@ -21,6 +21,19 @@ if (NOT VAES_SUPPORTED) set(WITH_VAES OFF) endif() +# Detect RISC-V architecture early (before it's used below) +if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv64|riscv|rv64)$") + set(RISCV_TARGET 64) + set(XMRIG_RISCV ON) + add_definitions(-DXMRIG_RISCV) + message(STATUS "Detected RISC-V 64-bit architecture (${CMAKE_SYSTEM_PROCESSOR})") +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv32|rv32)$") + set(RISCV_TARGET 32) + set(XMRIG_RISCV ON) + add_definitions(-DXMRIG_RISCV) + message(STATUS "Detected RISC-V 32-bit architecture (${CMAKE_SYSTEM_PROCESSOR})") +endif() + if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$") add_definitions(-DRAPIDJSON_SSE2) else() @@ -29,6 +42,57 @@ else() set(WITH_VAES OFF) endif() +# Disable x86-specific features for RISC-V +if (XMRIG_RISCV) + set(WITH_SSE4_1 OFF) + set(WITH_AVX2 OFF) + set(WITH_VAES OFF) + + # default build uses the RV64GC baseline + set(RVARCH "rv64gc") + + # for native builds, enable Zba and Zbb if supported by the CPU + if(ARCH STREQUAL "native") + enable_language(ASM) + + try_run(RANDOMX_VECTOR_RUN_FAIL + RANDOMX_VECTOR_COMPILE_OK + ${CMAKE_CURRENT_BINARY_DIR}/ + ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_vector.s + COMPILE_DEFINITIONS "-march=rv64gcv_zicbop") + + if (RANDOMX_VECTOR_COMPILE_OK AND NOT RANDOMX_VECTOR_RUN_FAIL) + set(RVARCH "${RVARCH}v_zicbop") + add_definitions(-DXMRIG_RVV_ENABLED) + message(STATUS "RISC-V vector extension detected") + endif() + + try_run(RANDOMX_ZBA_RUN_FAIL + RANDOMX_ZBA_COMPILE_OK + ${CMAKE_CURRENT_BINARY_DIR}/ + ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zba.s + COMPILE_DEFINITIONS "-march=rv64gc_zba") + + if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL) + set(RVARCH "${RVARCH}_zba") + message(STATUS "RISC-V zba extension detected") + endif() + + try_run(RANDOMX_ZBB_RUN_FAIL + RANDOMX_ZBB_COMPILE_OK + ${CMAKE_CURRENT_BINARY_DIR}/ + ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zbb.s + COMPILE_DEFINITIONS "-march=rv64gc_zbb") + + if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL) + set(RVARCH "${RVARCH}_zbb") + message(STATUS "RISC-V zbb extension detected") + endif() + endif() + + message(STATUS "Using -march=${RVARCH}") +endif() + add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag if (ARM_V8) @@ -40,7 +104,7 @@ endif() if (NOT ARM_TARGET) if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$") set(ARM_TARGET 8) - elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$") + elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve|armv8l)$") set(ARM_TARGET 7) endif() endif() diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 9abf212a0..a36d18256 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -28,6 +28,11 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) elseif (ARM_TARGET EQUAL 7) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon 
-flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions") + elseif (XMRIG_RISCV) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}") + + add_definitions(-DHAVE_ROTR) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") @@ -41,6 +46,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) else() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware") endif() + elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc") else() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") endif() @@ -74,6 +81,11 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) elseif (ARM_TARGET EQUAL 7) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") + elseif (XMRIG_RISCV) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${RVARCH}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${RVARCH}") + + add_definitions(-DHAVE_ROTR) else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") diff --git a/cmake/os.cmake b/cmake/os.cmake index 8f70e9f42..3025e0c09 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -17,6 +17,10 @@ else() set(XMRIG_OS_LINUX ON) elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly) set(XMRIG_OS_FREEBSD ON) + elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD) + set(XMRIG_OS_OPENBSD ON) + elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku") + set(XMRIG_OS_HAIKU ON) endif() endif() @@ -43,6 +47,10 @@ elseif(XMRIG_OS_UNIX) add_definitions(-DXMRIG_OS_LINUX) elseif (XMRIG_OS_FREEBSD) add_definitions(-DXMRIG_OS_FREEBSD) + elseif (XMRIG_OS_OPENBSD) + add_definitions(-DXMRIG_OS_OPENBSD) + elseif (XMRIG_OS_HAIKU) + add_definitions(-DXMRIG_OS_HAIKU) endif() endif() diff --git a/cmake/randomx.cmake b/cmake/randomx.cmake index a50e078fd..c15024c97 100644 --- a/cmake/randomx.cmake +++ b/cmake/randomx.cmake @@ -62,7 +62,7 @@ if (WITH_RANDOMX) src/crypto/randomx/jit_compiler_x86_static.asm src/crypto/randomx/jit_compiler_x86.cpp ) - elseif (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) + elseif (WITH_ASM AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8) list(APPEND SOURCES_CRYPTO src/crypto/randomx/jit_compiler_x86_static.S src/crypto/randomx/jit_compiler_x86.cpp @@ -80,6 +80,16 @@ if (WITH_RANDOMX) else() set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C) endif() + elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8) + list(APPEND SOURCES_CRYPTO + src/crypto/randomx/jit_compiler_rv64_static.S + src/crypto/randomx/jit_compiler_rv64_vector_static.S + src/crypto/randomx/jit_compiler_rv64.cpp + src/crypto/randomx/jit_compiler_rv64_vector.cpp + ) + # cheat because cmake and ccache hate each other + set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C) + set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C) else() list(APPEND SOURCES_CRYPTO src/crypto/randomx/jit_compiler_fallback.cpp @@ -116,7 +126,7 @@ if (WITH_RANDOMX) ) endif() - if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX)) + if (WITH_MSR AND NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND 
(XMRIG_OS_WIN OR XMRIG_OS_LINUX)) add_definitions(/DXMRIG_FEATURE_MSR) add_definitions(/DXMRIG_FIX_RYZEN) message("-- WITH_MSR=ON") diff --git a/doc/RISCV_PERF_TUNING.md b/doc/RISCV_PERF_TUNING.md new file mode 100644 index 000000000..b37a530d3 --- /dev/null +++ b/doc/RISCV_PERF_TUNING.md @@ -0,0 +1,365 @@ +# RISC-V Performance Optimization Guide + +This guide provides comprehensive instructions for optimizing XMRig on RISC-V architectures. + +## Build Optimizations + +### Compiler Flags Applied Automatically + +The CMake build now applies aggressive RISC-V-specific optimizations: + +```cmake +# RISC-V ISA with extensions +-march=rv64gcv_zba_zbb_zbc_zbs + +# Aggressive compiler optimizations +-funroll-loops # Unroll loops for ILP (instruction-level parallelism) +-fomit-frame-pointer # Free up frame pointer register (RISC-V has limited registers) +-fno-common # Better code generation for global variables +-finline-functions # Inline more functions for better cache locality +-ffast-math # Relaxed FP semantics (safe for mining) +-flto # Link-time optimization for cross-module inlining + +# Release build additions +-minline-atomics # Inline atomic operations for faster synchronization +``` + +### Optimal Build Command + +```bash +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Release .. +make -j$(nproc) +``` + +**Expected build time**: 5-15 minutes depending on CPU + +## Runtime Optimizations + +### 1. Memory Configuration (Most Important) + +Enable huge pages to reduce TLB misses and fragmentation: + +#### Enable 2MB Huge Pages +```bash +# Calculate required huge pages (1 page = 2MB) +# For 2 GB dataset: 1024 pages +# For cache + dataset: 1536 pages minimum +sudo sysctl -w vm.nr_hugepages=2048 +``` + +Verify: +```bash +grep HugePages /proc/meminfo +# Expected: HugePages_Free should be close to nr_hugepages +``` + +#### Enable 1GB Huge Pages (Optional but Recommended) + +```bash +# Run provided helper script +sudo ./scripts/enable_1gb_pages.sh + +# Verify 1GB pages are available +cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages +# Should be: >= 1 (one 1GB page) +``` + +Update config.json: +```json +{ + "cpu": { + "huge-pages": true + }, + "randomx": { + "1gb-pages": true + } +} +``` + +### 2. RandomX Mode Selection + +| Mode | Memory | Init Time | Throughput | Recommendation | +|------|--------|-----------|-----------|-----------------| +| **light** | 256 MB | 10 sec | Low | Testing, resource-constrained | +| **fast** | 2 GB | 2-5 min* | High | Production (with huge pages) | +| **auto** | 2 GB | Varies | High | Default (uses fast if possible) | + +*With optimizations; can be 30+ minutes without huge pages + +**For RISC-V, use fast mode with huge pages enabled.** + +### 3. Dataset Initialization Threads + +Optimal thread count = 60-75% of CPU cores (leaves headroom for OS/other tasks) + +```json +{ + "randomx": { + "init": 4 + } +} +``` + +Or auto-detect (rewritten for RISC-V): +```json +{ + "randomx": { + "init": -1 + } +} +``` + +### 4. CPU Affinity (Optional) + +Pin threads to specific cores for better cache locality: + +```json +{ + "cpu": { + "rx/0": [ + { "threads": 1, "affinity": 0 }, + { "threads": 1, "affinity": 1 }, + { "threads": 1, "affinity": 2 }, + { "threads": 1, "affinity": 3 } + ] + } +} +``` + +### 5. 
CPU Governor (Linux) + +Set to performance mode for maximum throughput: + +```bash +# Check current governor +cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor + +# Set to performance (requires root) +echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + +# Verify +cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor +# Should output: performance +``` + +## Configuration Examples + +### Minimum (Testing) +```json +{ + "randomx": { + "mode": "light" + }, + "cpu": { + "huge-pages": false + } +} +``` + +### Recommended (Balanced) +```json +{ + "randomx": { + "mode": "auto", + "init": 4, + "1gb-pages": true + }, + "cpu": { + "huge-pages": true, + "priority": 2 + } +} +``` + +### Maximum Performance (Production) +```json +{ + "randomx": { + "mode": "fast", + "init": -1, + "1gb-pages": true, + "scratchpad_prefetch_mode": 1 + }, + "cpu": { + "huge-pages": true, + "priority": 3, + "yield": false + } +} +``` + +## CLI Equivalents + +```bash +# Light mode +./xmrig --randomx-mode=light + +# Fast mode with 4 init threads +./xmrig --randomx-mode=fast --randomx-init=4 + +# Benchmark +./xmrig --bench=1M --algo=rx/0 + +# Benchmark Wownero variant (1 MB scratchpad) +./xmrig --bench=1M --algo=rx/wow + +# Mine to pool +./xmrig -o pool.example.com:3333 -u YOUR_WALLET -p x +``` + +## Performance Diagnostics + +### Check if Vector Extensions are Detected + +Look for `FEATURES:` line in output: +``` + * CPU: ky,x60 (uarch ky,x1) + * FEATURES: rv64imafdcv zba zbb zbc zbs +``` + +- `v`: Vector extension (RVV) ✓ +- `zba`, `zbb`, `zbc`, `zbs`: Bit manipulation ✓ +- If missing, make sure build used `-march=rv64gcv_zba_zbb_zbc_zbs` + +### Verify Huge Pages at Runtime + +```bash +# Run xmrig with --bench=1M and check output +./xmrig --bench=1M + +# Look for line like: +# HUGE PAGES 100% 1 / 1 (1024 MB) +``` + +- Should show 100% for dataset AND threads +- If less, increase `vm.nr_hugepages` and reboot + +### Monitor Performance + +```bash +# Run benchmark multiple times to find stable hashrate +./xmrig --bench=1M --algo=rx/0 +./xmrig --bench=10M --algo=rx/0 +./xmrig --bench=100M --algo=rx/0 + +# Check system load and memory during mining +while true; do free -h; grep HugePages /proc/meminfo; sleep 2; done +``` + +## Expected Performance + +### Hardware: Orange Pi RV2 (Ky X1, 8 cores @ ~1.5 GHz) + +| Config | Mode | Hashrate | Init Time | +|--------|------|----------|-----------| +| Scalar (baseline) | fast | 30 H/s | 10 min | +| Scalar + huge pages | fast | 33 H/s | 2 min | +| RVV (if enabled) | fast | 70-100 H/s | 3 min | + +*Actual results depend on CPU frequency, memory speed, and load* + +## Troubleshooting + +### Long Initialization Times (30+ minutes) + +**Cause**: Huge pages not enabled, system using swap +**Solution**: +1. Enable huge pages: `sudo sysctl -w vm.nr_hugepages=2048` +2. Reboot: `sudo reboot` +3. Reduce mining threads to free memory +4. Check available memory: `free -h` + +### Low Hashrate (50% of expected) + +**Cause**: CPU governor set to power-save, no huge pages, high contention +**Solution**: +1. Set governor to performance: `echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor` +2. Enable huge pages +3. Reduce number of mining threads +4. Check system load: `top` or `htop` + +### Dataset Init Crashes or Hangs + +**Cause**: Insufficient memory, corrupted huge pages +**Solution**: +1. Disable huge pages temporarily: set `huge-pages: false` in config +2. Reduce mining threads +3. Reboot and re-enable huge pages +4. 
Try light mode: `--randomx-mode=light` + +### Out of Memory During Benchmark + +**Cause**: Not enough RAM for dataset + cache + threads +**Solution**: +1. Use light mode: `--randomx-mode=light` +2. Reduce mining threads: `--threads=1` +3. Increase available memory (kill other processes) +4. Check: `free -h` before mining + +## Advanced Tuning + +### Vector Length (VLEN) Detection + +The vector register length (VLEN) of the RISC-V vector extension affects performance: + +```bash +# Check VLEN on your CPU +cat /proc/cpuinfo | grep vlen + +# Expected values: +# - 128 bits (16 bytes) = minimum +# - 256 bits (32 bytes) = common +# - 512 bits (64 bytes) = high performance +``` + +Larger VLEN generally means better performance for vectorized operations. + +### Prefetch Optimization + +The code automatically optimizes memory prefetching for RISC-V: + +``` +scratchpad_prefetch_mode: 0 = disabled (slowest) +scratchpad_prefetch_mode: 1 = prefetch.r (default, recommended) +scratchpad_prefetch_mode: 2 = prefetch.w (experimental) +``` + +### Memory Bandwidth Saturation + +If you are experiencing memory bandwidth saturation (high memory latency): + +1. Reduce the number of mining threads +2. Increase the L2/L3 cache available to each thread by mining with fewer threads per core +3. Enable cache QoS (AMD Ryzen): `cache_qos: true` + +## Building with Custom Flags + +To build with custom RISC-V flags: + +```bash +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_FLAGS="-march=rv64gcv_zba_zbb_zbc_zbs -O3 -funroll-loops -fomit-frame-pointer" \ + .. +make -j$(nproc) +``` + +## Future Optimizations + +- [ ] Zbk* (crypto) support detection and usage +- [ ] Optimal VLEN-aware algorithm selection +- [ ] Per-core memory affinity (NUMA support) +- [ ] Dynamic thread count adjustment based on thermals +- [ ] Cross-compile optimizations for various RISC-V cores + +## References + +- [RISC-V Vector Extension Spec](https://github.com/riscv/riscv-v-spec) +- [RISC-V Bit Manipulation Spec](https://github.com/riscv/riscv-bitmanip) +- [RISC-V Crypto Spec](https://github.com/riscv/riscv-crypto) +- [XMRig Documentation](https://xmrig.com/docs) + +--- + +For further optimization, enable RVV intrinsics by replacing `sse2rvv.h` with `sse2rvv_optimized.h` in the build. 
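As a companion to the VLEN detection section of the guide above, here is a minimal standalone C sketch (not part of the patch) that queries VLEN at runtime through the standard RVV intrinsics, the same `__riscv_*` API the patch's sse2rvv.h uses. It assumes a toolchain built with `-march=rv64gcv` and a CPU that actually implements the V extension.

```c
/* Minimal standalone sketch (not from the patch): query the vector register
 * length (VLEN) at runtime using the standard RVV intrinsics.
 * Build with: gcc -march=rv64gcv -O2 vlen.c -o vlen */
#include <stdio.h>
#include <riscv_vector.h>

int main(void)
{
    /* VLMAX for SEW=8, LMUL=1 equals VLEN/8, i.e. the register width in bytes. */
    size_t vlen_bytes = __riscv_vsetvlmax_e8m1();

    printf("VLEN = %zu bits (%zu bytes per vector register)\n",
           vlen_bytes * 8, vlen_bytes);
    return 0;
}
```

On hardware without the V extension this binary would trap, so a production check would gate it behind `/proc/cpuinfo` or hwprobe as the guide describes.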
diff --git a/scripts/randomx_boost.sh b/scripts/randomx_boost.sh index 8580229a5..4181a95c0 100755 --- a/scripts/randomx_boost.sh +++ b/scripts/randomx_boost.sh @@ -12,7 +12,7 @@ if grep -E 'AMD Ryzen|AMD EPYC|AuthenticAMD' /proc/cpuinfo > /dev/null; then if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null; then - if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null; + if grep "model[[:space:]]\{1,\}:[[:space:]]\(97\|117\)" /proc/cpuinfo > /dev/null; then echo "Detected Zen4 CPU" wrmsr -a 0xc0011020 0x4400000000000 diff --git a/src/3rdparty/argon2/CMakeLists.txt b/src/3rdparty/argon2/CMakeLists.txt index a9751fd94..7d09e5172 100644 --- a/src/3rdparty/argon2/CMakeLists.txt +++ b/src/3rdparty/argon2/CMakeLists.txt @@ -35,7 +35,7 @@ if (CMAKE_C_COMPILER_ID MATCHES MSVC) add_feature_impl(xop "" HAVE_XOP) add_feature_impl(avx2 "/arch:AVX2" HAVE_AVX2) add_feature_impl(avx512f "/arch:AVX512F" HAVE_AVX512F) -elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif (NOT XMRIG_ARM AND NOT XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8) function(add_feature_impl FEATURE GCC_FLAG DEF) add_library(argon2-${FEATURE} STATIC arch/x86_64/lib/argon2-${FEATURE}.c) target_include_directories(argon2-${FEATURE} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) diff --git a/src/3rdparty/libethash/endian.h b/src/3rdparty/libethash/endian.h index f960d7429..deb57620a 100644 --- a/src/3rdparty/libethash/endian.h +++ b/src/3rdparty/libethash/endian.h @@ -31,7 +31,7 @@ #include #define ethash_swap_u32(input_) OSSwapInt32(input_) #define ethash_swap_u64(input_) OSSwapInt64(input_) -#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) +#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) || defined(__HAIKU__) #define ethash_swap_u32(input_) bswap32(input_) #define ethash_swap_u64(input_) bswap64(input_) #elif defined(__OpenBSD__) diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index bef2e898e..cba7e8839 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -87,14 +87,14 @@ xmrig::CpuWorker::CpuWorker(size_t id, const CpuLaunchData &data) : if (!cn_heavyZen3Memory) { // Round up number of threads to the multiple of 8 const size_t num_threads = ((m_threads + 7) / 8) * 8; - cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * num_threads, data.hugePages, false, false, node()); + cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * num_threads, data.hugePages, false, false, node(), VirtualMemory::kDefaultHugePageSize); } m_memory = cn_heavyZen3Memory; } else # endif { - m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node()); + m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node(), VirtualMemory::kDefaultHugePageSize); } # ifdef XMRIG_ALGO_GHOSTRIDER diff --git a/src/backend/cpu/cpu.cmake b/src/backend/cpu/cpu.cmake index f9a02abd8..3c9d779b0 100644 --- a/src/backend/cpu/cpu.cmake +++ b/src/backend/cpu/cpu.cmake @@ -46,7 +46,12 @@ else() set(CPUID_LIB "") endif() -if (XMRIG_ARM) +if (XMRIG_RISCV) + list(APPEND SOURCES_BACKEND_CPU + src/backend/cpu/platform/lscpu_riscv.cpp + src/backend/cpu/platform/BasicCpuInfo_riscv.cpp + ) +elseif (XMRIG_ARM) list(APPEND SOURCES_BACKEND_CPU src/backend/cpu/platform/BasicCpuInfo_arm.cpp) if (XMRIG_OS_WIN) diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index 8d10d4d29..e28a14734 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h 
+++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -91,7 +91,7 @@ public: ICpuInfo() = default; virtual ~ICpuInfo() = default; -# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) +# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) || defined(__riscv) && (__riscv_xlen == 64) inline constexpr static bool is64bit() { return true; } # else inline constexpr static bool is64bit() { return false; } diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index 30a78f828..9f5595aac 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -250,7 +250,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : break; case 0x19: - if (m_model == 0x61) { + if ((m_model == 0x61) || (m_model == 0x75)) { m_arch = ARCH_ZEN4; m_msrMod = MSR_MOD_RYZEN_19H_ZEN4; } diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index 5ea5661d1..97fe20e1b 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -65,7 +65,7 @@ protected: inline Vendor vendor() const override { return m_vendor; } inline uint32_t model() const override { -# ifndef XMRIG_ARM +# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) return m_model; # else return 0; @@ -80,7 +80,7 @@ protected: Vendor m_vendor = VENDOR_UNKNOWN; private: -# ifndef XMRIG_ARM +# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) uint32_t m_procInfo = 0; uint32_t m_family = 0; uint32_t m_model = 0; diff --git a/src/backend/cpu/platform/BasicCpuInfo_riscv.cpp b/src/backend/cpu/platform/BasicCpuInfo_riscv.cpp new file mode 100644 index 000000000..fd9c9ce62 --- /dev/null +++ b/src/backend/cpu/platform/BasicCpuInfo_riscv.cpp @@ -0,0 +1,116 @@ +/* XMRig + * Copyright (c) 2025 Slayingripper + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2017-2019 XMR-Stak , + * Copyright (c) 2016-2025 XMRig + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + + +#include "backend/cpu/platform/BasicCpuInfo.h" +#include "base/tools/String.h" +#include "3rdparty/rapidjson/document.h" + + +namespace xmrig { + + +extern String cpu_name_riscv(); +extern bool has_riscv_vector(); +extern bool has_riscv_crypto(); + + +} // namespace xmrig + + +xmrig::BasicCpuInfo::BasicCpuInfo() : + m_threads(std::thread::hardware_concurrency()) +{ + m_units.resize(m_threads); + for (int32_t i = 0; i < static_cast(m_threads); ++i) { + m_units[i] = i; + } + + memcpy(m_brand, "RISC-V", 6); + + auto name = cpu_name_riscv(); + if (!name.isNull()) { + strncpy(m_brand, name.data(), sizeof(m_brand) - 1); + } + + // Check for crypto extensions (Zknd/Zkne/Zknh - AES and SHA) + m_flags.set(FLAG_AES, has_riscv_crypto()); + + // RISC-V typically supports 1GB huge pages + m_flags.set(FLAG_PDPE1GB, std::ifstream("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages").good()); +} + + +const char *xmrig::BasicCpuInfo::backend() const +{ + return "basic/1"; +} + + +xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const +{ +# ifdef XMRIG_ALGO_GHOSTRIDER + if (algorithm.family() == Algorithm::GHOSTRIDER) { + return CpuThreads(threads(), 8); + } +# endif + + return CpuThreads(threads()); +} + + +rapidjson::Value xmrig::BasicCpuInfo::toJSON(rapidjson::Document &doc) const +{ + using namespace rapidjson; + auto &allocator = doc.GetAllocator(); + + Value out(kObjectType); + + out.AddMember("brand", StringRef(brand()), allocator); + out.AddMember("aes", hasAES(), allocator); + out.AddMember("avx2", false, allocator); + out.AddMember("x64", is64bit(), allocator); // DEPRECATED will be removed in the next major release. + out.AddMember("64_bit", is64bit(), allocator); + out.AddMember("l2", static_cast(L2()), allocator); + out.AddMember("l3", static_cast(L3()), allocator); + out.AddMember("cores", static_cast(cores()), allocator); + out.AddMember("threads", static_cast(threads()), allocator); + out.AddMember("packages", static_cast(packages()), allocator); + out.AddMember("nodes", static_cast(nodes()), allocator); + out.AddMember("backend", StringRef(backend()), allocator); + out.AddMember("msr", "none", allocator); + out.AddMember("assembly", "none", allocator); + out.AddMember("arch", "riscv64", allocator); + + Value flags(kArrayType); + + if (hasAES()) { + flags.PushBack("aes", allocator); + } + + out.AddMember("flags", flags, allocator); + + return out; +} diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index f796416b4..1cb071b7a 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -87,7 +87,7 @@ static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t typ } -#ifndef XMRIG_ARM +#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) static inline std::vector findByType(hwloc_obj_t obj, hwloc_obj_type_t type) { std::vector out; @@ -207,7 +207,7 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset) xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const { -# ifndef XMRIG_ARM +# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) if (L2() == 0 && L3() == 0) { return BasicCpuInfo::threads(algorithm, limit); } @@ -277,7 +277,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const { 
-# ifndef XMRIG_ARM +# if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) constexpr size_t oneMiB = 1024U * 1024U; size_t PUs = countByType(cache, HWLOC_OBJ_PU); diff --git a/src/backend/cpu/platform/lscpu_riscv.cpp b/src/backend/cpu/platform/lscpu_riscv.cpp new file mode 100644 index 000000000..d19d26a8f --- /dev/null +++ b/src/backend/cpu/platform/lscpu_riscv.cpp @@ -0,0 +1,140 @@ +/* XMRig + * Copyright (c) 2025 Slayingripper + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2016-2025 XMRig + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "base/tools/String.h" +#include "3rdparty/fmt/core.h" + +#include +#include +#include + +namespace xmrig { + +struct riscv_cpu_desc +{ + String model; + String isa; + String uarch; + bool has_vector = false; + bool has_crypto = false; + + inline bool isReady() const { return !model.isNull(); } +}; + +static bool lookup_riscv(char *line, const char *pattern, String &value) +{ + char *p = strstr(line, pattern); + if (!p) { + return false; + } + + p += strlen(pattern); + while (isspace(*p)) { + ++p; + } + + if (*p == ':') { + ++p; + } + + while (isspace(*p)) { + ++p; + } + + // Remove trailing newline + size_t len = strlen(p); + if (len > 0 && p[len - 1] == '\n') { + p[len - 1] = '\0'; + } + + // Ensure we call the const char* assignment (which performs a copy) + // instead of the char* overload (which would take ownership of the pointer) + value = (const char*)p; + return true; +} + +static bool read_riscv_cpuinfo(riscv_cpu_desc *desc) +{ + auto fp = fopen("/proc/cpuinfo", "r"); + if (!fp) { + return false; + } + + char buf[2048]; // Larger buffer for long ISA strings + while (fgets(buf, sizeof(buf), fp) != nullptr) { + lookup_riscv(buf, "model name", desc->model); + + if (lookup_riscv(buf, "isa", desc->isa)) { + // Check for vector extensions + if (strstr(buf, "zve") || strstr(buf, "v_")) { + desc->has_vector = true; + } + // Check for crypto extensions (AES, SHA, etc.) 
+ // zkn* = NIST crypto suite, zks* = SM crypto suite + // Note: zba/zbb/zbc/zbs are bit-manipulation, NOT crypto + if (strstr(buf, "zknd") || strstr(buf, "zkne") || strstr(buf, "zknh") || + strstr(buf, "zksed") || strstr(buf, "zksh")) { + desc->has_crypto = true; + } + } + + lookup_riscv(buf, "uarch", desc->uarch); + + if (desc->isReady() && !desc->isa.isNull()) { + break; + } + } + + fclose(fp); + + return desc->isReady(); +} + +String cpu_name_riscv() +{ + riscv_cpu_desc desc; + if (read_riscv_cpuinfo(&desc)) { + if (!desc.uarch.isNull()) { + return fmt::format("{} ({})", desc.model, desc.uarch).c_str(); + } + return desc.model; + } + + return "RISC-V"; +} + +bool has_riscv_vector() +{ + riscv_cpu_desc desc; + if (read_riscv_cpuinfo(&desc)) { + return desc.has_vector; + } + return false; +} + +bool has_riscv_crypto() +{ + riscv_cpu_desc desc; + if (read_riscv_cpuinfo(&desc)) { + return desc.has_crypto; + } + return false; +} + +} // namespace xmrig diff --git a/src/base/kernel/Platform_unix.cpp b/src/base/kernel/Platform_unix.cpp index 4ffee2140..e53fe58d5 100644 --- a/src/base/kernel/Platform_unix.cpp +++ b/src/base/kernel/Platform_unix.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright (c) 2018-2021 SChernykh - * Copyright (c) 2016-2021 XMRig , + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2016-2025 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -71,11 +71,11 @@ char *xmrig::Platform::createUserAgent() #ifndef XMRIG_FEATURE_HWLOC -#ifdef __DragonFly__ +#if defined(__DragonFly__) || defined(XMRIG_OS_OPENBSD) || defined(XMRIG_OS_HAIKU) bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) { - return true; + return false; } #else diff --git a/src/base/net/tls/TlsContext.cpp b/src/base/net/tls/TlsContext.cpp index 54b904eab..410059fb5 100644 --- a/src/base/net/tls/TlsContext.cpp +++ b/src/base/net/tls/TlsContext.cpp @@ -1,7 +1,7 @@ /* XMRig * Copyright (c) 2018 Lee Clagett - * Copyright (c) 2018-2023 SChernykh - * Copyright (c) 2016-2023 XMRig , + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2016-2025 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,7 +45,7 @@ namespace xmrig { // https://wiki.openssl.org/index.php/Diffie-Hellman_parameters -#if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER) +#if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3)) static DH *get_dh2048() { static unsigned char dhp_2048[] = { @@ -152,7 +152,7 @@ bool xmrig::TlsContext::load(const TlsConfig &config) SSL_CTX_set_options(m_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3); SSL_CTX_set_options(m_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE); -# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER) +# if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3) SSL_CTX_set_max_early_data(m_ctx, 0); # endif @@ -180,7 +180,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites) return true; } -# if OPENSSL_VERSION_NUMBER >= 0x1010100fL && !defined(LIBRESSL_VERSION_NUMBER) +# if OPENSSL_VERSION_NUMBER >= 0x1010100fL || defined(LIBRESSL_HAS_TLS1_3) if (SSL_CTX_set_ciphersuites(m_ctx, ciphersuites) == 1) { return true; } @@ -194,7 +194,7 @@ bool xmrig::TlsContext::setCipherSuites(const char *ciphersuites) bool xmrig::TlsContext::setDH(const char *dhparam) { -# if 
OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER) +# if OPENSSL_VERSION_NUMBER < 0x30000000L || (defined(LIBRESSL_VERSION_NUMBER) && !defined(LIBRESSL_HAS_TLS1_3)) DH *dh = nullptr; if (dhparam != nullptr) { diff --git a/src/base/tools/cryptonote/BlockTemplate.cpp b/src/base/tools/cryptonote/BlockTemplate.cpp index 310fedf4d..1b64f2ee5 100644 --- a/src/base/tools/cryptonote/BlockTemplate.cpp +++ b/src/base/tools/cryptonote/BlockTemplate.cpp @@ -241,8 +241,13 @@ bool xmrig::BlockTemplate::parse(bool hashes) ar(m_amount); ar(m_outputType); - // output type must be txout_to_key (2) or txout_to_tagged_key (3) - if ((m_outputType != 2) && (m_outputType != 3)) { + const bool is_fcmp_pp = (m_coin == Coin::MONERO) && (m_version.first >= 17); + + // output type must be txout_to_key (2) or txout_to_tagged_key (3) for versions < 17, and txout_to_carrot_v1 (0) for version FCMP++ + if (is_fcmp_pp && (m_outputType == 0)) { + // all good + } + else if ((m_outputType != 2) && (m_outputType != 3)) { return false; } @@ -250,6 +255,11 @@ bool xmrig::BlockTemplate::parse(bool hashes) ar(m_ephPublicKey, kKeySize); + if (is_fcmp_pp) { + ar(m_carrotViewTag); + ar(m_janusAnchor); + } + if (m_coin == Coin::ZEPHYR) { if (m_outputType != 2) { return false; diff --git a/src/base/tools/cryptonote/BlockTemplate.h b/src/base/tools/cryptonote/BlockTemplate.h index c731aad23..a4e75f3ff 100644 --- a/src/base/tools/cryptonote/BlockTemplate.h +++ b/src/base/tools/cryptonote/BlockTemplate.h @@ -148,6 +148,8 @@ private: Buffer m_hashes; Buffer m_minerTxMerkleTreeBranch; uint8_t m_rootHash[kHashSize]{}; + uint8_t m_carrotViewTag[3]{}; + uint8_t m_janusAnchor[16]{}; }; diff --git a/src/crypto/cn/CnHash.cpp b/src/crypto/cn/CnHash.cpp index 4b4b006f3..b1f228b21 100644 --- a/src/crypto/cn/CnHash.cpp +++ b/src/crypto/cn/CnHash.cpp @@ -23,7 +23,7 @@ #include "crypto/common/VirtualMemory.h" -#if defined(XMRIG_ARM) +#if defined(XMRIG_ARM) || defined(XMRIG_RISCV) # include "crypto/cn/CryptoNight_arm.h" #else # include "crypto/cn/CryptoNight_x86.h" diff --git a/src/crypto/cn/CryptoNight.h b/src/crypto/cn/CryptoNight.h index 897890d28..d37c3ea8e 100644 --- a/src/crypto/cn/CryptoNight.h +++ b/src/crypto/cn/CryptoNight.h @@ -30,7 +30,7 @@ #include #include -#if defined _MSC_VER || defined XMRIG_ARM +#if defined _MSC_VER || defined XMRIG_ARM || defined XMRIG_RISCV # define ABI_ATTRIBUTE #else # define ABI_ATTRIBUTE __attribute__((ms_abi)) diff --git a/src/crypto/cn/CryptoNight_arm.h b/src/crypto/cn/CryptoNight_arm.h index 7b47e97da..eeb5bd007 100644 --- a/src/crypto/cn/CryptoNight_arm.h +++ b/src/crypto/cn/CryptoNight_arm.h @@ -27,6 +27,9 @@ #ifndef XMRIG_CRYPTONIGHT_ARM_H #define XMRIG_CRYPTONIGHT_ARM_H +#ifdef XMRIG_RISCV +# include "crypto/cn/sse2rvv.h" +#endif #include "base/crypto/keccak.h" #include "crypto/cn/CnAlgo.h" diff --git a/src/crypto/cn/CryptoNight_monero.h b/src/crypto/cn/CryptoNight_monero.h index a9975e784..6c3d115ed 100644 --- a/src/crypto/cn/CryptoNight_monero.h +++ b/src/crypto/cn/CryptoNight_monero.h @@ -30,7 +30,7 @@ #include // VARIANT ALTERATIONS -#ifndef XMRIG_ARM +#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) # define VARIANT1_INIT(part) \ uint64_t tweak1_2_##part = 0; \ if (BASE == Algorithm::CN_1) { \ @@ -60,7 +60,7 @@ } -#ifndef XMRIG_ARM +#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) # define VARIANT2_INIT(part) \ __m128i division_result_xmm_##part = _mm_cvtsi64_si128(static_cast(h##part[12])); \ __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(static_cast(h##part[13])); diff 
--git a/src/crypto/cn/soft_aes.h b/src/crypto/cn/soft_aes.h index fc3712298..6de0089db 100644 --- a/src/crypto/cn/soft_aes.h +++ b/src/crypto/cn/soft_aes.h @@ -29,6 +29,8 @@ #if defined(XMRIG_ARM) # include "crypto/cn/sse2neon.h" +#elif defined(XMRIG_RISCV) +# include "crypto/cn/sse2rvv.h" #elif defined(__GNUC__) # include #else diff --git a/src/crypto/cn/sse2rvv.h b/src/crypto/cn/sse2rvv.h new file mode 100644 index 000000000..d5b525b51 --- /dev/null +++ b/src/crypto/cn/sse2rvv.h @@ -0,0 +1,748 @@ +/* XMRig + * Copyright (c) 2025 Slayingripper + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2016-2025 XMRig + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * SSE to RISC-V Vector (RVV) optimized compatibility header + * Provides both scalar fallback and vectorized implementations using RVV intrinsics + * + * Based on sse2neon.h concepts, adapted for RISC-V architecture with RVV extensions + * Original sse2neon.h: https://github.com/DLTcollab/sse2neon + */ + +#ifndef XMRIG_SSE2RVV_OPTIMIZED_H +#define XMRIG_SSE2RVV_OPTIMIZED_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* Check if RVV is available */ +#if defined(__riscv_vector) +#include +#define USE_RVV_INTRINSICS 1 +#else +#define USE_RVV_INTRINSICS 0 +#endif + +/* 128-bit vector type */ +typedef union { + uint8_t u8[16]; + uint16_t u16[8]; + uint32_t u32[4]; + uint64_t u64[2]; + int8_t i8[16]; + int16_t i16[8]; + int32_t i32[4]; + int64_t i64[2]; +} __m128i_union; + +typedef __m128i_union __m128i; + +/* Set operations */ +static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0) +{ + __m128i result; + result.i32[0] = e0; + result.i32[1] = e1; + result.i32[2] = e2; + result.i32[3] = e3; + return result; +} + +static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0) +{ + __m128i result; + result.i64[0] = e0; + result.i64[1] = e1; + return result; +} + +static inline __m128i _mm_setzero_si128(void) +{ + __m128i result; + memset(&result, 0, sizeof(result)); + return result; +} + +/* Extract/insert operations */ +static inline int _mm_cvtsi128_si32(__m128i a) +{ + return a.i32[0]; +} + +static inline int64_t _mm_cvtsi128_si64(__m128i a) +{ + return a.i64[0]; +} + +static inline __m128i _mm_cvtsi32_si128(int a) +{ + __m128i result = _mm_setzero_si128(); + result.i32[0] = a; + return result; +} + +static inline __m128i _mm_cvtsi64_si128(int64_t a) +{ + __m128i result = _mm_setzero_si128(); + result.i64[0] = a; + return result; +} + +/* Shuffle operations */ +static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8) +{ + __m128i result; + result.u32[0] = a.u32[(imm8 >> 0) & 0x3]; + result.u32[1] = a.u32[(imm8 >> 2) & 0x3]; + result.u32[2] = a.u32[(imm8 >> 4) & 0x3]; + result.u32[3] = a.u32[(imm8 >> 6) & 0x3]; + return result; +} + +/* Logical operations - optimized with RVV when available */ +static inline __m128i _mm_xor_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = 
__riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vxor_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] ^ b.u64[0]; + result.u64[1] = a.u64[1] ^ b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_or_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vor_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] | b.u64[0]; + result.u64[1] = a.u64[1] | b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_and_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vand_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] & b.u64[0]; + result.u64[1] = a.u64[1] & b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_andnot_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vnot_a = __riscv_vnot_v_u64m1(va, vl); + vuint64m1_t vr = __riscv_vand_vv_u64m1(vnot_a, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = (~a.u64[0]) & b.u64[0]; + result.u64[1] = (~a.u64[1]) & b.u64[1]; + return result; +#endif +} + +/* Shift operations */ +static inline __m128i _mm_slli_si128(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + size_t vl = __riscv_vsetvl_e8m1(16); + vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl); + vuint8m1_t vr = __riscv_vslideup_vx_u8m1(__riscv_vmv_v_x_u8m1(0, vl), va, count, vl); + __riscv_vse8_v_u8m1(result.u8, vr, vl); + return result; +#else + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + for (int i = 0; i < 16 - count; i++) { + result.u8[i + count] = a.u8[i]; + } + return result; +#endif +} + +static inline __m128i _mm_srli_si128(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + size_t vl = __riscv_vsetvl_e8m1(16); + vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl); + vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, count, vl); + __riscv_vse8_v_u8m1(result.u8, vr, vl); + return result; +#else + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + for (int i = count; i < 16; i++) { + result.u8[i - count] = a.u8[i]; + } + return result; +#endif +} + +static inline __m128i _mm_slli_epi64(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, imm8, vl); + 
__riscv_vse64_v_u64m1(result.u64, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + result.u64[0] = a.u64[0] << imm8; + result.u64[1] = a.u64[1] << imm8; + } + return result; +#endif +} + +static inline __m128i _mm_srli_epi64(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, imm8, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + result.u64[0] = a.u64[0] >> imm8; + result.u64[1] = a.u64[1] >> imm8; + } + return result; +#endif +} + +/* Load/store operations - optimized with RVV */ +static inline __m128i _mm_load_si128(const __m128i* p) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t v = __riscv_vle64_v_u64m1((const uint64_t*)p, vl); + __riscv_vse64_v_u64m1(result.u64, v, vl); + return result; +#else + __m128i result; + memcpy(&result, p, sizeof(__m128i)); + return result; +#endif +} + +static inline __m128i _mm_loadu_si128(const __m128i* p) +{ + __m128i result; + memcpy(&result, p, sizeof(__m128i)); + return result; +} + +static inline void _mm_store_si128(__m128i* p, __m128i a) +{ +#if USE_RVV_INTRINSICS + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t v = __riscv_vle64_v_u64m1(a.u64, vl); + __riscv_vse64_v_u64m1((uint64_t*)p, v, vl); +#else + memcpy(p, &a, sizeof(__m128i)); +#endif +} + +static inline void _mm_storeu_si128(__m128i* p, __m128i a) +{ + memcpy(p, &a, sizeof(__m128i)); +} + +/* Arithmetic operations - optimized with RVV */ +static inline __m128i _mm_add_epi64(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vadd_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] + b.u64[0]; + result.u64[1] = a.u64[1] + b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_add_epi32(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e32m1(4); + vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl); + vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl); + vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl); + __riscv_vse32_v_u32m1(result.u32, vr, vl); + return result; +#else + __m128i result; + for (int i = 0; i < 4; i++) { + result.i32[i] = a.i32[i] + b.i32[i]; + } + return result; +#endif +} + +static inline __m128i _mm_sub_epi64(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vsub_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] - b.u64[0]; + result.u64[1] = a.u64[1] - b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_mul_epu32(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va_lo = __riscv_vzext_vf2_u64m1(__riscv_vle32_v_u32mf2(&a.u32[0], 
2), vl); + vuint64m1_t vb_lo = __riscv_vzext_vf2_u64m1(__riscv_vle32_v_u32mf2(&b.u32[0], 2), vl); + vuint64m1_t vr = __riscv_vmul_vv_u64m1(va_lo, vb_lo, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0]; + result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2]; + return result; +#endif +} + +/* Unpack operations */ +static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0]; + result.u64[1] = b.u64[0]; + return result; +} + +static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[1]; + result.u64[1] = b.u64[1]; + return result; +} + +/* Pause instruction for spin-wait loops */ +static inline void _mm_pause(void) +{ + /* RISC-V pause hint if available (requires Zihintpause extension) */ +#if defined(__riscv_zihintpause) + __asm__ __volatile__("pause"); +#else + __asm__ __volatile__("nop"); +#endif +} + +/* Memory fence - optimized for RISC-V */ +static inline void _mm_mfence(void) +{ + __asm__ __volatile__("fence rw,rw" ::: "memory"); +} + +static inline void _mm_lfence(void) +{ + __asm__ __volatile__("fence r,r" ::: "memory"); +} + +static inline void _mm_sfence(void) +{ + __asm__ __volatile__("fence w,w" ::: "memory"); +} + +/* Comparison operations */ +static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.u32[i] = (a.u32[i] == b.u32[i]) ? 0xFFFFFFFF : 0; + } + return result; +} + +static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 2; i++) { + result.u64[i] = (a.u64[i] == b.u64[i]) ? 0xFFFFFFFFFFFFFFFFULL : 0; + } + return result; +} + +/* Additional shift operations */ +static inline __m128i _mm_slli_epi32(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 31) { + memset(&result, 0, sizeof(result)); + } else { + size_t vl = __riscv_vsetvl_e32m1(4); + vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl); + vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, imm8, vl); + __riscv_vse32_v_u32m1(result.u32, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 31) { + for (int i = 0; i < 4; i++) result.u32[i] = 0; + } else { + for (int i = 0; i < 4; i++) { + result.u32[i] = a.u32[i] << imm8; + } + } + return result; +#endif +} + +static inline __m128i _mm_srli_epi32(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 31) { + memset(&result, 0, sizeof(result)); + } else { + size_t vl = __riscv_vsetvl_e32m1(4); + vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl); + vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, imm8, vl); + __riscv_vse32_v_u32m1(result.u32, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 31) { + for (int i = 0; i < 4; i++) result.u32[i] = 0; + } else { + for (int i = 0; i < 4; i++) { + result.u32[i] = a.u32[i] >> imm8; + } + } + return result; +#endif +} + +/* 64-bit integer operations */ +static inline __m128i _mm_set1_epi64x(int64_t a) +{ + __m128i result; + result.i64[0] = a; + result.i64[1] = a; + return result; +} + +/* Float type for compatibility */ +typedef __m128i __m128; + +/* Float operations - simplified scalar implementations */ +static inline __m128 _mm_set1_ps(float a) +{ + __m128 result; + uint32_t val; + memcpy(&val, &a, sizeof(float)); + for (int i = 0; i < 4; i++) { + result.u32[i] = val; + } + return result; +} + +static inline __m128 _mm_setzero_ps(void) +{ + __m128 
result; + memset(&result, 0, sizeof(result)); + return result; +} + +static inline __m128 _mm_add_ps(__m128 a, __m128 b) +{ + __m128 result; + float fa[4], fb[4], fr[4]; + memcpy(fa, &a, sizeof(__m128)); + memcpy(fb, &b, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + fr[i] = fa[i] + fb[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128 _mm_mul_ps(__m128 a, __m128 b) +{ + __m128 result; + float fa[4], fb[4], fr[4]; + memcpy(fa, &a, sizeof(__m128)); + memcpy(fb, &b, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + fr[i] = fa[i] * fb[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128 _mm_and_ps(__m128 a, __m128 b) +{ + __m128 result; + result.u64[0] = a.u64[0] & b.u64[0]; + result.u64[1] = a.u64[1] & b.u64[1]; + return result; +} + +static inline __m128 _mm_or_ps(__m128 a, __m128 b) +{ + __m128 result; + result.u64[0] = a.u64[0] | b.u64[0]; + result.u64[1] = a.u64[1] | b.u64[1]; + return result; +} + +static inline __m128 _mm_cvtepi32_ps(__m128i a) +{ + __m128 result; + float fr[4]; + for (int i = 0; i < 4; i++) { + fr[i] = (float)a.i32[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128i _mm_cvttps_epi32(__m128 a) +{ + __m128i result; + float fa[4]; + memcpy(fa, &a, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + result.i32[i] = (int32_t)fa[i]; + } + return result; +} + +/* Casting operations */ +static inline __m128 _mm_castsi128_ps(__m128i a) +{ + __m128 result; + memcpy(&result, &a, sizeof(__m128)); + return result; +} + +static inline __m128i _mm_castps_si128(__m128 a) +{ + __m128i result; + memcpy(&result, &a, sizeof(__m128)); + return result; +} + +/* Additional set operations */ +static inline __m128i _mm_set1_epi32(int a) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.i32[i] = a; + } + return result; +} + +/* AES instructions - placeholders for soft_aes compatibility */ +static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey) +{ + return _mm_xor_si128(a, roundkey); +} + +static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) +{ + return a; +} + +/* Rotate right operation for soft_aes.h */ +static inline uint32_t _rotr(uint32_t value, unsigned int count) +{ + const unsigned int mask = 31; + count &= mask; + return (value >> count) | (value << ((-count) & mask)); +} + +/* ARM NEON compatibility types and intrinsics for RISC-V */ +typedef __m128i_union uint64x2_t; +typedef __m128i_union uint8x16_t; +typedef __m128i_union int64x2_t; +typedef __m128i_union int32x4_t; + +static inline uint64x2_t vld1q_u64(const uint64_t *ptr) +{ + uint64x2_t result; + result.u64[0] = ptr[0]; + result.u64[1] = ptr[1]; + return result; +} + +static inline int64x2_t vld1q_s64(const int64_t *ptr) +{ + int64x2_t result; + result.i64[0] = ptr[0]; + result.i64[1] = ptr[1]; + return result; +} + +static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val) +{ + ptr[0] = val.u64[0]; + ptr[1] = val.u64[1]; +} + +static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b) +{ + return _mm_xor_si128(a, b); +} + +static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b) +{ + return _mm_add_epi64(a, b); +} + +static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a) +{ + uint64x2_t result; + memcpy(&result, &a, sizeof(uint64x2_t)); + return result; +} + +static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane) +{ + return v.u64[lane]; +} + +static inline int64_t vgetq_lane_s64(int64x2_t v, int lane) +{ + return v.i64[lane]; +} + 
+static inline int32_t vgetq_lane_s32(int32x4_t v, int lane) +{ + return v.i32[lane]; +} + +typedef struct { uint64_t val[1]; } uint64x1_t; + +static inline uint64x1_t vcreate_u64(uint64_t a) +{ + uint64x1_t result; + result.val[0] = a; + return result; +} + +static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high) +{ + uint64x2_t result; + result.u64[0] = low.val[0]; + result.u64[1] = high.val[0]; + return result; +} + +#ifdef __cplusplus +} +#endif + +#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */ diff --git a/src/crypto/cn/sse2rvv_optimized.h b/src/crypto/cn/sse2rvv_optimized.h new file mode 100644 index 000000000..f83f1101c --- /dev/null +++ b/src/crypto/cn/sse2rvv_optimized.h @@ -0,0 +1,748 @@ +/* XMRig + * Copyright (c) 2025 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * SSE to RISC-V Vector (RVV) optimized compatibility header + * Provides both scalar fallback and vectorized implementations using RVV intrinsics + */ + +#ifndef XMRIG_SSE2RVV_OPTIMIZED_H +#define XMRIG_SSE2RVV_OPTIMIZED_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* Check if RVV is available */ +#if defined(__riscv_vector) +#include +#define USE_RVV_INTRINSICS 1 +#else +#define USE_RVV_INTRINSICS 0 +#endif + +/* 128-bit vector type */ +typedef union { + uint8_t u8[16]; + uint16_t u16[8]; + uint32_t u32[4]; + uint64_t u64[2]; + int8_t i8[16]; + int16_t i16[8]; + int32_t i32[4]; + int64_t i64[2]; +#if USE_RVV_INTRINSICS + vuint64m1_t rvv_u64; + vuint32m1_t rvv_u32; + vuint8m1_t rvv_u8; +#endif +} __m128i_union; + +typedef __m128i_union __m128i; + +/* Set operations */ +static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0) +{ + __m128i result; + result.i32[0] = e0; + result.i32[1] = e1; + result.i32[2] = e2; + result.i32[3] = e3; + return result; +} + +static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0) +{ + __m128i result; + result.i64[0] = e0; + result.i64[1] = e1; + return result; +} + +static inline __m128i _mm_setzero_si128(void) +{ + __m128i result; + memset(&result, 0, sizeof(result)); + return result; +} + +/* Extract/insert operations */ +static inline int _mm_cvtsi128_si32(__m128i a) +{ + return a.i32[0]; +} + +static inline int64_t _mm_cvtsi128_si64(__m128i a) +{ + return a.i64[0]; +} + +static inline __m128i _mm_cvtsi32_si128(int a) +{ + __m128i result = _mm_setzero_si128(); + result.i32[0] = a; + return result; +} + +static inline __m128i _mm_cvtsi64_si128(int64_t a) +{ + __m128i result = _mm_setzero_si128(); + result.i64[0] = a; + return result; +} + +/* Shuffle operations */ +static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8) +{ + __m128i result; + result.u32[0] = a.u32[(imm8 >> 0) & 0x3]; + result.u32[1] = a.u32[(imm8 >> 2) & 0x3]; + result.u32[2] = a.u32[(imm8 >> 4) & 0x3]; + result.u32[3] = a.u32[(imm8 >> 6) & 0x3]; + return result; +} + +/* Logical operations - optimized with RVV when available */ +static inline __m128i _mm_xor_si128(__m128i 
a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vxor_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] ^ b.u64[0]; + result.u64[1] = a.u64[1] ^ b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_or_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vor_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] | b.u64[0]; + result.u64[1] = a.u64[1] | b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_and_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vand_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] & b.u64[0]; + result.u64[1] = a.u64[1] & b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_andnot_si128(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vnot_a = __riscv_vnot_v_u64m1(va, vl); + vuint64m1_t vr = __riscv_vand_vv_u64m1(vnot_a, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = (~a.u64[0]) & b.u64[0]; + result.u64[1] = (~a.u64[1]) & b.u64[1]; + return result; +#endif +} + +/* Shift operations */ +static inline __m128i _mm_slli_si128(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + size_t vl = __riscv_vsetvl_e8m1(16); + vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl); + vuint8m1_t vr = __riscv_vslideup_vx_u8m1(__riscv_vmv_v_x_u8m1(0, vl), va, count, vl); + __riscv_vse8_v_u8m1(result.u8, vr, vl); + return result; +#else + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + for (int i = 0; i < 16 - count; i++) { + result.u8[i + count] = a.u8[i]; + } + return result; +#endif +} + +static inline __m128i _mm_srli_si128(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + size_t vl = __riscv_vsetvl_e8m1(16); + vuint8m1_t va = __riscv_vle8_v_u8m1(a.u8, vl); + vuint8m1_t vr = __riscv_vslidedown_vx_u8m1(va, count, vl); + __riscv_vse8_v_u8m1(result.u8, vr, vl); + return result; +#else + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + for (int i = count; i < 16; i++) { + result.u8[i - count] = a.u8[i]; + } + return result; +#endif +} + +static inline __m128i _mm_slli_epi64(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); 
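/* Typical shape of the RVV-backed wrappers: vl is set to cover the whole 128-bit value, the lanes are loaded from the union, a single vector op is applied, and the result is stored back through the union fields. */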
+ vuint64m1_t vr = __riscv_vsll_vx_u64m1(va, imm8, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + result.u64[0] = a.u64[0] << imm8; + result.u64[1] = a.u64[1] << imm8; + } + return result; +#endif +} + +static inline __m128i _mm_srli_epi64(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vr = __riscv_vsrl_vx_u64m1(va, imm8, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + result.u64[0] = a.u64[0] >> imm8; + result.u64[1] = a.u64[1] >> imm8; + } + return result; +#endif +} + +/* Load/store operations - optimized with RVV */ +static inline __m128i _mm_load_si128(const __m128i* p) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t v = __riscv_vle64_v_u64m1((const uint64_t*)p, vl); + __riscv_vse64_v_u64m1(result.u64, v, vl); + return result; +#else + __m128i result; + memcpy(&result, p, sizeof(__m128i)); + return result; +#endif +} + +static inline __m128i _mm_loadu_si128(const __m128i* p) +{ + __m128i result; + memcpy(&result, p, sizeof(__m128i)); + return result; +} + +static inline void _mm_store_si128(__m128i* p, __m128i a) +{ +#if USE_RVV_INTRINSICS + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t v = __riscv_vle64_v_u64m1(a.u64, vl); + __riscv_vse64_v_u64m1((uint64_t*)p, v, vl); +#else + memcpy(p, &a, sizeof(__m128i)); +#endif +} + +static inline void _mm_storeu_si128(__m128i* p, __m128i a) +{ + memcpy(p, &a, sizeof(__m128i)); +} + +/* Arithmetic operations - optimized with RVV */ +static inline __m128i _mm_add_epi64(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vadd_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] + b.u64[0]; + result.u64[1] = a.u64[1] + b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_add_epi32(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e32m1(4); + vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl); + vuint32m1_t vb = __riscv_vle32_v_u32m1(b.u32, vl); + vuint32m1_t vr = __riscv_vadd_vv_u32m1(va, vb, vl); + __riscv_vse32_v_u32m1(result.u32, vr, vl); + return result; +#else + __m128i result; + for (int i = 0; i < 4; i++) { + result.i32[i] = a.i32[i] + b.i32[i]; + } + return result; +#endif +} + +static inline __m128i _mm_sub_epi64(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va = __riscv_vle64_v_u64m1(a.u64, vl); + vuint64m1_t vb = __riscv_vle64_v_u64m1(b.u64, vl); + vuint64m1_t vr = __riscv_vsub_vv_u64m1(va, vb, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = a.u64[0] - b.u64[0]; + result.u64[1] = a.u64[1] - b.u64[1]; + return result; +#endif +} + +static inline __m128i _mm_mul_epu32(__m128i a, __m128i b) +{ +#if USE_RVV_INTRINSICS + __m128i result; + size_t vl = __riscv_vsetvl_e64m1(2); + vuint64m1_t va_lo = 
__riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(a.u64, vl), 0xFFFFFFFFULL, vl); + vuint64m1_t vb_lo = __riscv_vand_vx_u64m1(__riscv_vle64_v_u64m1(b.u64, vl), 0xFFFFFFFFULL, vl); + /* masking keeps only the low 32 bits of each 64-bit lane (u32[0] and u32[2]), matching PMULUDQ and the scalar fallback below */ + vuint64m1_t vr = __riscv_vmul_vv_u64m1(va_lo, vb_lo, vl); + __riscv_vse64_v_u64m1(result.u64, vr, vl); + return result; +#else + __m128i result; + result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0]; + result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2]; + return result; +#endif +} + +/* Unpack operations */ +static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0]; + result.u64[1] = b.u64[0]; + return result; +} + +static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[1]; + result.u64[1] = b.u64[1]; + return result; +} + +/* Pause instruction for spin-wait loops */ +static inline void _mm_pause(void) +{ + /* RISC-V pause hint if available (requires Zihintpause extension) */ +#if defined(__riscv_zihintpause) + __asm__ __volatile__("pause"); +#else + __asm__ __volatile__("nop"); +#endif +} + +/* Memory fence - optimized for RISC-V */ +static inline void _mm_mfence(void) +{ + __asm__ __volatile__("fence rw,rw" ::: "memory"); +} + +static inline void _mm_lfence(void) +{ + __asm__ __volatile__("fence r,r" ::: "memory"); +} + +static inline void _mm_sfence(void) +{ + __asm__ __volatile__("fence w,w" ::: "memory"); +} + +/* Comparison operations */ +static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.u32[i] = (a.u32[i] == b.u32[i]) ? 0xFFFFFFFF : 0; + } + return result; +} + +static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 2; i++) { + result.u64[i] = (a.u64[i] == b.u64[i]) ? 
0xFFFFFFFFFFFFFFFFULL : 0; + } + return result; +} + +/* Additional shift operations */ +static inline __m128i _mm_slli_epi32(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 31) { + memset(&result, 0, sizeof(result)); + } else { + size_t vl = __riscv_vsetvl_e32m1(4); + vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl); + vuint32m1_t vr = __riscv_vsll_vx_u32m1(va, imm8, vl); + __riscv_vse32_v_u32m1(result.u32, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 31) { + for (int i = 0; i < 4; i++) result.u32[i] = 0; + } else { + for (int i = 0; i < 4; i++) { + result.u32[i] = a.u32[i] << imm8; + } + } + return result; +#endif +} + +static inline __m128i _mm_srli_epi32(__m128i a, int imm8) +{ +#if USE_RVV_INTRINSICS + __m128i result; + if (imm8 > 31) { + memset(&result, 0, sizeof(result)); + } else { + size_t vl = __riscv_vsetvl_e32m1(4); + vuint32m1_t va = __riscv_vle32_v_u32m1(a.u32, vl); + vuint32m1_t vr = __riscv_vsrl_vx_u32m1(va, imm8, vl); + __riscv_vse32_v_u32m1(result.u32, vr, vl); + } + return result; +#else + __m128i result; + if (imm8 > 31) { + for (int i = 0; i < 4; i++) result.u32[i] = 0; + } else { + for (int i = 0; i < 4; i++) { + result.u32[i] = a.u32[i] >> imm8; + } + } + return result; +#endif +} + +/* 64-bit integer operations */ +static inline __m128i _mm_set1_epi64x(int64_t a) +{ + __m128i result; + result.i64[0] = a; + result.i64[1] = a; + return result; +} + +/* Float type for compatibility */ +typedef __m128i __m128; + +/* Float operations - simplified scalar implementations */ +static inline __m128 _mm_set1_ps(float a) +{ + __m128 result; + uint32_t val; + memcpy(&val, &a, sizeof(float)); + for (int i = 0; i < 4; i++) { + result.u32[i] = val; + } + return result; +} + +static inline __m128 _mm_setzero_ps(void) +{ + __m128 result; + memset(&result, 0, sizeof(result)); + return result; +} + +static inline __m128 _mm_add_ps(__m128 a, __m128 b) +{ + __m128 result; + float fa[4], fb[4], fr[4]; + memcpy(fa, &a, sizeof(__m128)); + memcpy(fb, &b, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + fr[i] = fa[i] + fb[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128 _mm_mul_ps(__m128 a, __m128 b) +{ + __m128 result; + float fa[4], fb[4], fr[4]; + memcpy(fa, &a, sizeof(__m128)); + memcpy(fb, &b, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + fr[i] = fa[i] * fb[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128 _mm_and_ps(__m128 a, __m128 b) +{ + __m128 result; + result.u64[0] = a.u64[0] & b.u64[0]; + result.u64[1] = a.u64[1] & b.u64[1]; + return result; +} + +static inline __m128 _mm_or_ps(__m128 a, __m128 b) +{ + __m128 result; + result.u64[0] = a.u64[0] | b.u64[0]; + result.u64[1] = a.u64[1] | b.u64[1]; + return result; +} + +static inline __m128 _mm_cvtepi32_ps(__m128i a) +{ + __m128 result; + float fr[4]; + for (int i = 0; i < 4; i++) { + fr[i] = (float)a.i32[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128i _mm_cvttps_epi32(__m128 a) +{ + __m128i result; + float fa[4]; + memcpy(fa, &a, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + result.i32[i] = (int32_t)fa[i]; + } + return result; +} + +/* Casting operations */ +static inline __m128 _mm_castsi128_ps(__m128i a) +{ + __m128 result; + memcpy(&result, &a, sizeof(__m128)); + return result; +} + +static inline __m128i _mm_castps_si128(__m128 a) +{ + __m128i result; + memcpy(&result, &a, sizeof(__m128)); + return result; +} + +/* Additional set 
operations */ +static inline __m128i _mm_set1_epi32(int a) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.i32[i] = a; + } + return result; +} + +/* AES instructions - placeholders for soft_aes compatibility */ +static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey) +{ + return _mm_xor_si128(a, roundkey); +} + +static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) +{ + return a; +} + +/* Rotate right operation for soft_aes.h */ +static inline uint32_t _rotr(uint32_t value, unsigned int count) +{ + const unsigned int mask = 31; + count &= mask; + return (value >> count) | (value << ((-count) & mask)); +} + +/* ARM NEON compatibility types and intrinsics for RISC-V */ +typedef __m128i_union uint64x2_t; +typedef __m128i_union uint8x16_t; +typedef __m128i_union int64x2_t; +typedef __m128i_union int32x4_t; + +static inline uint64x2_t vld1q_u64(const uint64_t *ptr) +{ + uint64x2_t result; + result.u64[0] = ptr[0]; + result.u64[1] = ptr[1]; + return result; +} + +static inline int64x2_t vld1q_s64(const int64_t *ptr) +{ + int64x2_t result; + result.i64[0] = ptr[0]; + result.i64[1] = ptr[1]; + return result; +} + +static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val) +{ + ptr[0] = val.u64[0]; + ptr[1] = val.u64[1]; +} + +static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b) +{ + return _mm_xor_si128(a, b); +} + +static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b) +{ + return _mm_add_epi64(a, b); +} + +static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a) +{ + uint64x2_t result; + memcpy(&result, &a, sizeof(uint64x2_t)); + return result; +} + +static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane) +{ + return v.u64[lane]; +} + +static inline int64_t vgetq_lane_s64(int64x2_t v, int lane) +{ + return v.i64[lane]; +} + +static inline int32_t vgetq_lane_s32(int32x4_t v, int lane) +{ + return v.i32[lane]; +} + +typedef struct { uint64_t val[1]; } uint64x1_t; + +static inline uint64x1_t vcreate_u64(uint64_t a) +{ + uint64x1_t result; + result.val[0] = a; + return result; +} + +static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high) +{ + uint64x2_t result; + result.u64[0] = low.val[0]; + result.u64[1] = high.val[0]; + return result; +} + +#ifdef __cplusplus +} +#endif + +#endif /* XMRIG_SSE2RVV_OPTIMIZED_H */ diff --git a/src/crypto/cn/sse2rvv_scalar_backup.h b/src/crypto/cn/sse2rvv_scalar_backup.h new file mode 100644 index 000000000..853adbb88 --- /dev/null +++ b/src/crypto/cn/sse2rvv_scalar_backup.h @@ -0,0 +1,571 @@ +/* XMRig + * Copyright (c) 2025 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +/* + * SSE to RISC-V compatibility header + * Provides scalar implementations of SSE intrinsics for RISC-V architecture + */ + +#ifndef XMRIG_SSE2RVV_H +#define XMRIG_SSE2RVV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* 128-bit vector type */ +typedef union { + uint8_t u8[16]; + uint16_t u16[8]; + uint32_t u32[4]; + uint64_t u64[2]; + int8_t i8[16]; + int16_t i16[8]; + int32_t i32[4]; + int64_t i64[2]; +} __m128i_union; + +typedef __m128i_union __m128i; + +/* Set operations */ +static inline __m128i _mm_set_epi32(int e3, int e2, int e1, int e0) +{ + __m128i result; + result.i32[0] = e0; + result.i32[1] = e1; + result.i32[2] = e2; + result.i32[3] = e3; + return result; +} + +static inline __m128i _mm_set_epi64x(int64_t e1, int64_t e0) +{ + __m128i result; + result.i64[0] = e0; + result.i64[1] = e1; + return result; +} + +static inline __m128i _mm_setzero_si128(void) +{ + __m128i result; + memset(&result, 0, sizeof(result)); + return result; +} + +/* Extract/insert operations */ +static inline int _mm_cvtsi128_si32(__m128i a) +{ + return a.i32[0]; +} + +static inline int64_t _mm_cvtsi128_si64(__m128i a) +{ + return a.i64[0]; +} + +static inline __m128i _mm_cvtsi32_si128(int a) +{ + __m128i result = _mm_setzero_si128(); + result.i32[0] = a; + return result; +} + +static inline __m128i _mm_cvtsi64_si128(int64_t a) +{ + __m128i result = _mm_setzero_si128(); + result.i64[0] = a; + return result; +} + +/* Shuffle operations */ +static inline __m128i _mm_shuffle_epi32(__m128i a, int imm8) +{ + __m128i result; + result.u32[0] = a.u32[(imm8 >> 0) & 0x3]; + result.u32[1] = a.u32[(imm8 >> 2) & 0x3]; + result.u32[2] = a.u32[(imm8 >> 4) & 0x3]; + result.u32[3] = a.u32[(imm8 >> 6) & 0x3]; + return result; +} + +/* Logical operations */ +static inline __m128i _mm_xor_si128(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0] ^ b.u64[0]; + result.u64[1] = a.u64[1] ^ b.u64[1]; + return result; +} + +static inline __m128i _mm_or_si128(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0] | b.u64[0]; + result.u64[1] = a.u64[1] | b.u64[1]; + return result; +} + +static inline __m128i _mm_and_si128(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0] & b.u64[0]; + result.u64[1] = a.u64[1] & b.u64[1]; + return result; +} + +static inline __m128i _mm_andnot_si128(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = (~a.u64[0]) & b.u64[0]; + result.u64[1] = (~a.u64[1]) & b.u64[1]; + return result; +} + +/* Shift operations */ +static inline __m128i _mm_slli_si128(__m128i a, int imm8) +{ + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + for (int i = 0; i < 16 - count; i++) { + result.u8[i + count] = a.u8[i]; + } + return result; +} + +static inline __m128i _mm_srli_si128(__m128i a, int imm8) +{ + __m128i result = _mm_setzero_si128(); + int count = imm8 & 0xFF; + if (count > 15) return result; + + for (int i = count; i < 16; i++) { + result.u8[i - count] = a.u8[i]; + } + return result; +} + +static inline __m128i _mm_slli_epi64(__m128i a, int imm8) +{ + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + result.u64[0] = a.u64[0] << imm8; + result.u64[1] = a.u64[1] << imm8; + } + return result; +} + +static inline __m128i _mm_srli_epi64(__m128i a, int imm8) +{ + __m128i result; + if (imm8 > 63) { + result.u64[0] = 0; + result.u64[1] = 0; + } else { + result.u64[0] = a.u64[0] >> imm8; + result.u64[1] = a.u64[1] >> imm8; + } + return 
result; +} + +/* Load/store operations */ +static inline __m128i _mm_load_si128(const __m128i* p) +{ + __m128i result; + memcpy(&result, p, sizeof(__m128i)); + return result; +} + +static inline __m128i _mm_loadu_si128(const __m128i* p) +{ + __m128i result; + memcpy(&result, p, sizeof(__m128i)); + return result; +} + +static inline void _mm_store_si128(__m128i* p, __m128i a) +{ + memcpy(p, &a, sizeof(__m128i)); +} + +static inline void _mm_storeu_si128(__m128i* p, __m128i a) +{ + memcpy(p, &a, sizeof(__m128i)); +} + +/* Arithmetic operations */ +static inline __m128i _mm_add_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0] + b.u64[0]; + result.u64[1] = a.u64[1] + b.u64[1]; + return result; +} + +static inline __m128i _mm_add_epi32(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.i32[i] = a.i32[i] + b.i32[i]; + } + return result; +} + +static inline __m128i _mm_sub_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0] - b.u64[0]; + result.u64[1] = a.u64[1] - b.u64[1]; + return result; +} + +static inline __m128i _mm_mul_epu32(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = (uint64_t)a.u32[0] * (uint64_t)b.u32[0]; + result.u64[1] = (uint64_t)a.u32[2] * (uint64_t)b.u32[2]; + return result; +} + +/* Unpack operations */ +static inline __m128i _mm_unpacklo_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[0]; + result.u64[1] = b.u64[0]; + return result; +} + +static inline __m128i _mm_unpackhi_epi64(__m128i a, __m128i b) +{ + __m128i result; + result.u64[0] = a.u64[1]; + result.u64[1] = b.u64[1]; + return result; +} + +/* Pause instruction for spin-wait loops */ +static inline void _mm_pause(void) +{ + /* RISC-V doesn't have a direct equivalent to x86 PAUSE + * Use a simple NOP or yield hint */ + __asm__ __volatile__("nop"); +} + +/* Memory fence */ +static inline void _mm_mfence(void) +{ + __asm__ __volatile__("fence" ::: "memory"); +} + +static inline void _mm_lfence(void) +{ + __asm__ __volatile__("fence r,r" ::: "memory"); +} + +static inline void _mm_sfence(void) +{ + __asm__ __volatile__("fence w,w" ::: "memory"); +} + +/* Comparison operations */ +static inline __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.u32[i] = (a.u32[i] == b.u32[i]) ? 0xFFFFFFFF : 0; + } + return result; +} + +static inline __m128i _mm_cmpeq_epi64(__m128i a, __m128i b) +{ + __m128i result; + for (int i = 0; i < 2; i++) { + result.u64[i] = (a.u64[i] == b.u64[i]) ? 
0xFFFFFFFFFFFFFFFFULL : 0; + } + return result; +} + +/* Additional shift operations */ +static inline __m128i _mm_slli_epi32(__m128i a, int imm8) +{ + __m128i result; + if (imm8 > 31) { + for (int i = 0; i < 4; i++) result.u32[i] = 0; + } else { + for (int i = 0; i < 4; i++) { + result.u32[i] = a.u32[i] << imm8; + } + } + return result; +} + +static inline __m128i _mm_srli_epi32(__m128i a, int imm8) +{ + __m128i result; + if (imm8 > 31) { + for (int i = 0; i < 4; i++) result.u32[i] = 0; + } else { + for (int i = 0; i < 4; i++) { + result.u32[i] = a.u32[i] >> imm8; + } + } + return result; +} + +/* 64-bit integer operations */ +static inline __m128i _mm_set1_epi64x(int64_t a) +{ + __m128i result; + result.i64[0] = a; + result.i64[1] = a; + return result; +} + +/* Float type for compatibility - we'll treat it as int for simplicity */ +typedef __m128i __m128; + +/* Float operations - simplified scalar implementations */ +static inline __m128 _mm_set1_ps(float a) +{ + __m128 result; + uint32_t val; + memcpy(&val, &a, sizeof(float)); + for (int i = 0; i < 4; i++) { + result.u32[i] = val; + } + return result; +} + +static inline __m128 _mm_setzero_ps(void) +{ + __m128 result; + memset(&result, 0, sizeof(result)); + return result; +} + +static inline __m128 _mm_add_ps(__m128 a, __m128 b) +{ + __m128 result; + float fa[4], fb[4], fr[4]; + memcpy(fa, &a, sizeof(__m128)); + memcpy(fb, &b, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + fr[i] = fa[i] + fb[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128 _mm_mul_ps(__m128 a, __m128 b) +{ + __m128 result; + float fa[4], fb[4], fr[4]; + memcpy(fa, &a, sizeof(__m128)); + memcpy(fb, &b, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + fr[i] = fa[i] * fb[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128 _mm_and_ps(__m128 a, __m128 b) +{ + __m128 result; + result.u64[0] = a.u64[0] & b.u64[0]; + result.u64[1] = a.u64[1] & b.u64[1]; + return result; +} + +static inline __m128 _mm_or_ps(__m128 a, __m128 b) +{ + __m128 result; + result.u64[0] = a.u64[0] | b.u64[0]; + result.u64[1] = a.u64[1] | b.u64[1]; + return result; +} + +static inline __m128 _mm_cvtepi32_ps(__m128i a) +{ + __m128 result; + float fr[4]; + for (int i = 0; i < 4; i++) { + fr[i] = (float)a.i32[i]; + } + memcpy(&result, fr, sizeof(__m128)); + return result; +} + +static inline __m128i _mm_cvttps_epi32(__m128 a) +{ + __m128i result; + float fa[4]; + memcpy(fa, &a, sizeof(__m128)); + for (int i = 0; i < 4; i++) { + result.i32[i] = (int32_t)fa[i]; + } + return result; +} + +/* Casting operations */ +static inline __m128 _mm_castsi128_ps(__m128i a) +{ + __m128 result; + memcpy(&result, &a, sizeof(__m128)); + return result; +} + +static inline __m128i _mm_castps_si128(__m128 a) +{ + __m128i result; + memcpy(&result, &a, sizeof(__m128)); + return result; +} + +/* Additional set operations */ +static inline __m128i _mm_set1_epi32(int a) +{ + __m128i result; + for (int i = 0; i < 4; i++) { + result.i32[i] = a; + } + return result; +} + +/* AES instructions - these are placeholders, actual AES is done via soft_aes.h */ +/* On RISC-V without crypto extensions, these should never be called directly */ +/* They are only here for compilation compatibility */ +static inline __m128i _mm_aesenc_si128(__m128i a, __m128i roundkey) +{ + /* This is a placeholder - actual implementation should use soft_aes */ + /* If this function is called, it means SOFT_AES template parameter wasn't used */ + /* We return a XOR as a minimal 
fallback, but proper code should use soft_aesenc */ + return _mm_xor_si128(a, roundkey); +} + +static inline __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) +{ + /* Placeholder for AES key generation - should use soft_aeskeygenassist */ + return a; +} + +/* Rotate right operation for soft_aes.h */ +static inline uint32_t _rotr(uint32_t value, unsigned int count) +{ + const unsigned int mask = 31; + count &= mask; + return (value >> count) | (value << ((-count) & mask)); +} + +/* ARM NEON compatibility types and intrinsics for RISC-V */ +typedef __m128i_union uint64x2_t; +typedef __m128i_union uint8x16_t; +typedef __m128i_union int64x2_t; +typedef __m128i_union int32x4_t; + +static inline uint64x2_t vld1q_u64(const uint64_t *ptr) +{ + uint64x2_t result; + result.u64[0] = ptr[0]; + result.u64[1] = ptr[1]; + return result; +} + +static inline int64x2_t vld1q_s64(const int64_t *ptr) +{ + int64x2_t result; + result.i64[0] = ptr[0]; + result.i64[1] = ptr[1]; + return result; +} + +static inline void vst1q_u64(uint64_t *ptr, uint64x2_t val) +{ + ptr[0] = val.u64[0]; + ptr[1] = val.u64[1]; +} + +static inline uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + result.u64[0] = a.u64[0] ^ b.u64[0]; + result.u64[1] = a.u64[1] ^ b.u64[1]; + return result; +} + +static inline uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + result.u64[0] = a.u64[0] + b.u64[0]; + result.u64[1] = a.u64[1] + b.u64[1]; + return result; +} + +static inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t a) +{ + uint64x2_t result; + memcpy(&result, &a, sizeof(uint64x2_t)); + return result; +} + +static inline uint64_t vgetq_lane_u64(uint64x2_t v, int lane) +{ + return v.u64[lane]; +} + +static inline int64_t vgetq_lane_s64(int64x2_t v, int lane) +{ + return v.i64[lane]; +} + +static inline int32_t vgetq_lane_s32(int32x4_t v, int lane) +{ + return v.i32[lane]; +} + +typedef struct { uint64_t val[1]; } uint64x1_t; + +static inline uint64x1_t vcreate_u64(uint64_t a) +{ + uint64x1_t result; + result.val[0] = a; + return result; +} + +static inline uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high) +{ + uint64x2_t result; + result.u64[0] = low.val[0]; + result.u64[1] = high.val[0]; + return result; +} + +#ifdef __cplusplus +} +#endif + +#endif /* XMRIG_SSE2RVV_H */ diff --git a/src/crypto/common/LinuxMemory.cpp b/src/crypto/common/LinuxMemory.cpp index 8a00e1c36..a09f5a1c7 100644 --- a/src/crypto/common/LinuxMemory.cpp +++ b/src/crypto/common/LinuxMemory.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright (c) 2018-2021 SChernykh - * Copyright (c) 2016-2021 XMRig , + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2016-2025 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,15 +35,69 @@ constexpr size_t twoMiB = 2U * 1024U * 1024U; constexpr size_t oneGiB = 1024U * 1024U * 1024U; -static inline std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr) +static bool sysfs_write(const std::string &path, uint64_t value) +{ + std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc); + if (!file.is_open()) { + return false; + } + + file << value; + file.flush(); + + return true; +} + + +static int64_t sysfs_read(const std::string &path) +{ + std::ifstream file(path); + if (!file.is_open()) { + return -1; + } + + uint64_t value = 0; + file >> value; + + return value; +} + + +static std::string sysfs_path(uint32_t node, size_t hugePageSize, 
bool nr) { return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free"); } -static inline bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) { return LinuxMemory::write(sysfs_path(node, hugePageSize, true).c_str(), count); } -static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, false).c_str()); } -static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, true).c_str()); } +static std::string sysfs_path(size_t hugePageSize, bool nr) +{ + return fmt::format("/sys/kernel/mm/hugepages/hugepages-{}kB/{}_hugepages", hugePageSize / 1024, nr ? "nr" : "free"); +} + + +static bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) +{ + if (sysfs_write(sysfs_path(node, hugePageSize, true), count)) { + return true; + } + + return sysfs_write(sysfs_path(hugePageSize, true), count); +} + + +static int64_t sysfs_read_hugepages(uint32_t node, size_t hugePageSize, bool nr) +{ + const int64_t value = sysfs_read(sysfs_path(node, hugePageSize, nr)); + if (value >= 0) { + return value; + } + + return sysfs_read(sysfs_path(hugePageSize, nr)); +} + + +static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, false); } +static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, true); } } // namespace xmrig @@ -62,31 +116,3 @@ bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, size_t hugePageSize return write_nr_hugepages(node, hugePageSize, std::max(nr_hugepages(node, hugePageSize), 0) + (required - available)); } - - -bool xmrig::LinuxMemory::write(const char *path, uint64_t value) -{ - std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc); - if (!file.is_open()) { - return false; - } - - file << value; - file.flush(); - - return true; -} - - -int64_t xmrig::LinuxMemory::read(const char *path) -{ - std::ifstream file(path); - if (!file.is_open()) { - return -1; - } - - uint64_t value = 0; - file >> value; - - return value; -} diff --git a/src/crypto/common/LinuxMemory.h b/src/crypto/common/LinuxMemory.h index 0d71af249..c39f96edc 100644 --- a/src/crypto/common/LinuxMemory.h +++ b/src/crypto/common/LinuxMemory.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright (c) 2018-2021 SChernykh - * Copyright (c) 2016-2021 XMRig , + * Copyright (c) 2018-2025 SChernykh + * Copyright (c) 2016-2025 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,13 +31,10 @@ class LinuxMemory { public: static bool reserve(size_t size, uint32_t node, size_t hugePageSize); - - static bool write(const char *path, uint64_t value); - static int64_t read(const char *path); }; -} /* namespace xmrig */ +} // namespace xmrig -#endif /* XMRIG_LINUXMEMORY_H */ +#endif // XMRIG_LINUXMEMORY_H diff --git a/src/crypto/common/MemoryPool.cpp b/src/crypto/common/MemoryPool.cpp index e99757ee9..0e809125a 100644 --- a/src/crypto/common/MemoryPool.cpp +++ b/src/crypto/common/MemoryPool.cpp @@ -49,7 +49,7 @@ xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node) constexpr size_t alignment = 1 << 24; - m_memory = new VirtualMemory(size * pageSize + alignment, hugePages, false, false, node); + m_memory = new 
VirtualMemory(size * pageSize + alignment, hugePages, false, false, node, VirtualMemory::kDefaultHugePageSize); m_alignOffset = (alignment - (((size_t)m_memory->scratchpad()) % alignment)) % alignment; } diff --git a/src/crypto/common/VirtualMemory.cpp b/src/crypto/common/VirtualMemory.cpp index e425750dd..d7d3a545e 100644 --- a/src/crypto/common/VirtualMemory.cpp +++ b/src/crypto/common/VirtualMemory.cpp @@ -75,6 +75,16 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages } m_scratchpad = static_cast(_mm_malloc(m_size, alignSize)); + + // Huge pages failed to allocate, but try to enable transparent huge pages for the range + if (alignSize >= kDefaultHugePageSize) { + if (m_scratchpad) { + adviseLargePages(m_scratchpad, m_size); + } + else { + m_scratchpad = static_cast(_mm_malloc(m_size, 64)); + } + } } diff --git a/src/crypto/common/VirtualMemory.h b/src/crypto/common/VirtualMemory.h index 3056cbaed..2edd3ae92 100644 --- a/src/crypto/common/VirtualMemory.h +++ b/src/crypto/common/VirtualMemory.h @@ -65,6 +65,7 @@ public: static void *allocateExecutableMemory(size_t size, bool hugePages); static void *allocateLargePagesMemory(size_t size); static void *allocateOneGbPagesMemory(size_t size); + static bool adviseLargePages(void *p, size_t size); static void destroy(); static void flushInstructionCache(void *p, size_t size); static void freeLargePagesMemory(void *p, size_t size); diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 003b92e45..471c9cf07 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -86,7 +86,7 @@ bool xmrig::VirtualMemory::isHugepagesAvailable() { # ifdef XMRIG_OS_LINUX return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good(); -# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) +# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) || defined(XMRIG_OS_HAIKU) return false; # else return true; @@ -156,7 +156,8 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages if (!mem) { mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } - +# elif defined(XMRIG_OS_HAIKU) + void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); # else void *mem = nullptr; @@ -181,6 +182,8 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); # elif defined(XMRIG_OS_FREEBSD) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); +# elif defined(XMRIG_OS_HAIKU) + void *mem = nullptr; # else void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0); # endif @@ -273,6 +276,16 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory() } +bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size) +{ +# ifdef XMRIG_OS_LINUX + return (madvise(p, size, MADV_HUGEPAGE) == 0); +# else + return false; +# endif +} + + void xmrig::VirtualMemory::freeLargePagesMemory() { if (m_flags.test(FLAG_LOCK)) { diff --git a/src/crypto/common/VirtualMemory_win.cpp b/src/crypto/common/VirtualMemory_win.cpp index acf8119fa..28f515bac 100644 --- a/src/crypto/common/VirtualMemory_win.cpp +++ 
b/src/crypto/common/VirtualMemory_win.cpp @@ -260,6 +260,12 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory() } +bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size) +{ + return false; +} + + void xmrig::VirtualMemory::freeLargePagesMemory() { freeLargePagesMemory(m_scratchpad, m_size); diff --git a/src/crypto/common/portable/mm_malloc.h b/src/crypto/common/portable/mm_malloc.h index 34ca7d48b..388da645a 100644 --- a/src/crypto/common/portable/mm_malloc.h +++ b/src/crypto/common/portable/mm_malloc.h @@ -26,7 +26,7 @@ #define XMRIG_MM_MALLOC_PORTABLE_H -#if defined(XMRIG_ARM) && !defined(__clang__) +#if (defined(XMRIG_ARM) || defined(XMRIG_RISCV)) && !defined(__clang__) #include diff --git a/src/crypto/ghostrider/ghostrider.cpp b/src/crypto/ghostrider/ghostrider.cpp index 25bb44e74..4a21ae032 100644 --- a/src/crypto/ghostrider/ghostrider.cpp +++ b/src/crypto/ghostrider/ghostrider.cpp @@ -57,6 +57,9 @@ #if defined(XMRIG_ARM) # include "crypto/cn/sse2neon.h" +#elif defined(XMRIG_RISCV) + // RISC-V doesn't have SSE/NEON, provide minimal compatibility +# define _mm_pause() __asm__ __volatile__("nop") #elif defined(__GNUC__) # include #else @@ -286,7 +289,7 @@ struct HelperThread void benchmark() { -#ifndef XMRIG_ARM +#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) static std::atomic done{ 0 }; if (done.exchange(1)) { return; @@ -478,7 +481,7 @@ static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambd HelperThread* create_helper_thread(int64_t cpu_index, int priority, const std::vector& affinities) { -#ifndef XMRIG_ARM +#if !defined(XMRIG_ARM) && !defined(XMRIG_RISCV) hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc(); hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc(); @@ -807,7 +810,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct uint32_t cn_indices[6]; select_indices(cn_indices, seed); -#ifdef XMRIG_ARM +#if defined(XMRIG_ARM) || defined(XMRIG_RISCV) uint32_t step[6] = { 1, 1, 1, 1, 1, 1 }; #else uint32_t step[6] = { 4, 4, 1, 2, 4, 4 }; diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index 38eb4d645..04b813b15 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -235,6 +235,131 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); +#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED) +static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 }; +static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 }; + +static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 }; +static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 }; + +static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 }; +static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b }; + +static constexpr uint32_t AES_HASH_STRIDE[8] = { 0, 4, 8, 12, 32, 36, 40, 44 }; + +template +void 
hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { + PROFILE_SCOPE(RandomX_AES); + + uint8_t* scratchpadPtr = (uint8_t*)scratchpad; + const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize; + + vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8); + vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8); + + const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8); + const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8); + + const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE, 8); + + vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8); + vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8); + + const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32); + const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32); + const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32); + const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32); + + const vuint8m1_t& lutdec_index0 = lutenc_index0; + const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32); + const vuint8m1_t& lutdec_index2 = lutenc_index2; + const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32); + + //process 64 bytes at a time in 4 lanes + while (scratchpadPtr < scratchpadEnd) { +#define HASH_STATE(k) \ + hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \ + hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); + +#define FILL_STATE(k) \ + fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \ + fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \ + __riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \ + __riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8); + + switch (softAes) { + case 0: + HASH_STATE(0); + HASH_STATE(1); + + FILL_STATE(0); + FILL_STATE(1); + + scratchpadPtr += 128; + break; + + default: + switch (unroll) { + case 4: + HASH_STATE(0); + FILL_STATE(0); + + HASH_STATE(1); + FILL_STATE(1); + + HASH_STATE(2); + FILL_STATE(2); + + HASH_STATE(3); + FILL_STATE(3); + + scratchpadPtr += 64 * 4; + break; + + case 2: + HASH_STATE(0); + FILL_STATE(0); + + HASH_STATE(1); + FILL_STATE(1); + + scratchpadPtr += 64 * 2; + break; + + default: + HASH_STATE(0); + FILL_STATE(0); + + scratchpadPtr += 64; + break; + } + break; + } + } + +#undef HASH_STATE +#undef FILL_STATE + + __riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8); + __riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8); + + //two extra rounds to achieve full diffusion + const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8); + const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8); + + hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, 
lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); + hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); + + hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); + hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); + + //output hash + __riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8); + __riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8); +} + +#else // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED) + template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { PROFILE_SCOPE(RandomX_AES); @@ -375,6 +500,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2); rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3); } +#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED) template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); diff --git a/src/crypto/randomx/common.hpp b/src/crypto/randomx/common.hpp index 98f96727b..6fbfb9785 100644 --- a/src/crypto/randomx/common.hpp +++ b/src/crypto/randomx/common.hpp @@ -111,6 +111,10 @@ namespace randomx { #define RANDOMX_HAVE_COMPILER 1 class JitCompilerA64; using JitCompiler = JitCompilerA64; +#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64) + #define RANDOMX_HAVE_COMPILER 1 + class JitCompilerRV64; + using JitCompiler = JitCompilerRV64; #else #define RANDOMX_HAVE_COMPILER 0 class JitCompilerFallback; diff --git a/src/crypto/randomx/jit_compiler.hpp b/src/crypto/randomx/jit_compiler.hpp index db635c6f4..114ec3bd0 100644 --- a/src/crypto/randomx/jit_compiler.hpp +++ b/src/crypto/randomx/jit_compiler.hpp @@ -32,6 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "crypto/randomx/jit_compiler_x86.hpp" #elif defined(__aarch64__) #include "crypto/randomx/jit_compiler_a64.hpp" +#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64) +#include "crypto/randomx/jit_compiler_rv64.hpp" #else #include "crypto/randomx/jit_compiler_fallback.hpp" #endif diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index 860503081..6192cdeca 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -67,7 +67,6 @@ constexpr uint32_t LDR_LITERAL = 0x58000000; constexpr uint32_t ROR = 0x9AC02C00; constexpr uint32_t ROR_IMM = 0x93C00000; constexpr uint32_t MOV_REG = 0xAA0003E0; -constexpr uint32_t MOV_VREG_EL = 0x6E080400; constexpr uint32_t FADD = 0x4E60D400; constexpr uint32_t FSUB = 0x4EE0D400; constexpr uint32_t FEOR = 0x6E201C00; @@ -102,7 +101,7 @@ static size_t CalcDatasetItemSize() ((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result); } -constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; +constexpr uint8_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) : hugePages(hugePagesJIT && hugePagesEnable), @@ -128,11 +127,12 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con uint32_t codePos = MainLoopBegin + 4; + uint32_t mask = ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10); // and w16, w10, ScratchpadL3Mask64 - emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); + emit32(0x121A0000 | 16 | (10 << 5) | mask, code, codePos); // and w17, w20, ScratchpadL3Mask64 - emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); + emit32(0x121A0000 | 17 | (20 << 5) | mask, code, codePos); codePos = PrologueSize; literalPos = ImulRcpLiteralsEnd; @@ -155,13 +155,14 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; emit32(ARMV8A::B | (offset / 4), code, codePos); - // and w20, w20, CacheLineAlignMask + mask = ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10); + // and w20, w9, CacheLineAlignMask codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); - emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); + emit32(0x121A0000 | 20 | (9 << 5) | mask, code, codePos); // and w10, w10, CacheLineAlignMask codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); - emit32(0x121A0000 | 10 | (10 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); + emit32(0x121A0000 | 10 | (10 << 5) | mask, code, codePos); // Update spMix1 // eor x10, config.readReg0, config.readReg1 @@ -497,9 +498,12 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, if (src != dst) { imm &= instr.getModMem() ? 
(RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); - emitAddImmediate(tmp_reg, src, imm, code, k); + uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); + if (imm) + emitAddImmediate(tmp_reg, src, imm, code, k); + else + t = 0x927d0000 | tmp_reg | (src << 5); - constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10); const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10); @@ -511,10 +515,18 @@ void JitCompilerA64::emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, else { imm = (imm & ScratchpadL3Mask) >> 3; - emitMovImmediate(tmp_reg, imm, code, k); + if (imm) + { + emitMovImmediate(tmp_reg, imm, code, k); - // ldr tmp_reg, [x2, tmp_reg, lsl 3] - emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k); + // ldr tmp_reg, [x2, tmp_reg, lsl 3] + emit32(0xf8607840 | tmp_reg | (tmp_reg << 16), code, k); + } + else + { + // ldr tmp_reg, [x2] + emit32(0xf9400040 | tmp_reg, code, k); + } } codePos = k; @@ -529,25 +541,22 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co constexpr uint32_t tmp_reg = 19; imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); - emitAddImmediate(tmp_reg, src, imm, code, k); + uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); + if (imm) + emitAddImmediate(tmp_reg, src, imm, code, k); + else + t = 0x927d0000 | tmp_reg | (src << 5); - constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 10); const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10); emit32(instr.getModMem() ? 
andInstrL1 : andInstrL2, code, k); - // add tmp_reg, x2, tmp_reg - emit32(ARMV8A::ADD | tmp_reg | (2 << 5) | (tmp_reg << 16), code, k); + // ldr tmp_reg_fp, [x2, tmp_reg] + emit32(0x3ce06800 | tmp_reg_fp | (2 << 5) | (tmp_reg << 16), code, k); - // ldpsw tmp_reg, tmp_reg + 1, [tmp_reg] - emit32(0x69400000 | tmp_reg | (tmp_reg << 5) | ((tmp_reg + 1) << 10), code, k); - - // ins tmp_reg_fp.d[0], tmp_reg - emit32(0x4E081C00 | tmp_reg_fp | (tmp_reg << 5), code, k); - - // ins tmp_reg_fp.d[1], tmp_reg + 1 - emit32(0x4E181C00 | tmp_reg_fp | ((tmp_reg + 1) << 5), code, k); + // sxtl.2d tmp_reg_fp, tmp_reg_fp + emit32(0x0f20a400 | tmp_reg_fp | (tmp_reg_fp << 5), code, k); // scvtf tmp_reg_fp.2d, tmp_reg_fp.2d emit32(0x4E61D800 | tmp_reg_fp | (tmp_reg_fp << 5), code, k); @@ -835,7 +844,8 @@ void JitCompilerA64::h_IROR_R(Instruction& instr, uint32_t& codePos) else { // ror dst, dst, imm - emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos); + if ((instr.getImm32() & 63)) + emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((instr.getImm32() & 63) << 10) | (dst << 16), code, codePos); } reg_changed_offset[instr.dst] = codePos; @@ -861,7 +871,8 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos) else { // ror dst, dst, imm - emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k); + if ((instr.getImm32() & 63)) + emit32(ARMV8A::ROR_IMM | dst | (dst << 5) | ((-instr.getImm32() & 63) << 10) | (dst << 16), code, k); } reg_changed_offset[instr.dst] = k; @@ -894,13 +905,8 @@ void JitCompilerA64::h_FSWAP_R(Instruction& instr, uint32_t& codePos) const uint32_t dst = instr.dst + 16; - constexpr uint32_t tmp_reg_fp = 28; - constexpr uint32_t src_index1 = 1 << 14; - constexpr uint32_t dst_index1 = 1 << 20; - - emit32(ARMV8A::MOV_VREG_EL | tmp_reg_fp | (dst << 5) | src_index1, code, k); - emit32(ARMV8A::MOV_VREG_EL | dst | (dst << 5) | dst_index1, code, k); - emit32(ARMV8A::MOV_VREG_EL | dst | (tmp_reg_fp << 5), code, k); + // ext dst.16b, dst.16b, dst.16b, #0x8 + emit32(0x6e004000 | dst | (dst << 5) | (dst << 16), code, k); codePos = k; } @@ -1029,11 +1035,19 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos) constexpr uint32_t tmp_reg = 20; constexpr uint32_t fpcr_tmp_reg = 8; - // ror tmp_reg, src, imm - emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k); + if (instr.getImm32() & 63) + { + // ror tmp_reg, src, imm + emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k); - // bfi fpcr_tmp_reg, tmp_reg, 40, 2 - emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k); + // bfi fpcr_tmp_reg, tmp_reg, 40, 2 + emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k); + } + else // no rotation + { + // bfi fpcr_tmp_reg, src, 40, 2 + emit32(0xB3580400 | fpcr_tmp_reg | (src << 5), code, k); + } // rbit tmp_reg, fpcr_tmp_reg emit32(0xDAC00000 | tmp_reg | (fpcr_tmp_reg << 5), code, k); @@ -1059,9 +1073,12 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos) else imm &= RandomX_CurrentConfig.ScratchpadL3_Size - 1; - emitAddImmediate(tmp_reg, dst, imm, code, k); + uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); + if (imm) + emitAddImmediate(tmp_reg, dst, imm, code, k); + else + t = 0x927d0000 | tmp_reg | (dst << 5); - constexpr uint32_t t = 0x927d0000 | tmp_reg | (tmp_reg << 5); const uint32_t andInstrL1 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL1 - 4) << 
10); const uint32_t andInstrL2 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL2 - 4) << 10); const uint32_t andInstrL3 = t | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 4) << 10); diff --git a/src/crypto/randomx/jit_compiler_a64_static.S b/src/crypto/randomx/jit_compiler_a64_static.S index e019c6b4b..6133b284a 100644 --- a/src/crypto/randomx/jit_compiler_a64_static.S +++ b/src/crypto/randomx/jit_compiler_a64_static.S @@ -100,9 +100,9 @@ # v26 -> "a2" # v27 -> "a3" # v28 -> temporary -# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff -# v30 -> E 'or' mask = 0x3*00000000******3*00000000****** -# v31 -> scale mask = 0x81f000000000000081f0000000000000 +# v29 -> E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff +# v30 -> E 'or' mask = 0x3*00000000******'3*00000000****** +# v31 -> scale mask = 0x80f0000000000000'80f0000000000000 .balign 4 DECL(randomx_program_aarch64): @@ -142,17 +142,14 @@ DECL(randomx_program_aarch64): ldp q26, q27, [x0, 224] # Load E 'and' mask - mov x16, 0x00FFFFFFFFFFFFFF - ins v29.d[0], x16 - ins v29.d[1], x16 + movi v29.2d, #0x00FFFFFFFFFFFFFF # Load E 'or' mask (stored in reg.f[0]) ldr q30, [x0, 64] # Load scale mask mov x16, 0x80f0000000000000 - ins v31.d[0], x16 - ins v31.d[1], x16 + dup v31.2d, x16 # Read fpcr mrs x8, fpcr @@ -162,35 +159,22 @@ DECL(randomx_program_aarch64): str x0, [sp, -16]! # Read literals - ldr x0, literal_x0 - ldr x11, literal_x11 - ldr x21, literal_x21 - ldr x22, literal_x22 - ldr x23, literal_x23 - ldr x24, literal_x24 - ldr x25, literal_x25 - ldr x26, literal_x26 - ldr x27, literal_x27 - ldr x28, literal_x28 - ldr x29, literal_x29 - ldr x30, literal_x30 + adr x30, literal_v0 + ldp q0, q1, [x30] + ldp q2, q3, [x30, 32] + ldp q4, q5, [x30, 64] + ldp q6, q7, [x30, 96] + ldp q8, q9, [x30, 128] + ldp q10, q11, [x30, 160] + ldp q12, q13, [x30, 192] + ldp q14, q15, [x30, 224] - ldr q0, literal_v0 - ldr q1, literal_v1 - ldr q2, literal_v2 - ldr q3, literal_v3 - ldr q4, literal_v4 - ldr q5, literal_v5 - ldr q6, literal_v6 - ldr q7, literal_v7 - ldr q8, literal_v8 - ldr q9, literal_v9 - ldr q10, literal_v10 - ldr q11, literal_v11 - ldr q12, literal_v12 - ldr q13, literal_v13 - ldr q14, literal_v14 - ldr q15, literal_v15 + ldp x0, x11, [x30, -96] // literal_x0 + ldp x21, x22, [x30, -80] // literal_x21 + ldp x23, x24, [x30, -64] // literal_x23 + ldp x25, x26, [x30, -48] // literal_x25 + ldp x27, x28, [x30, -32] // literal_x27 + ldp x29, x30, [x30, -16] // literal_x29 DECL(randomx_program_aarch64_main_loop): # spAddr0 = spMix1 & ScratchpadL3Mask64; @@ -221,40 +205,31 @@ DECL(randomx_program_aarch64_main_loop): eor x15, x15, x19 # Load group F registers (spAddr1) - ldpsw x20, x19, [x17] - ins v16.d[0], x20 - ins v16.d[1], x19 - ldpsw x20, x19, [x17, 8] - ins v17.d[0], x20 - ins v17.d[1], x19 - ldpsw x20, x19, [x17, 16] - ins v18.d[0], x20 - ins v18.d[1], x19 - ldpsw x20, x19, [x17, 24] - ins v19.d[0], x20 - ins v19.d[1], x19 + ldr q17, [x17] + sxtl v16.2d, v17.2s scvtf v16.2d, v16.2d + sxtl2 v17.2d, v17.4s scvtf v17.2d, v17.2d + + ldr q19, [x17, 16] + sxtl v18.2d, v19.2s scvtf v18.2d, v18.2d + sxtl2 v19.2d, v19.4s scvtf v19.2d, v19.2d # Load group E registers (spAddr1) - ldpsw x20, x19, [x17, 32] - ins v20.d[0], x20 - ins v20.d[1], x19 - ldpsw x20, x19, [x17, 40] - ins v21.d[0], x20 - ins v21.d[1], x19 - ldpsw x20, x19, [x17, 48] - ins v22.d[0], x20 - ins v22.d[1], x19 - ldpsw x20, x19, [x17, 56] - ins v23.d[0], x20 - ins v23.d[1], x19 + ldr q21, [x17, 32] + sxtl v20.2d, v21.2s scvtf v20.2d, v20.2d + sxtl2 v21.2d, v21.4s scvtf v21.2d, v21.2d + + 
ldr q23, [x17, 48] + sxtl v22.2d, v23.2s scvtf v22.2d, v22.2d + sxtl2 v23.2d, v23.4s scvtf v23.2d, v23.2d + and v20.16b, v20.16b, v29.16b and v21.16b, v21.16b, v29.16b and v22.16b, v22.16b, v29.16b @@ -310,10 +285,9 @@ DECL(randomx_program_aarch64_vm_instructions_end): eor x9, x9, x20 # Calculate dataset pointer for dataset prefetch - mov w20, w9 DECL(randomx_program_aarch64_cacheline_align_mask1): # Actual mask will be inserted by JIT compiler - and x20, x20, 1 + and x20, x9, 1 add x20, x20, x1 # Prefetch dataset data @@ -491,42 +465,39 @@ DECL(randomx_calc_dataset_item_aarch64): stp x10, x11, [sp, 80] stp x12, x13, [sp, 96] - ldr x12, superscalarMul0 + adr x7, superscalarMul0 + # superscalarMul0, superscalarAdd1 + ldp x12, x13, [x7] - mov x8, x0 - mov x9, x1 + ldp x8, x9, [sp] mov x10, x2 # rl[0] = (itemNumber + 1) * superscalarMul0; madd x0, x2, x12, x12 # rl[1] = rl[0] ^ superscalarAdd1; - ldr x12, superscalarAdd1 - eor x1, x0, x12 + eor x1, x0, x13 # rl[2] = rl[0] ^ superscalarAdd2; - ldr x12, superscalarAdd2 + ldp x12, x13, [x7, 16] eor x2, x0, x12 # rl[3] = rl[0] ^ superscalarAdd3; - ldr x12, superscalarAdd3 - eor x3, x0, x12 + eor x3, x0, x13 # rl[4] = rl[0] ^ superscalarAdd4; - ldr x12, superscalarAdd4 + ldp x12, x13, [x7, 32] eor x4, x0, x12 # rl[5] = rl[0] ^ superscalarAdd5; - ldr x12, superscalarAdd5 - eor x5, x0, x12 + eor x5, x0, x13 # rl[6] = rl[0] ^ superscalarAdd6; - ldr x12, superscalarAdd6 + ldp x12, x13, [x7, 48] eor x6, x0, x12 # rl[7] = rl[0] ^ superscalarAdd7; - ldr x12, superscalarAdd7 - eor x7, x0, x12 + eor x7, x0, x13 b DECL(randomx_calc_dataset_item_aarch64_prefetch) diff --git a/src/crypto/randomx/jit_compiler_rv64.cpp b/src/crypto/randomx/jit_compiler_rv64.cpp new file mode 100644 index 000000000..161343471 --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64.cpp @@ -0,0 +1,1187 @@ +/* +Copyright (c) 2023 tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include +#include +#include +#include "crypto/randomx/jit_compiler_rv64.hpp" +#include "crypto/randomx/jit_compiler_rv64_static.hpp" +#include "crypto/randomx/jit_compiler_rv64_vector.h" +#include "crypto/randomx/jit_compiler_rv64_vector_static.h" +#include "crypto/randomx/superscalar.hpp" +#include "crypto/randomx/program.hpp" +#include "crypto/randomx/reciprocal.h" +#include "crypto/randomx/virtual_memory.hpp" +#include "crypto/common/VirtualMemory.h" + + +static bool hugePagesJIT = false; +static int optimizedDatasetInit = -1; + +void randomx_set_huge_pages_jit(bool hugePages) +{ + hugePagesJIT = hugePages; +} + +void randomx_set_optimized_dataset_init(int value) +{ + optimizedDatasetInit = value; +} + +#define alignSize(pos, align) (((pos - 1) / align + 1) * align) + + +namespace rv64 { + constexpr uint16_t C_LUI = 0x6001; + constexpr uint32_t LUI = 0x00000037; + constexpr uint16_t C_ADDI = 0x0001; + constexpr uint32_t ADDI = 0x00000013; + constexpr uint32_t ADDIW = 0x0000001b; + constexpr uint16_t C_ADD = 0x9002; + constexpr uint32_t ADD = 0x00000033; + constexpr uint32_t SHXADD = 0x20000033; //Zba + constexpr uint32_t SLL = 0x00001033; + constexpr uint32_t SRL = 0x00005033; + constexpr uint32_t SLLI = 0x00001013; + constexpr uint32_t C_SLLI = 0x0002; + constexpr uint32_t SRLI = 0x00005013; + constexpr uint32_t AND = 0x00007033; + constexpr uint32_t ANDI = 0x00007013; + constexpr uint16_t C_AND = 0x8c61; + constexpr uint16_t C_ANDI = 0x8801; + constexpr uint32_t OR = 0x00006033; + constexpr uint16_t C_OR = 0x8c41; + constexpr uint32_t XOR = 0x00004033; + constexpr uint16_t C_XOR = 0x8c21; + constexpr uint32_t LD = 0x00003003; + constexpr uint16_t C_LD = 0x6000; + constexpr uint16_t C_LW = 0x4000; + constexpr uint32_t SD = 0x00003023; + constexpr uint32_t SUB = 0x40000033; + constexpr uint16_t C_SUB = 0x8c01; + constexpr uint32_t MUL = 0x02000033; + constexpr uint32_t MULHU = 0x02003033; + constexpr uint32_t MULH = 0x02001033; + constexpr uint16_t C_MV = 0x8002; + constexpr uint32_t ROR = 0x60005033; //Zbb + constexpr uint32_t RORI = 0x60005013; //Zbb + constexpr uint32_t ROL = 0x60001033; //Zbb + constexpr uint32_t FMV_X_D = 0xe2000053; + constexpr uint32_t FMV_D_X = 0xf2000053; + constexpr uint32_t FMV_D = 0x22000053; + constexpr uint32_t FADD_D = 0x02007053; + constexpr uint32_t FSUB_D = 0x0a007053; + constexpr uint32_t FMUL_D = 0x12007053; + constexpr uint32_t FDIV_D = 0x1a007053; + constexpr uint32_t FSQRT_D = 0x5a007053; + constexpr uint32_t FCVT_D_W = 0xd2000053; + constexpr uint32_t FSRM = 0x00201073; + constexpr uint16_t C_BEQZ = 0xc001; + constexpr uint32_t BEQ = 0x00000063; + constexpr uint16_t C_BNEZ = 0xe001; + constexpr uint32_t JAL = 0x0000006f; + constexpr uint16_t C_RET = 0x8082; +} + +namespace randomx { + + constexpr size_t MaxRandomXInstrCodeSize = 56; //FDIV_M requires 56 bytes of rv64 code + constexpr size_t MaxSuperscalarInstrSize = 12; //IXOR_C requires 12 bytes of rv64 code + constexpr size_t SuperscalarProgramHeader = 136; //overhead per superscalar program + constexpr size_t CodeAlign = 4096; //align code size to a multiple of 4 KiB + constexpr size_t LiteralPoolSize = CodeAlign; + constexpr size_t SuperscalarLiteraPoolSize = RANDOMX_CACHE_MAX_ACCESSES * CodeAlign; + constexpr size_t ReserveCodeSize = CodeAlign; //prologue, epilogue + reserve + + constexpr size_t RandomXCodeSize = alignSize(LiteralPoolSize + ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_MAX_SIZE, CodeAlign); + constexpr size_t SuperscalarSize = 
alignSize(SuperscalarLiteraPoolSize + ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_MAX_ACCESSES, CodeAlign); + + constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize; + constexpr uint32_t ExecutableSize = CodeSize - LiteralPoolSize; + + constexpr int32_t LiteralPoolOffset = LiteralPoolSize / 2; + constexpr int32_t SuperScalarLiteralPoolOffset = RandomXCodeSize; + constexpr int32_t SuperScalarLiteralPoolRefOffset = RandomXCodeSize + (RANDOMX_CACHE_MAX_ACCESSES - 1) * LiteralPoolSize + LiteralPoolOffset; + constexpr int32_t SuperScalarHashOffset = SuperScalarLiteralPoolOffset + SuperscalarLiteraPoolSize; + + constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) { + return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x); + } + +#define MaskL1Shift (32 - RandomX_CurrentConfig.Log2_ScratchpadL1) +#define MaskL2Shift (32 - RandomX_CurrentConfig.Log2_ScratchpadL2) +#define MaskL3Shift (32 - RandomX_CurrentConfig.Log2_ScratchpadL3) + + constexpr int RcpLiteralsOffset = 144; + + constexpr int LiteralPoolReg = 3; //x3 + constexpr int SpadReg = 5; //x5 + constexpr int DataReg = 6; //x6 + constexpr int SuperscalarReg = 7; //x7 + constexpr int SshTmp1Reg = 28; //x28 + constexpr int SshTmp2Reg = 29; //x29 + constexpr int SshPoolReg = 30; //x30 + constexpr int SshRcpReg = 31; //x31 + constexpr int Tmp1Reg = 8; //x8 + constexpr int Tmp2Reg = 9; //x9 + constexpr int Tmp1RegF = 24; //f24 + constexpr int Tmp2RegF = 25; //f25 + constexpr int MaskL1Reg = 10; //x10 + constexpr int MaskL2Reg = 11; //x11 + constexpr int MaskFscalReg = 12; //x12 + constexpr int MaskEclear = 13; //x13 + constexpr int MaskEsetLo = 14; //x14 + constexpr int MaskEsetHi = 15; //x15 + constexpr int MaskL3Reg = 1; //x1 + constexpr int ReturnReg = 1; //x1 + constexpr int SpAddr0Reg = 26; //x26 + constexpr int OffsetXC = -8; //x8-x15 + constexpr int OffsetR = 16; //x16-x23 + constexpr int OffsetF = 0; //f0-f7 + constexpr int OffsetE = 8; //f8-f15 + constexpr int OffsetA = 16; //f16-f23 + constexpr int OffsetRcp = 28; //x28-x31 + constexpr int OffsetRcpF = 22; //f26-f31 + constexpr int OffsetSsh = 8; //x8-x15 + + //destination register (bit 7+) + constexpr int rvrd(int reg) { + return reg << 7; + } + + //first source register (bit 15+) + constexpr int rvrs1(int reg) { + return reg << 15; + } + + //second source register (bit 20+) + constexpr int rvrs2(int reg) { + return reg << 20; + } + + //compressed source register (bit 2+) + constexpr int rvcrs(int reg) { + return reg << 2; + } + + //base instruction: {op} x{rd}, x{rs1}, x{rs2} + constexpr uint32_t rvi(uint32_t op, int rd, int rs1, int rs2 = 0) { + return op | rvrs2(rs2) | rvrs1(rs1) | rvrd(rd); + } + + //compressed instruction: op x{rd}, x{rs} + constexpr uint16_t rvc(uint16_t op, int rd, int rs) { + return op | rvrd(rd) | rvcrs(rs); + } + + //compressed instruction: op x{rd}, imm6 + constexpr uint16_t rvc(uint16_t op, int imm5, int rd, int imm40) { + return op | (imm5 << 12) | rvrd(rd) | (imm40 << 2); + } + + constexpr int regR(int reg) { + return reg + OffsetR; + } + + constexpr int regLoA(int reg) { + return 2 * reg + OffsetA; + } + + constexpr int regHiA(int reg) { + return 2 * reg + OffsetA + 1; + } + + constexpr int regLoF(int reg) { + return 2 * reg + OffsetF; + } + + constexpr int regHiF(int reg) { + return 2 * reg + OffsetF + 1; + } + + constexpr int regLoE(int reg) { + return 2 * reg + OffsetE; + } + + constexpr int regHiE(int reg) { + return 2 * reg + OffsetE 
+ 1; + } + + constexpr int regRcp(int reg) { + return reg + OffsetRcp; + } + + constexpr int regRcpF(int reg) { + return reg + OffsetRcpF; + } + + constexpr int regSS(int reg) { + return reg + OffsetSsh; + } + + static const uint8_t* codeLiterals = (uint8_t*)&randomx_riscv64_literals; + static const uint8_t* codeLiteralsEnd = (uint8_t*)&randomx_riscv64_literals_end; + static const uint8_t* codeDataInit = (uint8_t*)&randomx_riscv64_data_init; + static const uint8_t* codeFixDataCall = (uint8_t*)&randomx_riscv64_fix_data_call; + static const uint8_t* codePrologue = (uint8_t*)&randomx_riscv64_prologue; + static const uint8_t* codeLoopBegin = (uint8_t*)&randomx_riscv64_loop_begin; + static const uint8_t* codeDataRead = (uint8_t*)&randomx_riscv64_data_read; + static const uint8_t* codeDataReadLight = (uint8_t*)&randomx_riscv64_data_read_light; + static const uint8_t* codeFixLoopCall = (uint8_t*)&randomx_riscv64_fix_loop_call; + static const uint8_t* codeSpadStore = (uint8_t*)&randomx_riscv64_spad_store; + static const uint8_t* codeSpadStoreHardAes = (uint8_t*)&randomx_riscv64_spad_store_hardaes; + static const uint8_t* codeSpadStoreSoftAes = (uint8_t*)&randomx_riscv64_spad_store_softaes; + static const uint8_t* codeLoopEnd = (uint8_t*)&randomx_riscv64_loop_end; + static const uint8_t* codeFixContinueLoop = (uint8_t*)&randomx_riscv64_fix_continue_loop; + static const uint8_t* codeEpilogue = (uint8_t*)&randomx_riscv64_epilogue; + static const uint8_t* codeSoftAes = (uint8_t*)&randomx_riscv64_softaes; + static const uint8_t* codeProgramEnd = (uint8_t*)&randomx_riscv64_program_end; + static const uint8_t* codeSshInit = (uint8_t*)&randomx_riscv64_ssh_init; + static const uint8_t* codeSshLoad = (uint8_t*)&randomx_riscv64_ssh_load; + static const uint8_t* codeSshPrefetch = (uint8_t*)&randomx_riscv64_ssh_prefetch; + static const uint8_t* codeSshEnd = (uint8_t*)&randomx_riscv64_ssh_end; + + static const int32_t sizeLiterals = codeLiteralsEnd - codeLiterals; + static const int32_t sizeDataInit = codePrologue - codeDataInit; + static const int32_t sizePrologue = codeLoopBegin - codePrologue; + static const int32_t sizeLoopBegin = codeDataRead - codeLoopBegin; + static const int32_t sizeDataRead = codeDataReadLight - codeDataRead; + static const int32_t sizeDataReadLight = codeSpadStore - codeDataReadLight; + static const int32_t sizeSpadStore = codeSpadStoreHardAes - codeSpadStore; + static const int32_t sizeSpadStoreSoftAes = codeLoopEnd - codeSpadStoreSoftAes; + static const int32_t sizeLoopEnd = codeEpilogue - codeLoopEnd; + static const int32_t sizeEpilogue = codeSoftAes - codeEpilogue; + static const int32_t sizeSoftAes = codeProgramEnd - codeSoftAes; + static const int32_t sizeSshInit = codeSshLoad - codeSshInit; + static const int32_t sizeSshLoad = codeSshPrefetch - codeSshLoad; + static const int32_t sizeSshPrefetch = codeSshEnd - codeSshPrefetch; + + static const int32_t offsetFixDataCall = codeFixDataCall - codeDataInit; + static const int32_t offsetFixLoopCall = codeFixLoopCall - codeDataReadLight; + static const int32_t offsetFixContinueLoop = codeFixContinueLoop - codeLoopEnd; + + static const int32_t LoopTopPos = LiteralPoolSize + sizeDataInit + sizePrologue; + static const int32_t RandomXCodePos = LoopTopPos + sizeLoopBegin; + + static void clearCache(CodeBuffer& buf) { +#ifdef __GNUC__ + __builtin___clear_cache((char*)buf.code, (char*)(buf.code + CodeSize)); +#endif + } + + //emits code to calculate: x{dst} = x{src} + {imm32} + //takes 1-3 isns, 2-10 bytes + static void 
emitImm32(CodeBuffer& buf, int32_t imm, int dst, int src = 0, int tmp = 0) { + + //lower 12 bits + int32_t limm = (imm << 20) >> 20; + //upper 20 bits + int32_t uimm = (imm >> 12) + (limm < 0); + + //If there are no upper bits, the whole thing + //can be done with a single instruction. + if (uimm == 0) { + //addi x{dst}, x{src}, {limm} + buf.emit(rvi(rv64::ADDI, dst, src, limm)); + return; + } + + //dst1 is the register where imm will be materialized + int dst1 = src != dst ? dst : tmp; + assert(dst1 != 0); + //src1 is the register that will be added to the result + int src1 = src != dst ? src : dst1; + + //load upper bits + if (uimm >= -32 && uimm <= 31) { + //c.lui x{dst1}, {uimm} + buf.emit(rvc(rv64::C_LUI, (uimm < 0), dst1, (uimm & 31))); + } + else { + //lui x{dst1}, {uimm} + buf.emit(rv64::LUI | (uimm << 12) | rvrd(dst1)); + } + //load lower bits + if (limm != 0) { + //Note: this must be addiw NOT addi, otherwise the upper 32 bits + //of the 64-bit register will be incorrect. + //addiw x{dst1}, x{dst1}, {limm} + buf.emit(rvi(rv64::ADDIW, dst1, dst1, limm)); + } + //add src + if (src1 != 0) { + //c.add x{dst}, x{src1} + buf.emit(rvc(rv64::C_ADD, dst, src1)); + } + } + + //x9 = &Scratchpad[isn.imm] + //takes 3 isns, 10 bytes + static void genAddressRegImm(CodeBuffer& buf, const Instruction& isn) { + //signed offset 8-byte aligned + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()) & ScratchpadL3Mask; + //x9 = x5 + {imm} + emitImm32(buf, imm, Tmp2Reg, SpadReg, Tmp1Reg); + } + + //x9 = &Scratchpad[isn.src + isn.imm] (for reading) + //takes 5 isns, 12 bytes + static void genAddressReg(CodeBuffer& buf, const Instruction& isn) { + int shift, maskReg; + if (isn.getModMem()) { + shift = MaskL1Shift; + maskReg = MaskL1Reg; + } + else { + shift = MaskL2Shift; + maskReg = MaskL2Reg; + } + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + imm = (imm << shift) >> shift; + //x9 = x{src} + {imm} + emitImm32(buf, imm, Tmp2Reg, regR(isn.src), Tmp1Reg); + //c.and x9, x{maskReg} + buf.emit(rvc(rv64::C_AND, (Tmp2Reg + OffsetXC), (maskReg + OffsetXC))); + //c.add x9, x{spadReg} + buf.emit(rvc(rv64::C_ADD, Tmp2Reg, SpadReg)); + } + + //x8 = Scratchpad[isn] + static void loadFromScratchpad(CodeBuffer& buf, const Instruction& isn) { + if (isn.src != isn.dst) { + //x9 = &Scratchpad[isn.src + isn.imm] + genAddressReg(buf, isn); + } + else { + ///x9 = &Scratchpad[isn.imm] + genAddressRegImm(buf, isn); + } + //c.ld x8, 0(x9) + buf.emit(rvc(rv64::C_LD, Tmp2Reg + OffsetXC, Tmp1Reg + OffsetXC)); + } + + //x9 = &Scratchpad[isn.dst + isn.imm32] (for writing) + //takes 5 isns, 12-16 bytes + static void genAddressRegDst(CodeBuffer& buf, const Instruction& isn) { + if (isn.getModCond() < StoreL3Condition) { + int shift, maskReg; + if (isn.getModMem()) { + shift = MaskL1Shift; + maskReg = MaskL1Reg; + } + else { + shift = MaskL2Shift; + maskReg = MaskL2Reg; + } + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + imm = (imm << shift) >> shift; + //x9 = x{dst} + {imm} + emitImm32(buf, imm, Tmp2Reg, regR(isn.dst), Tmp1Reg); + //c.and x9, x{maskReg} + buf.emit(rvc(rv64::C_AND, Tmp2Reg + OffsetXC, maskReg + OffsetXC)); + //c.add x9, x5 + buf.emit(rvc(rv64::C_ADD, Tmp2Reg, SpadReg)); + } + else { + int shift = MaskL3Shift; + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + imm = (imm << shift) >> shift; + //x9 = x{dst} + {imm} + emitImm32(buf, imm, Tmp2Reg, regR(isn.dst), Tmp1Reg); + //and x9, x9, x1 + buf.emit(rvi(rv64::AND, Tmp2Reg, Tmp2Reg, MaskL3Reg)); + //c.add x9, x5 + 
buf.emit(rvc(rv64::C_ADD, Tmp2Reg, SpadReg)); + } + } + + static void emitRcpLiteral1(CodeBuffer& buf, uint64_t literal) { + //first 238 at positive offsets + if (buf.rcpCount < 238) { + buf.emitAt(LiteralPoolOffset + RcpLiteralsOffset + buf.rcpCount * 8, literal); + buf.rcpCount++; + } + //next 256 at negative offsets + else if (buf.rcpCount < 494) { + buf.emitAt(buf.rcpCount * 8 - (2048 - RcpLiteralsOffset), literal); + buf.rcpCount++; + } + else { + //checked at compile time, but double-check here + throw std::runtime_error("Literal pool overflow"); + } + } + + static void emitRcpLiteral2(CodeBuffer& buf, uint64_t literal, bool lastLiteral) { + //store the current literal in the pool + int32_t offset = 2040 - buf.rcpCount * 8; + buf.emitAt(SuperScalarLiteralPoolRefOffset + offset, literal); + buf.rcpCount++; + if (lastLiteral) { + return; + } + //load the next literal + offset -= 8; + int32_t imm = offset & 0xfff; + //ld x31, {offset}(x30) + buf.emit(rvi(rv64::LD, SshRcpReg, SshPoolReg, imm)); + if (imm == 0x800) { + //move pool pointer back 4KB + //c.lui x29, 0xfffff + buf.emit(rvc(rv64::C_LUI, 1, SshTmp2Reg, 31)); + //c.add x30, x29 + buf.emit(rvc(rv64::C_ADD, SshPoolReg, SshTmp2Reg)); + } + } + + static void emitJump(CodeBuffer& buf, int dst, int32_t codePos, int32_t targetPos) { + int32_t imm = targetPos - codePos; + int32_t imm20 = (imm < 0) << 11; + int32_t imm1912 = (imm >> 7) & 8160; + int32_t imm11 = (imm >> 11) & 1; + int32_t imm101 = imm & 2046; + //jal x{dst}, {imm} + buf.emitAt(codePos, rvi(rv64::JAL, dst + imm1912, 0, imm20 + imm101 + imm11)); + } + + static void emitInstruction(CompilerState& state, Instruction isn, int i) { + state.instructionOffsets[i] = state.codePos; + (*JitCompilerRV64::engine[isn.opcode])(state, isn, i); + } + + static void emitProgramPrefix(CompilerState& state, Program& prog, ProgramConfiguration& pcfg) { + state.codePos = RandomXCodePos; + state.rcpCount = 0; + state.emitAt(LiteralPoolOffset + sizeLiterals, pcfg.eMask[0]); + state.emitAt(LiteralPoolOffset + sizeLiterals + 8, pcfg.eMask[1]); + for (unsigned i = 0; i < RegistersCount; ++i) { + state.registerUsage[i] = -1; + } + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction instr = prog(i); + instr.src %= RegistersCount; + instr.dst %= RegistersCount; + emitInstruction(state, instr, i); + } + } + + static void emitProgramSuffix(CompilerState& state, ProgramConfiguration& pcfg) { + state.emit(codeSpadStore, sizeSpadStore); + int32_t fixPos = state.codePos; + state.emit(codeLoopEnd, sizeLoopEnd); + //xor x26, x{readReg0}, x{readReg1} + state.emitAt(fixPos, rvi(rv64::XOR, SpAddr0Reg, regR(pcfg.readReg0), regR(pcfg.readReg1))); + fixPos += offsetFixContinueLoop; + //j LoopTop + emitJump(state, 0, fixPos, LoopTopPos); + state.emit(codeEpilogue, sizeEpilogue); + } + + static void generateSuperscalarCode(CodeBuffer& buf, Instruction isn, bool lastLiteral) { + switch ((SuperscalarInstructionType)isn.opcode) + { + case randomx::SuperscalarInstructionType::ISUB_R: + //c.sub x{dst}, x{src} + buf.emit(rvc(rv64::C_SUB, regSS(isn.dst) + OffsetXC, regSS(isn.src) + OffsetXC)); + break; + case randomx::SuperscalarInstructionType::IXOR_R: + //c.xor x{dst}, x{src} + buf.emit(rvc(rv64::C_XOR, regSS(isn.dst) + OffsetXC, regSS(isn.src) + OffsetXC)); + break; + case randomx::SuperscalarInstructionType::IADD_RS: + { + int shift = isn.getModShift(); + if (shift == 0) { + //c.add x{dst}, x{src} + buf.emit(rvc(rv64::C_ADD, regSS(isn.dst), regSS(isn.src))); + } + else { +#ifdef __riscv_zba + //sh{1,2,3}add 
x{dst}, x{src}, x{dst} + buf.emit(rv64::SHXADD | rvrs2(regSS(isn.dst)) | rvrs1(regSS(isn.src)) | (shift << 13) | rvrd(regSS(isn.dst))); +#else + //slli x28, x{src}, {shift} + buf.emit(rvi(rv64::SLLI, SshTmp1Reg, regSS(isn.src), shift)); + //c.add x{dst}, x28 + buf.emit(rvc(rv64::C_ADD, regSS(isn.dst), SshTmp1Reg)); +#endif + } + } + break; + case randomx::SuperscalarInstructionType::IMUL_R: + //mul x{dst}, x{dst}, x{src} + buf.emit(rvi(rv64::MUL, regSS(isn.dst), regSS(isn.dst), regSS(isn.src))); + break; + case randomx::SuperscalarInstructionType::IROR_C: + { +#ifdef __riscv_zbb + int32_t imm = isn.getImm32() & 63; + //rori x{dst}, x{dst}, {imm} + buf.emit(rvi(rv64::RORI, regSS(isn.dst), regSS(isn.dst), imm)); +#else + int32_t immr = isn.getImm32() & 63; + int32_t imml = -immr & 63; + int32_t imml5 = imml >> 5; + int32_t imml40 = imml & 31; + //srli x28, x{dst}, {immr} + buf.emit(rvi(rv64::SRLI, SshTmp1Reg, regSS(isn.dst), immr)); + //c.slli x{dst}, {imml} + buf.emit(rvc(rv64::C_SLLI, imml5, regSS(isn.dst), imml40)); + //or x{dst}, x{dst}, x28 + buf.emit(rvi(rv64::OR, regSS(isn.dst), regSS(isn.dst), SshTmp1Reg)); +#endif + } + break; + case randomx::SuperscalarInstructionType::IADD_C7: + case randomx::SuperscalarInstructionType::IADD_C8: + case randomx::SuperscalarInstructionType::IADD_C9: + { + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + //x{dst} = x{dst} + {imm} + emitImm32(buf, imm, regSS(isn.dst), regSS(isn.dst), SshTmp1Reg); + } + break; + case randomx::SuperscalarInstructionType::IXOR_C7: + case randomx::SuperscalarInstructionType::IXOR_C8: + case randomx::SuperscalarInstructionType::IXOR_C9: + { + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + //x28 = {imm} + emitImm32(buf, imm, SshTmp1Reg); + //xor x{dst}, x{dst}, x28 + buf.emit(rvi(rv64::XOR, regSS(isn.dst), regSS(isn.dst), SshTmp1Reg)); + } + break; + case randomx::SuperscalarInstructionType::IMULH_R: + //mulhu x{dst}, x{dst}, x{src} + buf.emit(rvi(rv64::MULHU, regSS(isn.dst), regSS(isn.dst), regSS(isn.src))); + break; + case randomx::SuperscalarInstructionType::ISMULH_R: + //mulh x{dst}, x{dst}, x{src} + buf.emit(rvi(rv64::MULH, regSS(isn.dst), regSS(isn.dst), regSS(isn.src))); + break; + case randomx::SuperscalarInstructionType::IMUL_RCP: + //mul x{dst}, x{dst}, x31 + buf.emit(rvi(rv64::MUL, regSS(isn.dst), regSS(isn.dst), SshRcpReg)); + //load the next literal into x31 + emitRcpLiteral2(buf, randomx_reciprocal(isn.getImm32()), lastLiteral); + break; + default: + UNREACHABLE; + } + } + + size_t JitCompilerRV64::getCodeSize() { + return CodeSize; + } + + JitCompilerRV64::JitCompilerRV64(bool hugePagesEnable, bool) { + state.code = static_cast<uint8_t*>(allocExecutableMemory(CodeSize, hugePagesJIT && hugePagesEnable)); + state.emitAt(LiteralPoolOffset, codeLiterals, sizeLiterals); + + const uint32_t L1_Mask = RandomX_CurrentConfig.ScratchpadL1_Size - 8; + const uint32_t L2_Mask = RandomX_CurrentConfig.ScratchpadL2_Size - 8; + const uint32_t L3_Mask = RandomX_CurrentConfig.ScratchpadL3_Size - 64; + const uint32_t DatasetBaseSize_Mask = RandomX_CurrentConfig.DatasetBaseSize - 64; + + state.emitAt(LiteralPoolOffset + 80, reinterpret_cast<const uint8_t*>(&L1_Mask), sizeof(L1_Mask)); + state.emitAt(LiteralPoolOffset + 84, reinterpret_cast<const uint8_t*>(&L2_Mask), sizeof(L2_Mask)); + state.emitAt(LiteralPoolOffset + 88, reinterpret_cast<const uint8_t*>(&L3_Mask), sizeof(L3_Mask)); + state.emitAt(LiteralPoolOffset + 92, reinterpret_cast<const uint8_t*>(&DatasetBaseSize_Mask), sizeof(DatasetBaseSize_Mask)); + + state.emitAt(LiteralPoolSize, codeDataInit, sizeDataInit + sizePrologue
+ sizeLoopBegin); + entryDataInit = state.code + LiteralPoolSize; + entryProgram = state.code + LiteralPoolSize + sizeDataInit; + //jal x1, SuperscalarHash + emitJump(state, ReturnReg, LiteralPoolSize + offsetFixDataCall, SuperScalarHashOffset); + + vectorCodeSize = ((uint8_t*)randomx_riscv64_vector_sshash_end) - ((uint8_t*)randomx_riscv64_vector_sshash_begin); + vectorCode = static_cast<uint8_t*>(allocExecutableMemory(vectorCodeSize, hugePagesJIT && hugePagesEnable)); + } + + JitCompilerRV64::~JitCompilerRV64() { + freePagedMemory(state.code, CodeSize); + freePagedMemory(vectorCode, vectorCodeSize); + } + + void JitCompilerRV64::enableWriting() const + { + xmrig::VirtualMemory::protectRW(entryDataInit, ExecutableSize); + + if (vectorCode) { + xmrig::VirtualMemory::protectRW(vectorCode, vectorCodeSize); + } + } + + void JitCompilerRV64::enableExecution() const + { + xmrig::VirtualMemory::protectRX(entryDataInit, ExecutableSize); + + if (vectorCode) { + xmrig::VirtualMemory::protectRX(vectorCode, vectorCodeSize); + } + } + + void JitCompilerRV64::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t) { + emitProgramPrefix(state, prog, pcfg); + int32_t fixPos = state.codePos; + state.emit(codeDataRead, sizeDataRead); + //xor x8, x{readReg2}, x{readReg3} + state.emitAt(fixPos, rvi(rv64::XOR, Tmp1Reg, regR(pcfg.readReg2), regR(pcfg.readReg3))); + emitProgramSuffix(state, pcfg); + clearCache(state); + } + + void JitCompilerRV64::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { + emitProgramPrefix(state, prog, pcfg); + int32_t fixPos = state.codePos; + state.emit(codeDataReadLight, sizeDataReadLight); + //xor x8, x{readReg2}, x{readReg3} + state.emitAt(fixPos, rvi(rv64::XOR, Tmp1Reg, regR(pcfg.readReg2), regR(pcfg.readReg3))); + int32_t imm = datasetOffset / CacheLineSize; + int32_t limm = (imm << 20) >> 20; + int32_t uimm = (imm >> 12) + (limm < 0); + //lui x9, {uimm} + state.emitAt(fixPos + 4, rv64::LUI | (uimm << 12) | rvrd(Tmp2Reg)); + //addi x9, x9, {limm} + state.emitAt(fixPos + 8, rvi(rv64::ADDI, Tmp2Reg, Tmp2Reg, limm)); + fixPos += offsetFixLoopCall; + //jal x1, SuperscalarHash + emitJump(state, ReturnReg, fixPos, SuperScalarHashOffset); + emitProgramSuffix(state, pcfg); + clearCache(state); + } + + template<size_t N> + void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) { + if (optimizedDatasetInit > 0) { + entryDataInitOptimized = generateDatasetInitVectorRV64(vectorCode, vectorCodeSize, programs, RandomX_ConfigurationBase::CacheAccesses); + return; + } + + state.codePos = SuperScalarHashOffset; + state.rcpCount = 0; + state.emit(codeSshInit, sizeSshInit); + + std::pair<uint32_t, uint32_t> lastLiteral{ 0xFFFFFFFFUL, 0xFFFFFFFFUL }; + + for (int j = RandomX_ConfigurationBase::CacheAccesses - 1; (j >= 0) && (lastLiteral.first == 0xFFFFFFFFUL); --j) { + SuperscalarProgram& prog = programs[j]; + for (int i = prog.getSize() - 1; i >= 0; --i) { + if (prog(i).opcode == static_cast<uint8_t>(SuperscalarInstructionType::IMUL_RCP)) { + lastLiteral.first = j; + lastLiteral.second = i; + break; + } + } + } + + for (unsigned j = 0; j < RandomX_ConfigurationBase::CacheAccesses; ++j) { + SuperscalarProgram& prog = programs[j]; + for (unsigned i = 0; i < prog.getSize(); ++i) { + Instruction instr = prog(i); + generateSuperscalarCode(state, instr, (j == lastLiteral.first) && (i == lastLiteral.second)); + } + state.emit(codeSshLoad, sizeSshLoad); + if (j < RandomX_ConfigurationBase::CacheAccesses - 1) { + int32_t fixPos = state.codePos; + state.emit(codeSshPrefetch,
sizeSshPrefetch); + //and x7, x{addrReg}, x7 + state.emitAt(fixPos, rvi(rv64::AND, SuperscalarReg, regSS(prog.getAddressRegister()), SuperscalarReg)); + } + } + state.emit(rvc(rv64::C_RET, 0, 0)); + clearCache(state); + } + + template void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&)[RANDOMX_CACHE_MAX_ACCESSES]); + + DatasetInitFunc* JitCompilerRV64::getDatasetInitFunc() { + return (DatasetInitFunc*)((optimizedDatasetInit > 0) ? entryDataInitOptimized : entryDataInit); + } + + void JitCompilerRV64::v1_IADD_RS(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + int shift = isn.getModShift(); + if (shift == 0) { + //c.add x{dst}, x{src} + state.emit(rvc(rv64::C_ADD, regR(isn.dst), regR(isn.src))); + } + else { +#ifdef __riscv_zba + //sh{1,2,3}add x{dst}, x{src}, x{dst} + state.emit(rv64::SHXADD | rvrs2(regR(isn.dst)) | rvrs1(regR(isn.src)) | (shift << 13) | rvrd(regR(isn.dst))); +#else + //slli x8, x{src}, {shift} + state.emit(rvi(rv64::SLLI, Tmp1Reg, regR(isn.src), shift)); + //c.add x{dst}, x8 + state.emit(rvc(rv64::C_ADD, regR(isn.dst), Tmp1Reg)); +#endif + } + if (isn.dst == RegisterNeedsDisplacement) { + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + //x{dst} = x{dst} + {imm} + emitImm32(state, imm, regR(isn.dst), regR(isn.dst), Tmp1Reg); + } + } + + void JitCompilerRV64::v1_IADD_M(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + loadFromScratchpad(state, isn); + //c.add x{dst}, x8 + state.emit(rvc(rv64::C_ADD, regR(isn.dst), Tmp1Reg)); + } + + void JitCompilerRV64::v1_ISUB_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + if (isn.src != isn.dst) { + //sub x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::SUB, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + else { + int32_t imm = unsigned32ToSigned2sCompl(-isn.getImm32()); //convert to add + //x{dst} = x{dst} + {-imm} + emitImm32(state, imm, regR(isn.dst), regR(isn.dst), Tmp1Reg); + } + } + + void JitCompilerRV64::v1_ISUB_M(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + loadFromScratchpad(state, isn); + //sub x{dst}, x{dst}, x8 + state.emit(rvi(rv64::SUB, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + + void JitCompilerRV64::v1_IMUL_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + if (isn.src != isn.dst) { + //mul x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::MUL, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + else { + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + //x8 = {imm} + emitImm32(state, imm, Tmp1Reg); + //mul x{dst}, x{dst}, x8 + state.emit(rvi(rv64::MUL, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + } + + void JitCompilerRV64::v1_IMUL_M(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + loadFromScratchpad(state, isn); + //mul x{dst}, x{dst}, x8 + state.emit(rvi(rv64::MUL, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + + void JitCompilerRV64::v1_IMULH_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + //mulhu x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::MULHU, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + + void JitCompilerRV64::v1_IMULH_M(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + loadFromScratchpad(state, isn); + //mulhu x{dst}, x{dst}, x8 + state.emit(rvi(rv64::MULHU, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + + void JitCompilerRV64::v1_ISMULH_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + //mulh x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::MULH, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + + void JitCompilerRV64::v1_ISMULH_M(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + 
loadFromScratchpad(state, isn); + //mulh x{dst}, x{dst}, x8 + state.emit(rvi(rv64::MULH, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + + void JitCompilerRV64::v1_IMUL_RCP(HANDLER_ARGS) { + const uint32_t divisor = isn.getImm32(); + if (!isZeroOrPowerOf2(divisor)) { + state.registerUsage[isn.dst] = i; + if (state.rcpCount < 4) { + //mul x{dst}, x{dst}, x{rcp} + state.emit(rvi(rv64::MUL, regR(isn.dst), regR(isn.dst), regRcp(state.rcpCount))); + } + else if (state.rcpCount < 10) { + //fmv.x.d x8, f{rcp} + state.emit(rvi(rv64::FMV_X_D, Tmp1Reg, regRcpF(state.rcpCount))); + //mul x{dst}, x{dst}, x8 + state.emit(rvi(rv64::MUL, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + else { + int32_t offset = RcpLiteralsOffset + state.rcpCount * 8; + //ld x8, {offset}(x3) + state.emit(rvi(rv64::LD, Tmp1Reg, LiteralPoolReg, offset)); + //mul x{dst}, x{dst}, x8 + state.emit(rvi(rv64::MUL, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + emitRcpLiteral1(state, randomx_reciprocal_fast(divisor)); + } + } + + void JitCompilerRV64::v1_INEG_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + //sub x{dst}, x0, x{dst} + state.emit(rvi(rv64::SUB, regR(isn.dst), 0, regR(isn.dst))); + } + + void JitCompilerRV64::v1_IXOR_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + if (isn.src != isn.dst) { + //xor x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::XOR, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + else { + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + //x8 = {imm} + emitImm32(state, imm, Tmp1Reg); + //xor x{dst}, x{dst}, x8 + state.emit(rvi(rv64::XOR, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + } + + void JitCompilerRV64::v1_IXOR_M(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; + loadFromScratchpad(state, isn); + //xor x{dst}, x{dst}, x8 + state.emit(rvi(rv64::XOR, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } + + void JitCompilerRV64::v1_IROR_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; +#ifdef __riscv_zbb + if (isn.src != isn.dst) { + //ror x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::ROR, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + else { + int32_t imm = isn.getImm32() & 63; + //rori x{dst}, x{dst}, {imm} + state.emit(rvi(rv64::RORI, regR(isn.dst), regR(isn.dst), imm)); + } +#else + if (isn.src != isn.dst) { + //sub x8, x0, x{src} + state.emit(rvi(rv64::SUB, Tmp1Reg, 0, regR(isn.src))); + //srl x9, x{dst}, x{src} + state.emit(rvi(rv64::SRL, Tmp2Reg, regR(isn.dst), regR(isn.src))); + //sll x{dst}, x{dst}, x8 + state.emit(rvi(rv64::SLL, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + //or x{dst}, x{dst}, x9 + state.emit(rvi(rv64::OR, regR(isn.dst), regR(isn.dst), Tmp2Reg)); + } + else { + int32_t immr = isn.getImm32() & 63; + int32_t imml = -immr & 63; + int32_t imml5 = imml >> 5; + int32_t imml40 = imml & 31; + //srli x8, x{dst}, {immr} + state.emit(rvi(rv64::SRLI, Tmp1Reg, regR(isn.dst), immr)); + //c.slli x{dst}, {imml} + state.emit(rvc(rv64::C_SLLI, imml5, regR(isn.dst), imml40)); + //or x{dst}, x{dst}, x8 + state.emit(rvi(rv64::OR, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } +#endif + } + + void JitCompilerRV64::v1_IROL_R(HANDLER_ARGS) { + state.registerUsage[isn.dst] = i; +#ifdef __riscv_zbb + if (isn.src != isn.dst) { + //rol x{dst}, x{dst}, x{src} + state.emit(rvi(rv64::ROL, regR(isn.dst), regR(isn.dst), regR(isn.src))); + } + else { + int32_t imm = -isn.getImm32() & 63; + //rori x{dst}, x{dst}, {imm} + state.emit(rvi(rv64::RORI, regR(isn.dst), regR(isn.dst), imm)); + } +#else + if (isn.src != isn.dst) { + //sub x8, x0, x{src} + state.emit(rvi(rv64::SUB, 
Tmp1Reg, 0, regR(isn.src))); + //sll x9, x{dst}, x{src} + state.emit(rvi(rv64::SLL, Tmp2Reg, regR(isn.dst), regR(isn.src))); + //srl x{dst}, x{dst}, x8 + state.emit(rvi(rv64::SRL, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + //or x{dst}, x{dst}, x9 + state.emit(rvi(rv64::OR, regR(isn.dst), regR(isn.dst), Tmp2Reg)); + } + else { + int32_t imml = isn.getImm32() & 63; + int32_t immr = -imml & 63; + int32_t imml5 = imml >> 5; + int32_t imml40 = imml & 31; + //srli x8, x{dst}, {immr} + state.emit(rvi(rv64::SRLI, Tmp1Reg, regR(isn.dst), immr)); + //c.slli x{dst}, {imml} + state.emit(rvc(rv64::C_SLLI, imml5, regR(isn.dst), imml40)); + //or x{dst}, x{dst}, x8 + state.emit(rvi(rv64::OR, regR(isn.dst), regR(isn.dst), Tmp1Reg)); + } +#endif + } + + void JitCompilerRV64::v1_ISWAP_R(HANDLER_ARGS) { + if (isn.src != isn.dst) { + state.registerUsage[isn.dst] = i; + state.registerUsage[isn.src] = i; + //c.mv x8, x{dst} + state.emit(rvc(rv64::C_MV, Tmp1Reg, regR(isn.dst))); + //c.mv x{dst}, x{src} + state.emit(rvc(rv64::C_MV, regR(isn.dst), regR(isn.src))); + //c.mv x{src}, x8 + state.emit(rvc(rv64::C_MV, regR(isn.src), Tmp1Reg)); + } + } + + void JitCompilerRV64::v1_FSWAP_R(HANDLER_ARGS) { + //fmv.d f24, f{dst_lo} + state.emit(rvi(rv64::FMV_D, Tmp1RegF, regLoF(isn.dst), regLoF(isn.dst))); + //fmv.d f{dst_lo}, f{dst_hi} + state.emit(rvi(rv64::FMV_D, regLoF(isn.dst), regHiF(isn.dst), regHiF(isn.dst))); + //fmv.d f{dst_hi}, f24 + state.emit(rvi(rv64::FMV_D, regHiF(isn.dst), Tmp1RegF, Tmp1RegF)); + } + + void JitCompilerRV64::v1_FADD_R(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + isn.src %= RegisterCountFlt; + //fadd.d f{dst_lo}, f{dst_lo}, f{src_lo} + state.emit(rvi(rv64::FADD_D, regLoF(isn.dst), regLoF(isn.dst), regLoA(isn.src))); + //fadd.d f{dst_hi}, f{dst_hi}, f{src_hi} + state.emit(rvi(rv64::FADD_D, regHiF(isn.dst), regHiF(isn.dst), regHiA(isn.src))); + } + + void JitCompilerRV64::v1_FADD_M(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + //x9 = mem + genAddressReg(state, isn); + //lw x8, 0(x9) + state.emit(rvc(rv64::C_LW, Tmp2Reg + OffsetXC, Tmp1Reg + OffsetXC)); + //lw x9, 4(x9) + state.emit(rvc(rv64::C_LW, Tmp2Reg + OffsetXC, 16 + Tmp2Reg + OffsetXC)); + //fcvt.d.w f24, x8 + state.emit(rvi(rv64::FCVT_D_W, Tmp1RegF, Tmp1Reg)); + //fcvt.d.w f25, x9 + state.emit(rvi(rv64::FCVT_D_W, Tmp2RegF, Tmp2Reg)); + //fadd.d f{dst_lo}, f{dst_lo}, f24 + state.emit(rvi(rv64::FADD_D, regLoF(isn.dst), regLoF(isn.dst), Tmp1RegF)); + //fadd.d f{dst_hi}, f{dst_hi}, f25 + state.emit(rvi(rv64::FADD_D, regHiF(isn.dst), regHiF(isn.dst), Tmp2RegF)); + } + + void JitCompilerRV64::v1_FSUB_R(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + isn.src %= RegisterCountFlt; + //fsub.d f{dst_lo}, f{dst_lo}, f{src_lo} + state.emit(rvi(rv64::FSUB_D, regLoF(isn.dst), regLoF(isn.dst), regLoA(isn.src))); + //fsub.d f{dst_hi}, f{dst_hi}, f{src_hi} + state.emit(rvi(rv64::FSUB_D, regHiF(isn.dst), regHiF(isn.dst), regHiA(isn.src))); + } + + void JitCompilerRV64::v1_FSUB_M(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + //x9 = mem + genAddressReg(state, isn); + //c.lw x8, 0(x9) + state.emit(rvc(rv64::C_LW, Tmp2Reg + OffsetXC, Tmp1Reg + OffsetXC)); + //c.lw x9, 4(x9) + state.emit(rvc(rv64::C_LW, Tmp2Reg + OffsetXC, 16 + Tmp2Reg + OffsetXC)); + //fcvt.d.w f24, x8 + state.emit(rvi(rv64::FCVT_D_W, Tmp1RegF, Tmp1Reg)); + //fcvt.d.w f25, x9 + state.emit(rvi(rv64::FCVT_D_W, Tmp2RegF, Tmp2Reg)); + //fsub.d f{dst_lo}, f{dst_lo}, f24 + state.emit(rvi(rv64::FSUB_D, regLoF(isn.dst), regLoF(isn.dst), Tmp1RegF)); + //fsub.d f{dst_hi}, f{dst_hi}, f25 + 
state.emit(rvi(rv64::FSUB_D, regHiF(isn.dst), regHiF(isn.dst), Tmp2RegF)); + } + + void JitCompilerRV64::v1_FSCAL_R(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + //fmv.x.d x8, f{dst_lo} + state.emit(rvi(rv64::FMV_X_D, Tmp1Reg, regLoF(isn.dst))); + //fmv.x.d x9, f{dst_hi} + state.emit(rvi(rv64::FMV_X_D, Tmp2Reg, regHiF(isn.dst))); + //c.xor x8, x12 + state.emit(rvc(rv64::C_XOR, Tmp1Reg + OffsetXC, MaskFscalReg + OffsetXC)); + //c.xor x9, x12 + state.emit(rvc(rv64::C_XOR, Tmp2Reg + OffsetXC, MaskFscalReg + OffsetXC)); + //fmv.d.x f{dst_lo}, x8 + state.emit(rvi(rv64::FMV_D_X, regLoF(isn.dst), Tmp1Reg)); + //fmv.d.x f{dst_hi}, x9 + state.emit(rvi(rv64::FMV_D_X, regHiF(isn.dst), Tmp2Reg)); + } + + void JitCompilerRV64::v1_FMUL_R(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + isn.src %= RegisterCountFlt; + //fmul.d f{dst_lo}, f{dst_lo}, f{src_lo} + state.emit(rvi(rv64::FMUL_D, regLoE(isn.dst), regLoE(isn.dst), regLoA(isn.src))); + //fmul.d f{dst_hi}, f{dst_hi}, f{src_hi} + state.emit(rvi(rv64::FMUL_D, regHiE(isn.dst), regHiE(isn.dst), regHiA(isn.src))); + } + + void JitCompilerRV64::v1_FDIV_M(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + //x9 = mem + genAddressReg(state, isn); + //lw x8, 0(x9) + state.emit(rvc(rv64::C_LW, Tmp2Reg + OffsetXC, Tmp1Reg + OffsetXC)); + //lw x9, 4(x9) + state.emit(rvc(rv64::C_LW, Tmp2Reg + OffsetXC, 16 + Tmp2Reg + OffsetXC)); + //fcvt.d.w f24, x8 + state.emit(rvi(rv64::FCVT_D_W, Tmp1RegF, Tmp1Reg)); + //fcvt.d.w f25, x9 + state.emit(rvi(rv64::FCVT_D_W, Tmp2RegF, Tmp2Reg)); + //fmv.x.d x8, f24 + state.emit(rvi(rv64::FMV_X_D, Tmp1Reg, Tmp1RegF)); + //fmv.x.d x9, f25 + state.emit(rvi(rv64::FMV_X_D, Tmp2Reg, Tmp2RegF)); + //c.and x8, x13 + state.emit(rvc(rv64::C_AND, Tmp1Reg + OffsetXC, MaskEclear + OffsetXC)); + //c.and x9, x13 + state.emit(rvc(rv64::C_AND, Tmp2Reg + OffsetXC, MaskEclear + OffsetXC)); + //c.or x8, x14 + state.emit(rvc(rv64::C_OR, Tmp1Reg + OffsetXC, MaskEsetLo + OffsetXC)); + //c.or x9, x15 + state.emit(rvc(rv64::C_OR, Tmp2Reg + OffsetXC, MaskEsetHi + OffsetXC)); + //fmv.d.x f24, x8 + state.emit(rvi(rv64::FMV_D_X, Tmp1RegF, Tmp1Reg)); + //fmv.d.x f25, x9 + state.emit(rvi(rv64::FMV_D_X, Tmp2RegF, Tmp2Reg)); + //fdiv.d f{dst_lo}, f{dst_lo}, f24 + state.emit(rvi(rv64::FDIV_D, regLoE(isn.dst), regLoE(isn.dst), Tmp1RegF)); + //fdiv.d f{dst_hi}, f{dst_hi}, f25 + state.emit(rvi(rv64::FDIV_D, regHiE(isn.dst), regHiE(isn.dst), Tmp2RegF)); + } + + void JitCompilerRV64::v1_FSQRT_R(HANDLER_ARGS) { + isn.dst %= RegisterCountFlt; + //fsqrt.d f{dst_lo}, f{dst_lo} + state.emit(rvi(rv64::FSQRT_D, regLoE(isn.dst), regLoE(isn.dst))); + //fsqrt.d f{dst_hi}, f{dst_hi} + state.emit(rvi(rv64::FSQRT_D, regHiE(isn.dst), regHiE(isn.dst))); + } + + void JitCompilerRV64::v1_CBRANCH(HANDLER_ARGS) { + int reg = isn.dst; + int target = state.registerUsage[reg] + 1; + int shift = isn.getModCond() + RandomX_ConfigurationBase::JumpOffset; + int32_t imm = unsigned32ToSigned2sCompl(isn.getImm32()); + imm |= (1UL << shift); + if (RandomX_ConfigurationBase::JumpOffset > 0 || shift > 0) + imm &= ~(1UL << (shift - 1)); + //x8 = branchMask + emitImm32(state, (int32_t)((1 << RandomX_ConfigurationBase::JumpBits) - 1) << shift, Tmp1Reg); + //x{dst} += {imm} + emitImm32(state, imm, regR(isn.dst), regR(isn.dst), Tmp2Reg); + //and x8, x8, x{dst} + state.emit(rvi(rv64::AND, Tmp1Reg, Tmp1Reg, regR(isn.dst))); + int32_t targetPos = state.instructionOffsets[target]; + int offset = targetPos - state.codePos; + if (offset >= -256) { //C.BEQZ only has a range of 256B + //c.beqz x8, {offset} + int 
imm8 = 1; //sign bit is always 1 + int imm21 = offset & 6; //offset[2:1] + int imm5 = (offset >> 5) & 1; //offset[5] + int imm43 = offset & 24; //offset[4:3] + int imm76 = (offset >> 3) & 24; //offset[7:6] + state.emit(rvc(rv64::C_BEQZ, imm8, imm43 + (Tmp1Reg + OffsetXC), imm76 + imm21 + imm5)); + } + else if (offset >= -4096) { //BEQ only has a range of 4KB + //beq x8, x0, offset + int imm12 = 1 << 11; //sign bit is always 1 + int imm105 = offset & 2016; //offset[10:5] + int imm41 = offset & 30; //offset[4:1] + int imm11 = (offset >> 11) & 1; //offset[11] + state.emit(rvi(rv64::BEQ, imm41 + imm11, Tmp1Reg, imm12 + imm105)); + } + else { + //c.bnez x8, +6 + state.emit(rvc(rv64::C_BNEZ, Tmp1Reg + OffsetXC, 6)); + //j targetPos + emitJump(state, 0, state.codePos, targetPos); + state.codePos += 4; + } + //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { + state.registerUsage[j] = i; + } + } + + void JitCompilerRV64::v1_CFROUND(HANDLER_ARGS) { + int32_t imm = (isn.getImm32() - 2) & 63; //-2 to avoid a later left shift to multiply by 4 + if (imm != 0) { +#ifdef __riscv_zbb + //rori x8, x{src}, {imm} + state.emit(rvi(rv64::RORI, Tmp1Reg, regR(isn.src), imm)); +#else + int32_t imml = -imm & 63; + //srli x8, x{src}, {imm} + state.emit(rvi(rv64::SRLI, Tmp1Reg, regR(isn.src), imm)); + //slli x9, x{src}, {imml} + state.emit(rvi(rv64::SLLI, Tmp2Reg, regR(isn.src), imml)); + //c.or x8, x9 + state.emit(rvc(rv64::C_OR, Tmp1Reg + OffsetXC, Tmp2Reg + OffsetXC)); +#endif + //c.andi x8, 12 + state.emit(rvc(rv64::C_ANDI, Tmp1Reg + OffsetXC, 12)); + } + else { + //and x8, x{src}, 12 + state.emit(rvi(rv64::ANDI, Tmp1Reg, regR(isn.src), 12)); + } + //c.add x8, x3 + state.emit(rvc(rv64::C_ADD, Tmp1Reg, LiteralPoolReg)); + //c.lw x8, 64(x8) + state.emit(rvc(rv64::C_LW, Tmp1Reg + OffsetXC, 8 + Tmp1Reg + OffsetXC)); + //fsrm x8 + state.emit(rvi(rv64::FSRM, 0, Tmp1Reg, 0)); + } + + void JitCompilerRV64::v1_ISTORE(HANDLER_ARGS) { + genAddressRegDst(state, isn); + //sd x{src}, 0(x9) + state.emit(rvi(rv64::SD, 0, Tmp2Reg, regR(isn.src))); + } + + void JitCompilerRV64::v1_NOP(HANDLER_ARGS) { + } + +InstructionGeneratorRV64 JitCompilerRV64::engine[256] = {}; +} diff --git a/src/crypto/randomx/jit_compiler_rv64.hpp b/src/crypto/randomx/jit_compiler_rv64.hpp new file mode 100644 index 000000000..dbad88e1b --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64.hpp @@ -0,0 +1,147 @@ +/* +Copyright (c) 2023 tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include +#include +#include "crypto/randomx/common.hpp" +#include "crypto/randomx/jit_compiler_rv64_static.hpp" + +namespace randomx { + + struct CodeBuffer { + uint8_t* code; + int32_t codePos; + int32_t rcpCount; + + void emit(const uint8_t* src, int32_t len) { + memcpy(&code[codePos], src, len); + codePos += len; + } + + template<typename T> + void emit(T src) { + memcpy(&code[codePos], &src, sizeof(src)); + codePos += sizeof(src); + } + + void emitAt(int32_t codePos, const uint8_t* src, int32_t len) { + memcpy(&code[codePos], src, len); + } + + template<typename T> + void emitAt(int32_t codePos, T src) { + memcpy(&code[codePos], &src, sizeof(src)); + } + }; + + struct CompilerState : public CodeBuffer { + int32_t instructionOffsets[RANDOMX_PROGRAM_MAX_SIZE]; + int registerUsage[RegistersCount]; + }; + + class Program; + struct ProgramConfiguration; + class SuperscalarProgram; + class Instruction; + +#define HANDLER_ARGS randomx::CompilerState& state, randomx::Instruction isn, int i + typedef void(*InstructionGeneratorRV64)(HANDLER_ARGS); + + class JitCompilerRV64 { + public: + JitCompilerRV64(bool hugePagesEnable, bool optimizedInitDatasetEnable); + ~JitCompilerRV64(); + + void prepare() {} + void generateProgram(Program&, ProgramConfiguration&, uint32_t); + void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); + + template<size_t N> + void generateSuperscalarHash(SuperscalarProgram(&programs)[N]); + + void generateDatasetInitCode() {} + + ProgramFunc* getProgramFunc() { + return (ProgramFunc*)entryProgram; + } + DatasetInitFunc* getDatasetInitFunc(); + uint8_t* getCode() { + return state.code; + } + size_t getCodeSize(); + + void enableWriting() const; + void enableExecution() const; + + static InstructionGeneratorRV64 engine[256]; + private: + CompilerState state; + + uint8_t* vectorCode; + size_t vectorCodeSize; + + void* entryDataInit; + void* entryDataInitOptimized; + void* entryProgram; + + public: + static void v1_IADD_RS(HANDLER_ARGS); + static void v1_IADD_M(HANDLER_ARGS); + static void v1_ISUB_R(HANDLER_ARGS); + static void v1_ISUB_M(HANDLER_ARGS); + static void v1_IMUL_R(HANDLER_ARGS); + static void v1_IMUL_M(HANDLER_ARGS); + static void v1_IMULH_R(HANDLER_ARGS); + static void v1_IMULH_M(HANDLER_ARGS); + static void v1_ISMULH_R(HANDLER_ARGS); + static void v1_ISMULH_M(HANDLER_ARGS); + static void v1_IMUL_RCP(HANDLER_ARGS); + static void v1_INEG_R(HANDLER_ARGS); + static void v1_IXOR_R(HANDLER_ARGS); + static void v1_IXOR_M(HANDLER_ARGS); + static void v1_IROR_R(HANDLER_ARGS); + static void v1_IROL_R(HANDLER_ARGS); + static void v1_ISWAP_R(HANDLER_ARGS); + static void v1_FSWAP_R(HANDLER_ARGS); + static void v1_FADD_R(HANDLER_ARGS); + static void v1_FADD_M(HANDLER_ARGS); + static void v1_FSUB_R(HANDLER_ARGS); + static void v1_FSUB_M(HANDLER_ARGS); + static void v1_FSCAL_R(HANDLER_ARGS); + static void v1_FMUL_R(HANDLER_ARGS); + static void v1_FDIV_M(HANDLER_ARGS); + static void v1_FSQRT_R(HANDLER_ARGS); + static void v1_CBRANCH(HANDLER_ARGS); + static
void v1_CFROUND(HANDLER_ARGS); + static void v1_ISTORE(HANDLER_ARGS); + static void v1_NOP(HANDLER_ARGS); + }; +} diff --git a/src/crypto/randomx/jit_compiler_rv64_static.S b/src/crypto/randomx/jit_compiler_rv64_static.S new file mode 100644 index 000000000..c4f341adb --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_static.S @@ -0,0 +1,1236 @@ +/* +Copyright (c) 2023 tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define DECL(x) x + +.text +.option rvc + +#include "configuration.h" + +.global DECL(randomx_riscv64_literals) +.global DECL(randomx_riscv64_literals_end) +.global DECL(randomx_riscv64_data_init) +.global DECL(randomx_riscv64_fix_data_call) +.global DECL(randomx_riscv64_prologue) +.global DECL(randomx_riscv64_loop_begin) +.global DECL(randomx_riscv64_data_read) +.global DECL(randomx_riscv64_data_read_light) +.global DECL(randomx_riscv64_fix_loop_call) +.global DECL(randomx_riscv64_spad_store) +.global DECL(randomx_riscv64_spad_store_hardaes) +.global DECL(randomx_riscv64_spad_store_softaes) +.global DECL(randomx_riscv64_loop_end) +.global DECL(randomx_riscv64_fix_continue_loop) +.global DECL(randomx_riscv64_epilogue) +.global DECL(randomx_riscv64_softaes) +.global DECL(randomx_riscv64_program_end) +.global DECL(randomx_riscv64_ssh_init) +.global DECL(randomx_riscv64_ssh_load) +.global DECL(randomx_riscv64_ssh_prefetch) +.global DECL(randomx_riscv64_ssh_end) + +/* The literal pool can fit at most 494 IMUL_RCP literals */ +#if RANDOMX_PROGRAM_SIZE > 494 + #error RANDOMX_PROGRAM_SIZE larger than 494 is not supported. 
+#endif + +#define RANDOMX_ARGON_MEMORY 262144 +#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1) + +/* shared literal pool: 4 KB */ + /* space for 256 IMUL_RCP literals -2048 */ + /* filled by JIT compiler */ +DECL(randomx_riscv64_literals): +literal_pool: + /* SuperscalarHash constants +0 */ + .dword 6364136223846793005 + .dword 9298411001130361340 + .dword 12065312585734608966 + .dword 9306329213124626780 + .dword 5281919268842080866 + .dword 10536153434571861004 + .dword 3398623926847679864 + .dword 9549104520008361294 + /* CFROUND lookup table +64 */ + .word 0x00000000 /* RTN */ + .word 0x00000002 /* RDN */ + .word 0x00000003 /* RUP */ + .word 0x00000001 /* RTZ */ + /* mask literals +80,+84,+88,+92,+96,+104 */ + .word (/*RANDOMX_SCRATCHPAD_L1*/0) /* filled by JIT compiler */ + .word (/*RANDOMX_SCRATCHPAD_L2*/0) /* filled by JIT compiler */ + .word (/*RANDOMX_SCRATCHPAD_L3*/0) /* filled by JIT compiler */ + .word (/*RANDOMX_DATASET_BASE_SIZE*/0) /* filled by JIT compiler */ + .dword 0x80f0000000000000 + .dword 0x00ffffffffffffff +DECL(randomx_riscv64_literals_end): + /* E reg. set masks, +112,+120 */ + .dword 0 /* filled by JIT compiler */ + .dword 0 /* filled by JIT compiler */ + /* soft AES table addresses, +128,+136 */ + .dword 0 /* filled by JIT compiler */ + .dword 0 /* filled by JIT compiler */ + /* space for 238 IMUL_RCP literals, +144 */ + .fill 238,8,0 /* filled by JIT compiler */ + +/* ================================= */ +/* Dataset init function entry point */ +/* ================================= */ + +/* Register allocation: + ---------------------- + x0 -> zero + x1 -> temp/return address + x2 -> stack pointer (sp) + x3 -> literal pool pointer + x5 -> dataset pointer + x6 -> cache pointer + x7 -> temp/itemNumber + x8-x15 -> SuperscalarHash registers + x16 -> itemNumber + x17 -> endItem + x28-x31 -> temp + + Stack layout: + ------------------------ + sp+ + 0 -> return address + 8 -> saved x3 + 16 -> saved x8-x9 + 32 -> caller stack +*/ +DECL(randomx_riscv64_data_init): + addi sp, sp, -32 + /* dataset ptr */ + mv x5, x11 + /* cache->memory */ + ld x6, 0(x10) + /* callee saved registers */ + sd x1, 0(sp) + sd x3, 8(sp) + /* literal pool */ + lla x3, literal_pool + sd x8, 16(sp) + sd x9, 24(sp) + /* startItem */ + mv x16, x12 + /* endItem */ + mv x17, x13 +init_item: + mv x7, x16 +DECL(randomx_riscv64_fix_data_call): + jal superscalar_hash /* JIT compiler will adjust the offset */ + sd x8, 0(x5) + sd x9, 8(x5) + sd x10, 16(x5) + sd x11, 24(x5) + sd x12, 32(x5) + sd x13, 40(x5) + sd x14, 48(x5) + sd x15, 56(x5) + addi x5, x5, 64 + addi x16, x16, 1 + bltu x16, x17, init_item + ld x1, 0(sp) + ld x3, 8(sp) + ld x8, 16(sp) + ld x9, 24(sp) + addi sp, sp, 32 + ret + +/* ====================================== */ +/* Program execution function entry point */ +/* ====================================== */ + +/* Register allocation: + ---------------------- + x0 -> zero + x1 -> temp/scratchpad L3 mask + x2 -> stack pointer (sp) + x3 -> literal pool pointer + x5 -> scratchpad pointer + x6 -> dataset/cache pointer + x7 -> temp/next dataset access + x8 -> temp + x9 -> temp + x10 -> scratchpad L1 mask (0x0000000000003ff8) + x11 -> scratchpad L2 mask (0x000000000003fff8) + x12 -> FSCAL_R mask (0x80f0000000000000) + x13 -> E reg. clear mask (0x00ffffffffffffff) + x14 -> E reg. set mask (0x3*00000000******) + x15 -> E reg. 
set mask (0x3*00000000******) + x16-x23 -> VM registers "r0"-"r7" + x24 -> iteration counter "ic" + x25 -> VM registers "mx", "ma" + x26 -> spAddr0 + x27 -> spAddr1 + x28-x31 -> temp/literals for IMUL_RCP (4x) + + (Note: We avoid using x4 because it breaks debugging with gdb.) + + f0-f7 -> VM registers "f0"-"f3" + f8-f15 -> VM registers "e0"-"e3" + f16-f23 -> VM registers "a0"-"a3" + f24-f25 -> temp + f26-f31 -> literals for IMUL_RCP (6x) + + Stack layout: + ------------------------ + sp+ + 0 -> return address + 8 -> register file ptr + 16 -> saved x3-x4 + 32 -> saved x8-x9 + 48 -> saved x18-x27 + 128 -> saved f8-f9 + 144 -> saved f18-f27 + 224 -> caller stack +*/ + +DECL(randomx_riscv64_prologue): + addi sp, sp, -224 + /* scratchpad pointer */ + mv x5, x12 + /* register file pointer */ + sd x10, 8(sp) + /* callee saved registers */ + sd x3, 16(sp) + sd x8, 32(sp) + sd x9, 40(sp) + sd x18, 48(sp) + sd x19, 56(sp) + sd x20, 64(sp) + sd x21, 72(sp) + sd x22, 80(sp) + sd x23, 88(sp) + sd x24, 96(sp) + sd x25, 104(sp) + sd x26, 112(sp) + sd x27, 120(sp) + fsd f8, 128(sp) + fsd f9, 136(sp) + fsd f18, 144(sp) + fsd f19, 152(sp) + fsd f20, 160(sp) + fsd f21, 168(sp) + fsd f22, 176(sp) + fsd f23, 184(sp) + fsd f24, 192(sp) + fsd f25, 200(sp) + fsd f26, 208(sp) + fsd f27, 216(sp) + /* iteration counter */ + mv x24, x13 + /* return address */ + sd x1, 0(sp) + /* literal pool */ + lla x3, literal_pool + /* load (ma, mx) */ + ld x25, 0(x11) + /* dataset ptr */ + ld x6, 8(x11) + /* load dataset mask */ + lwu x1, 92(x3) + /* zero registers r0-r3, load a0-a1 */ + li x16, 0 + fld f16, 192(x10) + li x17, 0 + fld f17, 200(x10) + srli x7, x25, 32 /* x7 = ma */ + li x18, 0 + fld f18, 208(x10) + mv x27, x7 /* x27 = ma */ + li x19, 0 + fld f19, 216(x10) + /* set dataset read address */ + and x7, x7, x1 + add x7, x7, x6 + /* zero registers r4-r7, load a2-a3 */ + li x20, 0 + fld f20, 224(x10) + li x21, 0 + fld f21, 232(x10) + li x22, 0 + fld f22, 240(x10) + li x23, 0 + fld f23, 248(x10) + /* load L3 mask */ + lwu x1, 88(x3) + /* load scratchpad masks */ + lwu x10, 80(x3) + lwu x11, 84(x3) + /* set spAddr0, spAddr1 */ + and x26, x25, x1 + and x27, x27, x1 + add x26, x26, x5 + add x27, x27, x5 + /* align L3 mask */ + addi x1, x1, 56 + /* FSCAL, E reg. 
masks */ + ld x12, 96(x3) + ld x13, 104(x3) + ld x14, 112(x3) + ld x15, 120(x3) + /* IMUL_RCP literals */ + fld f26, 176(x3) + fld f27, 184(x3) + fld f28, 192(x3) + fld f29, 200(x3) + fld f30, 208(x3) + fld f31, 216(x3) + +.balign 4 +DECL(randomx_riscv64_loop_begin): +loop_begin: + /* mix integer registers */ + ld x8, 0(x26) + ld x9, 8(x26) + ld x30, 16(x26) + ld x31, 24(x26) + xor x16, x16, x8 + ld x8, 32(x26) + xor x17, x17, x9 + ld x9, 40(x26) + xor x18, x18, x30 + ld x30, 48(x26) + xor x19, x19, x31 + ld x31, 56(x26) + xor x20, x20, x8 + lw x8, 0(x27) + xor x21, x21, x9 + lw x9, 4(x27) + xor x22, x22, x30 + lw x30, 8(x27) + xor x23, x23, x31 + lw x31, 12(x27) + /* load F registers */ + fcvt.d.w f0, x8 + lw x8, 16(x27) + fcvt.d.w f1, x9 + lw x9, 20(x27) + fcvt.d.w f2, x30 + lw x30, 24(x27) + fcvt.d.w f3, x31 + lw x31, 28(x27) + fcvt.d.w f4, x8 + lw x8, 32(x27) + fcvt.d.w f5, x9 + lw x9, 36(x27) + fcvt.d.w f6, x30 + lw x30, 40(x27) + fcvt.d.w f7, x31 + lw x31, 44(x27) + /* load E registers */ + fcvt.d.w f8, x8 + lw x8, 48(x27) + fcvt.d.w f9, x9 + lw x9, 52(x27) + fcvt.d.w f10, x30 + lw x30, 56(x27) + fcvt.d.w f11, x31 + lw x31, 60(x27) + fcvt.d.w f12, x8 + fmv.x.d x8, f8 + fcvt.d.w f13, x9 + fmv.x.d x9, f9 + fcvt.d.w f14, x30 + fmv.x.d x30, f10 + fcvt.d.w f15, x31 + fmv.x.d x31, f11 + and x8, x8, x13 + and x9, x9, x13 + or x8, x8, x14 + or x9, x9, x15 + and x30, x30, x13 + and x31, x31, x13 + or x30, x30, x14 + or x31, x31, x15 + fmv.d.x f8, x8 + fmv.d.x f9, x9 + fmv.d.x f10, x30 + fmv.d.x f11, x31 + fmv.x.d x8, f12 + fmv.x.d x9, f13 + fmv.x.d x30, f14 + fmv.x.d x31, f15 + and x8, x8, x13 + and x9, x9, x13 + or x8, x8, x14 + or x9, x9, x15 + fmv.d.x f12, x8 + fmv.d.x f13, x9 + and x30, x30, x13 + and x31, x31, x13 + or x30, x30, x14 + or x31, x31, x15 + fmv.d.x f14, x30 + fmv.d.x f15, x31 + /* reload clobbered IMUL_RCP regs */ + ld x28, 144(x3) + ld x29, 152(x3) + ld x30, 160(x3) + ld x31, 168(x3) + +DECL(randomx_riscv64_data_read): + xor x8, x20, x22 /* JIT compiler will adjust the registers */ + /* load dataset mask */ + lwu x1, 92(x3) + /* zero-extend x8 */ +#ifdef __riscv_zba + zext.w x8, x8 +#else + slli x8, x8, 32 + srli x8, x8, 32 +#endif + /* update "mx" */ + xor x25, x25, x8 + /* read dataset and update registers */ + ld x8, 0(x7) + ld x9, 8(x7) + ld x30, 16(x7) + ld x31, 24(x7) + xor x16, x16, x8 + ld x8, 32(x7) + xor x17, x17, x9 + ld x9, 40(x7) + xor x18, x18, x30 + ld x30, 48(x7) + xor x19, x19, x31 + ld x31, 56(x7) + xor x20, x20, x8 + /* calculate the next dataset address */ + and x7, x25, x1 + xor x21, x21, x9 + add x7, x7, x6 + xor x22, x22, x30 + /* prefetch - doesn't seem to have any effect */ + /* ld x0, 0(x7) */ + xor x23, x23, x31 + /* swap mx <-> ma */ +#ifdef __riscv_zbb + rori x25, x25, 32 +#else + srli x9, x25, 32 + slli x25, x25, 32 + or x25, x25, x9 +#endif + +DECL(randomx_riscv64_data_read_light): + xor x8, x20, x22 /* JIT compiler will adjust the registers */ + /* load dataset offset */ + lui x9, 0x02000 /* JIT compiler will adjust the immediate */ + addi x9, x9, -64 + /* load dataset mask */ + lwu x1, 92(x3) + /* swap mx <-> ma */ +#ifdef __riscv_zbb + rori x25, x25, 32 +#else + srli x31, x25, 32 + slli x25, x25, 32 + or x25, x25, x31 +#endif + slli x8, x8, 32 + /* update "mx" */ + xor x25, x25, x8 + /* the next dataset item */ + and x7, x25, x1 + srli x7, x7, 6 + add x7, x7, x9 +DECL(randomx_riscv64_fix_loop_call): + jal superscalar_hash /* JIT compiler will adjust the offset */ + xor x16, x16, x8 + xor x17, x17, x9 + xor x18, x18, x10 + xor x19, x19, x11 
+ xor x20, x20, x12 + xor x21, x21, x13 + xor x22, x22, x14 + xor x23, x23, x15 + /* restore clobbered registers */ + lwu x10, 80(x3) + lwu x11, 84(x3) + ld x12, 96(x3) + ld x13, 104(x3) + ld x14, 112(x3) + ld x15, 120(x3) + +DECL(randomx_riscv64_spad_store): + /* store integer registers */ + sd x16, 0(x27) + sd x17, 8(x27) + sd x18, 16(x27) + sd x19, 24(x27) + sd x20, 32(x27) + sd x21, 40(x27) + sd x22, 48(x27) + sd x23, 56(x27) + /* XOR and store f0,e0 */ + fmv.x.d x8, f0 + fmv.x.d x9, f8 + fmv.x.d x30, f1 + fmv.x.d x31, f9 + xor x8, x8, x9 + xor x30, x30, x31 + sd x8, 0(x26) + fmv.d.x f0, x8 + sd x30, 8(x26) + fmv.d.x f1, x30 + /* XOR and store f1,e1 */ + fmv.x.d x8, f2 + fmv.x.d x9, f10 + fmv.x.d x30, f3 + fmv.x.d x31, f11 + xor x8, x8, x9 + xor x30, x30, x31 + sd x8, 16(x26) + fmv.d.x f2, x8 + sd x30, 24(x26) + fmv.d.x f3, x30 + /* XOR and store f2,e2 */ + fmv.x.d x8, f4 + fmv.x.d x9, f12 + fmv.x.d x30, f5 + fmv.x.d x31, f13 + xor x8, x8, x9 + xor x30, x30, x31 + sd x8, 32(x26) + fmv.d.x f4, x8 + sd x30, 40(x26) + fmv.d.x f5, x30 + /* XOR and store f3,e3 */ + fmv.x.d x8, f6 + fmv.x.d x9, f14 + fmv.x.d x30, f7 + fmv.x.d x31, f15 + xor x8, x8, x9 + xor x30, x30, x31 + sd x8, 48(x26) + fmv.d.x f6, x8 + sd x30, 56(x26) + fmv.d.x f7, x30 + +DECL(randomx_riscv64_spad_store_hardaes): + nop /* not implemented */ + +DECL(randomx_riscv64_spad_store_softaes): + /* store integer registers */ + sd x16, 0(x27) + sd x17, 8(x27) + sd x18, 16(x27) + sd x19, 24(x27) + sd x20, 32(x27) + sd x21, 40(x27) + sd x22, 48(x27) + sd x23, 56(x27) + /* process f0 with 4 AES rounds */ + fmv.x.d x8, f8 + fmv.x.d x10, f9 + fmv.x.d x30, f0 + fmv.x.d x31, f1 + jal softaes_enc + fmv.x.d x8, f10 + fmv.x.d x10, f11 + jal softaes_enc + fmv.x.d x8, f12 + fmv.x.d x10, f13 + jal softaes_enc + fmv.x.d x8, f14 + fmv.x.d x10, f15 + jal softaes_enc + sd x30, 0(x26) + fmv.d.x f0, x30 + sd x31, 8(x26) + fmv.d.x f1, x31 + /* process f1 with 4 AES rounds */ + fmv.x.d x8, f8 + fmv.x.d x10, f9 + fmv.x.d x30, f2 + fmv.x.d x31, f3 + jal softaes_dec + fmv.x.d x8, f10 + fmv.x.d x10, f11 + jal softaes_dec + fmv.x.d x8, f12 + fmv.x.d x10, f13 + jal softaes_dec + fmv.x.d x8, f14 + fmv.x.d x10, f15 + jal softaes_dec + sd x30, 16(x26) + fmv.d.x f2, x30 + sd x31, 24(x26) + fmv.d.x f3, x31 + /* process f2 with 4 AES rounds */ + fmv.x.d x8, f8 + fmv.x.d x10, f9 + fmv.x.d x30, f4 + fmv.x.d x31, f5 + jal softaes_enc + fmv.x.d x8, f10 + fmv.x.d x10, f11 + jal softaes_enc + fmv.x.d x8, f12 + fmv.x.d x10, f13 + jal softaes_enc + fmv.x.d x8, f14 + fmv.x.d x10, f15 + jal softaes_enc + sd x30, 32(x26) + fmv.d.x f4, x30 + sd x31, 40(x26) + fmv.d.x f5, x31 + /* process f3 with 4 AES rounds */ + fmv.x.d x8, f8 + fmv.x.d x10, f9 + fmv.x.d x30, f6 + fmv.x.d x31, f7 + jal softaes_dec + fmv.x.d x8, f10 + fmv.x.d x10, f11 + jal softaes_dec + fmv.x.d x8, f12 + fmv.x.d x10, f13 + jal softaes_dec + fmv.x.d x8, f14 + fmv.x.d x10, f15 + jal softaes_dec + sd x30, 48(x26) + fmv.d.x f6, x30 + sd x31, 56(x26) + fmv.d.x f7, x31 + /* restore clobbered registers */ + lwu x10, 80(x3) + lwu x11, 84(x3) + ld x12, 96(x3) + ld x13, 104(x3) + ld x14, 112(x3) + ld x15, 120(x3) + +DECL(randomx_riscv64_loop_end): + xor x26, x16, x18 /* JIT compiler will adjust the registers */ + /* load L3 mask */ + lwu x1, 88(x3) + addi x24, x24, -1 + srli x27, x26, 32 + /* set spAddr0, spAddr1 */ + and x26, x26, x1 + and x27, x27, x1 + add x26, x26, x5 + add x27, x27, x5 + /* align L3 mask */ + addi x1, x1, 56 + /* conditional branch doesn't have sufficient range */ + j condition_check 
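+ /*
+    Loop-back trampoline: "bnez" is a B-type branch with only a +/-4 KiB reach, which is
+    not enough to jump back across a full generated RandomX program, so the JIT patches
+    an unconditional J-type jump (+/-1 MiB reach) into the "continue_loop" slot below and
+    the conditional branch only has to hop back onto that slot.
+ */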
+DECL(randomx_riscv64_fix_continue_loop): +continue_loop: + .word 0 /* JIT compiler will write a jump to loop_begin */ +condition_check: + bnez x24, continue_loop + +DECL(randomx_riscv64_epilogue): + /* restore callee saved registers */ + ld x10, 8(sp) + ld x1, 0(sp) + ld x3, 16(sp) + ld x8, 32(sp) + ld x9, 40(sp) + ld x24, 96(sp) + ld x25, 104(sp) + ld x26, 112(sp) + ld x27, 120(sp) + fld f18, 144(sp) + fld f19, 152(sp) + fld f20, 160(sp) + fld f21, 168(sp) + fld f22, 176(sp) + fld f23, 184(sp) + fld f24, 192(sp) + fld f25, 200(sp) + fld f26, 208(sp) + fld f27, 216(sp) + /* save VM registers */ + sd x16, 0(x10) + sd x17, 8(x10) + sd x18, 16(x10) + sd x19, 24(x10) + sd x20, 32(x10) + sd x21, 40(x10) + sd x22, 48(x10) + sd x23, 56(x10) + fsd f0, 64(x10) + fsd f1, 72(x10) + fsd f2, 80(x10) + fsd f3, 88(x10) + fsd f4, 96(x10) + fsd f5, 104(x10) + fsd f6, 112(x10) + fsd f7, 120(x10) + fsd f8, 128(x10) + fsd f9, 136(x10) + fsd f10, 144(x10) + fsd f11, 152(x10) + fsd f12, 160(x10) + fsd f13, 168(x10) + fsd f14, 176(x10) + fsd f15, 184(x10) + /* restore callee saved registers */ + ld x18, 48(sp) + ld x19, 56(sp) + ld x20, 64(sp) + ld x21, 72(sp) + ld x22, 80(sp) + ld x23, 88(sp) + fld f8, 128(sp) + fld f9, 136(sp) + /* restore stack pointer */ + addi sp, sp, 224 + /* return */ + ret + +/* + Soft AES subroutines + in: + x3 = literal pool + x8, x10 = round key + x30, x31 = plaintext + out: + x30, x31 = ciphertext + clobbers: + x8-x11 (limbs) + x12-x13 (LUTs) + x14-x15 (temp) +*/ +DECL(randomx_riscv64_softaes): +softaes_enc: + /* enc. lookup table */ + ld x13, 128(x3) + + /* load the round key into x8, x9, x10, x11 */ + srli x9, x8, 32 + srli x11, x10, 32 +#ifdef __riscv_zba + zext.w x8, x8 + zext.w x10, x10 +#else + slli x8, x8, 32 + slli x10, x10, 32 + srli x8, x8, 32 + srli x10, x10, 32 +#endif + + /* byte 0 */ + andi x14, x30, 255 + srli x30, x30, 8 + addi x12, x13, -2048 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, -2048(x14) + + /* byte 1 */ + andi x15, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x8, x8, x14 + + /* byte 2 */ + andi x14, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x11, x11, x15 + + /* byte 3 */ + andi x15, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x10, x10, x14 + + /* byte 4 */ + andi x14, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x14, x14, x12 +#else + slli x14, x14, 2 + add x14, x14, x12 +#endif + lwu x14, 0(x14) + xor x9, x9, x15 + + /* byte 5 */ + andi x15, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x9, x9, x14 + + /* byte 6 */ + andi x14, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x8, x8, x15 + + /* byte 7 */ + andi x15, x30, 255 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x11, x11, x14 + + /* byte 8 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x12 +#else + slli x14, x14, 2 + add x14, x14, x12 +#endif + lwu x14, 0(x14) + xor x10, x10, x15 + + /* byte 9 
*/ + andi x15, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x10, x10, x14 + + /* byte 10 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x9, x9, x15 + + /* byte 11 */ + andi x15, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x8, x8, x14 + + /* byte 12 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x12 +#else + slli x14, x14, 2 + add x14, x14, x12 +#endif + lwu x14, 0(x14) + xor x11, x11, x15 + + /* byte 13 */ + andi x15, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x11, x11, x14 + + /* byte 14 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x10, x10, x15 + + /* byte 15 */ + andi x15, x31, 255 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x9, x9, x14 + + slli x11, x11, 32 + slli x9, x9, 32 + or x30, x8, x9 + or x31, x10, x11 + xor x30, x30, x15 + + ret + +softaes_dec: + /* dec. lookup table */ + ld x13, 136(x3) + + /* load the round key into x8, x9, x10, x11 */ + srli x9, x8, 32 + srli x11, x10, 32 +#ifdef __riscv_zba + zext.w x8, x8 + zext.w x10, x10 +#else + slli x8, x8, 32 + slli x10, x10, 32 + srli x8, x8, 32 + srli x10, x10, 32 +#endif + + /* byte 0 */ + andi x14, x30, 255 + srli x30, x30, 8 + addi x12, x13, -2048 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, -2048(x14) + + /* byte 1 */ + andi x15, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x8, x8, x14 + + /* byte 2 */ + andi x14, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x9, x9, x15 + + /* byte 3 */ + andi x15, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x10, x10, x14 + + /* byte 4 */ + andi x14, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x14, x14, x12 +#else + slli x14, x14, 2 + add x14, x14, x12 +#endif + lwu x14, 0(x14) + xor x11, x11, x15 + + /* byte 5 */ + andi x15, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x9, x9, x14 + + /* byte 6 */ + andi x14, x30, 255 + srli x30, x30, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x10, x10, x15 + + /* byte 7 */ + andi x15, x30, 255 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x11, x11, x14 + + /* byte 8 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x12 +#else + slli x14, x14, 2 + add x14, x14, x12 +#endif + lwu x14, 0(x14) + xor x8, x8, x15 + + /* byte 9 */ + andi x15, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli 
x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x10, x10, x14 + + /* byte 10 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x11, x11, x15 + + /* byte 11 */ + andi x15, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x8, x8, x14 + + /* byte 12 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x12 +#else + slli x14, x14, 2 + add x14, x14, x12 +#endif + lwu x14, 0(x14) + xor x9, x9, x15 + + /* byte 13 */ + andi x15, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x15, x15, x12 +#else + slli x15, x15, 2 + add x15, x15, x12 +#endif + lwu x15, 1024(x15) + xor x11, x11, x14 + + /* byte 14 */ + andi x14, x31, 255 + srli x31, x31, 8 +#ifdef __riscv_zba + sh2add x14, x14, x13 +#else + slli x14, x14, 2 + add x14, x14, x13 +#endif + lwu x14, 0(x14) + xor x8, x8, x15 + + /* byte 15 */ + andi x15, x31, 255 +#ifdef __riscv_zba + sh2add x15, x15, x13 +#else + slli x15, x15, 2 + add x15, x15, x13 +#endif + lwu x15, 1024(x15) + xor x9, x9, x14 + + slli x11, x11, 32 + slli x9, x9, 32 + or x30, x8, x9 + or x31, x10, x11 + xor x31, x31, x15 + + ret + +DECL(randomx_riscv64_program_end): + nop + + +/* literal pool for SuperscalarHash */ + /* space for remaining IMUL_RCP literals */ +ssh_literal_pool: + /* space for 256 IMUL_RCP literals */ + .fill 256,8,0 + +/* + SuperscalarHash subroutine + in: + x3 = literal pool + x6 = cache + x7 = itemNumber + out: + x8-x15 = 64-byte hash + clobbers: + x7, x28-x31 +*/ +DECL(randomx_riscv64_ssh_init): +superscalar_hash: + ld x30, 0(x3) /* superscalarMul0 */ + addi x8, x7, 1 + ld x9, 8(x3) + li x31, RANDOMX_CACHE_MASK + ld x10, 16(x3) + ld x11, 24(x3) + mul x8, x8, x30 + ld x12, 32(x3) + ld x13, 40(x3) + lla x30, ssh_literal_pool + ld x14, 48(x3) + and x7, x7, x31 + ld x15, 56(x3) + slli x7, x7, 6 + xor x9, x9, x8 + add x7, x7, x6 + xor x10, x10, x8 + /* load the first IMUL_RCP literal */ + ld x31, 2040(x30) + xor x11, x11, x8 + xor x12, x12, x8 + xor x13, x13, x8 + xor x14, x14, x8 + xor x15, x15, x8 + +DECL(randomx_riscv64_ssh_load): + ld x28, 0(x7) + ld x29, 8(x7) + xor x8, x8, x28 + ld x28, 16(x7) + xor x9, x9, x29 + ld x29, 24(x7) + xor x10, x10, x28 + ld x28, 32(x7) + xor x11, x11, x29 + ld x29, 40(x7) + xor x12, x12, x28 + ld x28, 48(x7) + xor x13, x13, x29 + ld x29, 56(x7) + xor x14, x14, x28 + li x7, RANDOMX_CACHE_MASK + xor x15, x15, x29 + +DECL(randomx_riscv64_ssh_prefetch): + and x7, x8, x7 /* JIT compiler will adjust the register */ + slli x7, x7, 6 + add x7, x7, x6 + /* prefetch - doesn't seem to have any effect */ + /* ld x0, 0(x7) */ + +DECL(randomx_riscv64_ssh_end): + nop diff --git a/src/crypto/randomx/jit_compiler_rv64_static.hpp b/src/crypto/randomx/jit_compiler_rv64_static.hpp new file mode 100644 index 000000000..656623c74 --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_static.hpp @@ -0,0 +1,53 @@ +/* +Copyright (c) 2023 tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +extern "C" { + void randomx_riscv64_literals(); + void randomx_riscv64_literals_end(); + void randomx_riscv64_data_init(); + void randomx_riscv64_fix_data_call(); + void randomx_riscv64_prologue(); + void randomx_riscv64_loop_begin(); + void randomx_riscv64_data_read(); + void randomx_riscv64_data_read_light(); + void randomx_riscv64_fix_loop_call(); + void randomx_riscv64_spad_store(); + void randomx_riscv64_spad_store_hardaes(); + void randomx_riscv64_spad_store_softaes(); + void randomx_riscv64_loop_end(); + void randomx_riscv64_fix_continue_loop(); + void randomx_riscv64_epilogue(); + void randomx_riscv64_softaes(); + void randomx_riscv64_program_end(); + void randomx_riscv64_ssh_init(); + void randomx_riscv64_ssh_load(); + void randomx_riscv64_ssh_prefetch(); + void randomx_riscv64_ssh_end(); +} diff --git a/src/crypto/randomx/jit_compiler_rv64_vector.cpp b/src/crypto/randomx/jit_compiler_rv64_vector.cpp new file mode 100644 index 000000000..8dc95613e --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector.cpp @@ -0,0 +1,207 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "crypto/randomx/configuration.h" +#include "crypto/randomx/jit_compiler_rv64_vector.h" +#include "crypto/randomx/jit_compiler_rv64_vector_static.h" +#include "crypto/randomx/reciprocal.h" +#include "crypto/randomx/superscalar.hpp" + +namespace randomx { + +#define ADDR(x) ((uint8_t*) &(x)) +#define DIST(x, y) (ADDR(y) - ADDR(x)) + +void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs) +{ + memcpy(buf, reinterpret_cast(randomx_riscv64_vector_sshash_begin), buf_size); + + uint8_t* p = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions); + + uint8_t* literals = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_imul_rcp_literals); + uint8_t* cur_literal = literals; + + for (size_t i = 0; i < num_programs; ++i) { + // Step 4 + size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor); + memcpy(p, reinterpret_cast(randomx_riscv64_vector_sshash_cache_prefetch), k); + p += k; + + // Step 5 + for (uint32_t j = 0; j < programs[i].size; ++j) { + const uint32_t dst = programs[i].programBuffer[j].dst & 7; + const uint32_t src = programs[i].programBuffer[j].src & 7; + const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3; + const uint32_t imm32 = programs[i].programBuffer[j].imm32; + + uint32_t inst; + #define EMIT(data) inst = (data); memcpy(p, &inst, 4); p += 4 + + switch (static_cast(programs[i].programBuffer[j].opcode)) { + case SuperscalarInstructionType::ISUB_R: + // 57 00 00 0A vsub.vv v0, v0, v0 + EMIT(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IXOR_R: + // 57 00 00 2E vxor.vv v0, v0, v0 + EMIT(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IADD_RS: + // 57 39 00 96 vsll.vi v18, v0, 0 + // 57 00 09 02 vadd.vv v0, v0, v18 + EMIT(0x96003957 | (modShift << 15) | (src << 20)); + EMIT(0x02090057 | (dst << 7) | (dst << 20)); + break; + + case SuperscalarInstructionType::IMUL_R: + // 57 20 00 96 vmul.vv v0, v0, v0 + EMIT(0x96002057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IROR_C: + { + const uint32_t shift_right = imm32 & 63; + const uint32_t shift_left = 64 - shift_right; + + if (shift_right < 32) { + // 57 39 00 A2 vsrl.vi v18, v0, 0 + EMIT(0xA2003957 | (shift_right << 15) | (dst << 20)); + } + else { + // 93 02 00 00 li x5, 0 + // 57 C9 02 A2 vsrl.vx v18, v0, x5 + EMIT(0x00000293 | (shift_right << 20)); + EMIT(0xA202C957 | (dst << 20)); + } + + if (shift_left < 32) { + // 57 30 00 96 vsll.vi v0, v0, 0 + EMIT(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20)); + } + else { + // 93 02 00 00 li x5, 0 + // 57 C0 02 96 vsll.vx v0, v0, x5 + EMIT(0x00000293 | (shift_left << 20)); + EMIT(0x9602C057 | (dst << 7) | (dst << 20)); + } + + // 57 00 20 2B vor.vv v0, v18, v0 + EMIT(0x2B200057 | (dst << 7) | (dst << 15)); + } + 
break; + + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: + // B7 02 00 00 lui x5, 0 + // 9B 82 02 00 addiw x5, x5, 0 + // 57 C0 02 02 vadd.vx v0, v0, x5 + EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000)); + EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20); + EMIT(0x0202C057 | (dst << 7) | (dst << 20)); + break; + + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: + // B7 02 00 00 lui x5, 0 + // 9B 82 02 00 addiw x5, x5, 0 + // 57 C0 02 2E vxor.vx v0, v0, x5 + EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000)); + EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20); + EMIT(0x2E02C057 | (dst << 7) | (dst << 20)); + break; + + case SuperscalarInstructionType::IMULH_R: + // 57 20 00 92 vmulhu.vv v0, v0, v0 + EMIT(0x92002057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::ISMULH_R: + // 57 20 00 9E vmulh.vv v0, v0, v0 + EMIT(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IMUL_RCP: + { + uint32_t offset = cur_literal - literals; + + if (offset == 2040) { + literals += 2040; + offset = 0; + + // 93 87 87 7F add x15, x15, 2040 + EMIT(0x7F878793); + } + + const uint64_t r = randomx_reciprocal_fast(imm32); + memcpy(cur_literal, &r, 8); + cur_literal += 8; + + // 83 B2 07 00 ld x5, (x15) + // 57 E0 02 96 vmul.vx v0, v0, x5 + EMIT(0x0007B283 | (offset << 20)); + EMIT(0x9602E057 | (dst << 7) | (dst << 20)); + } + break; + + default: + break; + } + } + + // Step 6 + k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_set_cache_index); + memcpy(p, reinterpret_cast(randomx_riscv64_vector_sshash_xor), k); + p += k; + + // Step 7 + if (i + 1 < num_programs) { + memcpy(p, reinterpret_cast(randomx_riscv64_vector_sshash_set_cache_index) + programs[i].getAddressRegister() * 4, 4); + p += 4; + } + } + + // Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction + const uint8_t* e = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions_end); + const uint32_t k = e - p; + const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000); + memcpy(p, &j, 4); + +#ifdef __GNUC__ + __builtin___clear_cache((char*) buf, (char*)(buf + buf_size)); +#endif + + return buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_dataset_init); +} + +} // namespace randomx diff --git a/src/crypto/randomx/jit_compiler_rv64_vector.h b/src/crypto/randomx/jit_compiler_rv64_vector.h new file mode 100644 index 000000000..ea06862e5 --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include + +namespace randomx { + +class SuperscalarProgram; + +void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs); + +} // namespace randomx diff --git a/src/crypto/randomx/jit_compiler_rv64_vector_static.S b/src/crypto/randomx/jit_compiler_rv64_vector_static.S new file mode 100644 index 000000000..ac63c625f --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector_static.S @@ -0,0 +1,296 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "configuration.h" + +// Compatibility macros + +#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES) +#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES +#endif + +#if defined(RANDOMX_ARGON_MEMORY) +#define RANDOMX_CACHE_MASK RANDOMX_ARGON_MEMORY * 1024 / 64 - 1 +#elif defined(RANDOMX_CACHE_MAX_SIZE) +#define RANDOMX_CACHE_MASK RANDOMX_CACHE_MAX_SIZE / 64 - 1 +#endif + +#define DECL(x) x + +.text + +.option arch, rv64gcv_zicbop +.option pic + +.global DECL(randomx_riscv64_vector_sshash_begin) +.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals) +.global DECL(randomx_riscv64_vector_sshash_dataset_init) +.global DECL(randomx_riscv64_vector_sshash_generated_instructions) +.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end) +.global DECL(randomx_riscv64_vector_sshash_cache_prefetch) +.global DECL(randomx_riscv64_vector_sshash_xor) +.global DECL(randomx_riscv64_vector_sshash_set_cache_index) +.global DECL(randomx_riscv64_vector_sshash_end) + +.balign 8 + +DECL(randomx_riscv64_vector_sshash_begin): + +sshash_constant_0: .dword 6364136223846793005 +sshash_constant_1: .dword 9298411001130361340 +sshash_constant_2: .dword 12065312585734608966 +sshash_constant_3: .dword 9306329213124626780 +sshash_constant_4: .dword 5281919268842080866 +sshash_constant_5: .dword 10536153434571861004 +sshash_constant_6: .dword 3398623926847679864 +sshash_constant_7: .dword 9549104520008361294 +sshash_offsets: .dword 0,1,2,3 +store_offsets: .dword 0,64,128,192 + +DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0 + +/* +Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation + +Register layout +--------------- +x5 = temporary + +x10 = randomx cache +x11 = output buffer +x12 = startBlock +x13 = endBlock + +x14 = cache mask +x15 = imul_rcp literal pointer + +v0-v7 = r0-r7 +v8 = itemNumber +v9 = cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory + +v10-v17 = sshash constants + +v18 = temporary + +v19 = dataset item store offsets +*/ + +DECL(randomx_riscv64_vector_sshash_dataset_init): + // Process 4 64-bit values at a time + li x5, 4 + vsetvli x5, x5, e64, m1, ta, ma + + // Load cache->memory pointer + ld x10, (x10) + + // Init cache mask + li x14, RANDOMX_CACHE_MASK + + // Init dataset item store offsets + lla x5, store_offsets + vle64.v v19, (x5) + + // Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3) + lla x5, sshash_offsets + vle64.v v8, (x5) + vadd.vx v8, v8, x12 + + // Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector) + lla x5, sshash_constant_0 + vlse64.v v10, (x5), x0 + + lla x5, sshash_constant_1 + vlse64.v v11, (x5), x0 + + lla x5, sshash_constant_2 + vlse64.v v12, (x5), x0 + + lla x5, sshash_constant_3 + vlse64.v v13, (x5), x0 + + lla x5, sshash_constant_4 + vlse64.v v14, (x5), x0 + + lla x5, sshash_constant_5 + vlse64.v v15, (x5), x0 + + lla x5, sshash_constant_6 + vlse64.v v16, (x5), x0 + + lla x5, sshash_constant_7 + vlse64.v v17, (x5), x0 + + // Calculate the end pointer for dataset init + sub x13, x13, x12 + slli x13, x13, 6 + add x13, x13, x11 + +init_item: + // Step 1. 
Init r0-r7 + + // r0 = (itemNumber + 1) * 6364136223846793005 + vmv.v.v v0, v8 + vmadd.vv v0, v10, v10 + + // r_i = r0 ^ c_i for i = 1..7 + vxor.vv v1, v0, v11 + vxor.vv v2, v0, v12 + vxor.vv v3, v0, v13 + vxor.vv v4, v0, v14 + vxor.vv v5, v0, v15 + vxor.vv v6, v0, v16 + vxor.vv v7, v0, v17 + + // Step 2. Let cacheIndex = itemNumber + vmv.v.v v9, v8 + + // Step 3 is implicit (all iterations are inlined, there is no "i") + + // Init imul_rcp literal pointer + lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals + +DECL(randomx_riscv64_vector_sshash_generated_instructions): + // Generated by JIT compiler + // + // Step 4. randomx_riscv64_vector_sshash_cache_prefetch + // Step 5. SuperscalarHash[i] + // Step 6. randomx_riscv64_vector_sshash_xor + // Step 7. randomx_riscv64_vector_sshash_set_cache_index + // + // Above steps will be repeated RANDOMX_CACHE_ACCESSES times + .fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0 + +DECL(randomx_riscv64_vector_sshash_generated_instructions_end): + // Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data. + vsuxei64.v v0, (x11), v19 + + add x5, x11, 8 + vsuxei64.v v1, (x5), v19 + + add x5, x11, 16 + vsuxei64.v v2, (x5), v19 + + add x5, x11, 24 + vsuxei64.v v3, (x5), v19 + + add x5, x11, 32 + vsuxei64.v v4, (x5), v19 + + add x5, x11, 40 + vsuxei64.v v5, (x5), v19 + + add x5, x11, 48 + vsuxei64.v v6, (x5), v19 + + add x5, x11, 56 + vsuxei64.v v7, (x5), v19 + + // Iterate to the next 4 items + vadd.vi v8, v8, 4 + add x11, x11, 256 + bltu x11, x13, init_item + + ret + +// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache. +DECL(randomx_riscv64_vector_sshash_cache_prefetch): + // v9 = convert from cacheIndex to a direct pointer into cache->memory + vand.vx v9, v9, x14 + vsll.vi v9, v9, 6 + vadd.vx v9, v9, x10 + + // Prefetch element 0 + vmv.x.s x5, v9 + prefetch.r (x5) + + // Prefetch element 1 + vslidedown.vi v18, v9, 1 + vmv.x.s x5, v18 + prefetch.r (x5) + + // Prefetch element 2 + vslidedown.vi v18, v9, 2 + vmv.x.s x5, v18 + prefetch.r (x5) + + // Prefetch element 3 + vslidedown.vi v18, v9, 3 + vmv.x.s x5, v18 + prefetch.r (x5) + + // v9 = byte offset into cache->memory + vsub.vx v9, v9, x10 + +// Step 6. XOR all registers with data loaded from randomx cache +DECL(randomx_riscv64_vector_sshash_xor): + vluxei64.v v18, (x10), v9 + vxor.vv v0, v0, v18 + + add x5, x10, 8 + vluxei64.v v18, (x5), v9 + vxor.vv v1, v1, v18 + + add x5, x10, 16 + vluxei64.v v18, (x5), v9 + vxor.vv v2, v2, v18 + + add x5, x10, 24 + vluxei64.v v18, (x5), v9 + vxor.vv v3, v3, v18 + + add x5, x10, 32 + vluxei64.v v18, (x5), v9 + vxor.vv v4, v4, v18 + + add x5, x10, 40 + vluxei64.v v18, (x5), v9 + vxor.vv v5, v5, v18 + + add x5, x10, 48 + vluxei64.v v18, (x5), v9 + vxor.vv v6, v6, v18 + + add x5, x10, 56 + vluxei64.v v18, (x5), v9 + vxor.vv v7, v7, v18 + +// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5. 
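+ // The eight vmv.v.v instructions below form a template table rather than straight-line
+ // code: for each cache access the JIT copies exactly one of them (4 bytes, selected by
+ // getAddressRegister() * 4), and this step is skipped entirely after the last program.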
+DECL(randomx_riscv64_vector_sshash_set_cache_index): + // JIT compiler will pick a single instruction reading from the required register + vmv.v.v v9, v0 + vmv.v.v v9, v1 + vmv.v.v v9, v2 + vmv.v.v v9, v3 + vmv.v.v v9, v4 + vmv.v.v v9, v5 + vmv.v.v v9, v6 + vmv.v.v v9, v7 + +DECL(randomx_riscv64_vector_sshash_end): diff --git a/src/crypto/randomx/jit_compiler_rv64_vector_static.h b/src/crypto/randomx/jit_compiler_rv64_vector_static.h new file mode 100644 index 000000000..09bab597e --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector_static.h @@ -0,0 +1,58 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#if defined(__cplusplus) +#include +#else +#include +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +struct randomx_cache; + +void randomx_riscv64_vector_sshash_begin(); +void randomx_riscv64_vector_sshash_imul_rcp_literals(); +void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock); +void randomx_riscv64_vector_sshash_cache_prefetch(); +void randomx_riscv64_vector_sshash_generated_instructions(); +void randomx_riscv64_vector_sshash_generated_instructions_end(); +void randomx_riscv64_vector_sshash_cache_prefetch(); +void randomx_riscv64_vector_sshash_xor(); +void randomx_riscv64_vector_sshash_set_cache_index(); +void randomx_riscv64_vector_sshash_end(); + +#if defined(__cplusplus) +} +#endif diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 1126c7a2e..1609a4af3 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -39,6 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "crypto/randomx/jit_compiler_x86_static.hpp" #elif (XMRIG_ARM == 8) #include "crypto/randomx/jit_compiler_a64_static.hpp" +#elif defined(__riscv) && defined(__riscv_xlen) && (__riscv_xlen == 64) +#include "crypto/randomx/jit_compiler_rv64_static.hpp" #endif #include "backend/cpu/Cpu.h" @@ -190,7 +192,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() # endif } -#if (XMRIG_ARM == 8) +#if (XMRIG_ARM == 8) || defined(XMRIG_RISCV) static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } #endif @@ -274,6 +276,14 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx #define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x +#elif defined(XMRIG_RISCV) + + Log2_ScratchpadL1 = Log2(ScratchpadL1_Size); + Log2_ScratchpadL2 = Log2(ScratchpadL2_Size); + Log2_ScratchpadL3 = Log2(ScratchpadL3_Size); + +#define JIT_HANDLE(x, prev) randomx::JitCompilerRV64::engine[k] = &randomx::JitCompilerRV64::v1_##x + #else #define JIT_HANDLE(x, prev) #endif diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index c2d244447..70abff348 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -133,7 +133,7 @@ struct RandomX_ConfigurationBase uint32_t ScratchpadL3Mask_Calculated; uint32_t ScratchpadL3Mask64_Calculated; -# if (XMRIG_ARM == 8) +# if (XMRIG_ARM == 8) || defined(XMRIG_RISCV) uint32_t Log2_ScratchpadL1; uint32_t Log2_ScratchpadL2; uint32_t Log2_ScratchpadL3; diff --git a/src/crypto/randomx/reciprocal.c b/src/crypto/randomx/reciprocal.c index 87cda2677..ebd7662ca 100644 --- a/src/crypto/randomx/reciprocal.c +++ b/src/crypto/randomx/reciprocal.c @@ -73,8 +73,20 @@ uint64_t randomx_reciprocal(uint64_t divisor) { #if !RANDOMX_HAVE_FAST_RECIPROCAL +#ifdef __GNUC__ +uint64_t randomx_reciprocal_fast(uint64_t divisor) +{ + const uint64_t q = (1ULL << 63) / divisor; + const uint64_t r = (1ULL << 63) % divisor; + + const uint64_t shift = 64 - __builtin_clzll(divisor); + + return (q << shift) + ((r << shift) / divisor); +} +#else uint64_t randomx_reciprocal_fast(uint64_t divisor) { return randomx_reciprocal(divisor); } +#endif #endif diff --git a/src/crypto/randomx/soft_aes.cpp b/src/crypto/randomx/soft_aes.cpp index 04fb7ac0e..aa5cdd494 100644 --- a/src/crypto/randomx/soft_aes.cpp +++ b/src/crypto/randomx/soft_aes.cpp @@ -39,6 +39,9 @@ alignas(64) uint32_t lutDec1[256]; alignas(64) uint32_t lutDec2[256]; alignas(64) uint32_t lutDec3[256]; +alignas(64) uint8_t lutEncIndex[4][32]; +alignas(64) uint8_t lutDecIndex[4][32]; + static uint32_t mul_gf2(uint32_t b, uint32_t c) { uint32_t s = 0; @@ -115,5 +118,49 @@ static struct SAESInitializer lutDec2[i] = w; w = (w << 8) | (w >> 24); lutDec3[i] = w; } + + memset(lutEncIndex, -1, sizeof(lutEncIndex)); + memset(lutDecIndex, -1, sizeof(lutDecIndex)); + + lutEncIndex[0][ 0] = 0; + lutEncIndex[0][ 4] = 4; + lutEncIndex[0][ 8] = 8; + lutEncIndex[0][12] = 12; + lutEncIndex[1][ 0] = 5; + lutEncIndex[1][ 4] = 9; + lutEncIndex[1][ 8] = 13; + lutEncIndex[1][12] = 1; + lutEncIndex[2][ 0] = 10; + lutEncIndex[2][ 4] = 14; + lutEncIndex[2][ 8] = 2; + lutEncIndex[2][12] = 6; + lutEncIndex[3][ 0] = 15; + lutEncIndex[3][ 4] = 3; + lutEncIndex[3][ 8] = 7; + lutEncIndex[3][12] = 11; + + lutDecIndex[0][ 0] = 0; + lutDecIndex[0][ 4] = 4; + lutDecIndex[0][ 8] = 8; + lutDecIndex[0][12] = 12; + lutDecIndex[1][ 0] = 13; + lutDecIndex[1][ 4] = 1; + lutDecIndex[1][ 8] = 5; + lutDecIndex[1][12] = 9; + lutDecIndex[2][ 0] = 10; + lutDecIndex[2][ 4] 
= 14; + lutDecIndex[2][ 8] = 2; + lutDecIndex[2][12] = 6; + lutDecIndex[3][ 0] = 7; + lutDecIndex[3][ 4] = 11; + lutDecIndex[3][ 8] = 15; + lutDecIndex[3][12] = 3; + + for (uint32_t i = 0; i < 4; ++i) { + for (uint32_t j = 0; j < 16; j += 4) { + lutEncIndex[i][j + 16] = lutEncIndex[i][j] + 16; + lutDecIndex[i][j + 16] = lutDecIndex[i][j] + 16; + } + } } } aes_initializer; diff --git a/src/crypto/randomx/soft_aes.h b/src/crypto/randomx/soft_aes.h index 2b7d5a1e9..4e133910d 100644 --- a/src/crypto/randomx/soft_aes.h +++ b/src/crypto/randomx/soft_aes.h @@ -41,6 +41,9 @@ extern uint32_t lutDec1[256]; extern uint32_t lutDec2[256]; extern uint32_t lutDec3[256]; +extern uint8_t lutEncIndex[4][32]; +extern uint8_t lutDecIndex[4][32]; + template rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key); template rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key); @@ -147,3 +150,32 @@ template<> FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) { return rx_aesdec_vec_i128(in, key); } + +#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED) +#include + +FORCE_INLINE vuint32m1_t softaes_vector_double( + vuint32m1_t in, + vuint32m1_t key, + vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3, + const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3) +{ + const vuint8m1_t in8 = __riscv_vreinterpret_v_u32m1_u8m1(in); + + const vuint32m1_t index0 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i0, 32)); + const vuint32m1_t index1 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i1, 32)); + const vuint32m1_t index2 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i2, 32)); + const vuint32m1_t index3 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i3, 32)); + + vuint32m1_t s0 = __riscv_vluxei32_v_u32m1(lut0, __riscv_vsll_vx_u32m1(index0, 2, 8), 8); + vuint32m1_t s1 = __riscv_vluxei32_v_u32m1(lut1, __riscv_vsll_vx_u32m1(index1, 2, 8), 8); + vuint32m1_t s2 = __riscv_vluxei32_v_u32m1(lut2, __riscv_vsll_vx_u32m1(index2, 2, 8), 8); + vuint32m1_t s3 = __riscv_vluxei32_v_u32m1(lut3, __riscv_vsll_vx_u32m1(index3, 2, 8), 8); + + s0 = __riscv_vxor_vv_u32m1(s0, s1, 8); + s2 = __riscv_vxor_vv_u32m1(s2, s3, 8); + s0 = __riscv_vxor_vv_u32m1(s0, s2, 8); + + return __riscv_vxor_vv_u32m1(s0, key, 8); +} +#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED) diff --git a/src/crypto/randomx/tests/riscv64_vector.s b/src/crypto/randomx/tests/riscv64_vector.s new file mode 100644 index 000000000..ee4c234f7 --- /dev/null +++ b/src/crypto/randomx/tests/riscv64_vector.s @@ -0,0 +1,14 @@ +/* RISC-V - test if the vector extension and prefetch instruction are present */ + +.text +.option arch, rv64gcv_zicbop +.global main + +main: + lla x5, main + prefetch.r (x5) + li x5, 4 + vsetvli x6, x5, e64, m1, ta, ma + vxor.vv v0, v0, v0 + sub x10, x5, x6 + ret diff --git a/src/crypto/randomx/tests/riscv64_zba.s b/src/crypto/randomx/tests/riscv64_zba.s new file mode 100644 index 000000000..e1947e7a6 --- /dev/null +++ b/src/crypto/randomx/tests/riscv64_zba.s @@ -0,0 +1,9 @@ +/* RISC-V - test if the Zba extension is present */ + +.text +.global main + +main: + sh1add x6, x6, x7 + li x10, 0 + ret diff --git a/src/crypto/randomx/tests/riscv64_zbb.s b/src/crypto/randomx/tests/riscv64_zbb.s new file mode 100644 index 000000000..d922043f0 --- /dev/null +++ b/src/crypto/randomx/tests/riscv64_zbb.s @@ -0,0 +1,9 @@ +/* RISC-V - test if the Zbb extension is present */ + +.text +.global main + +main: + ror x6, x6, x7 + li x10, 0 
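+ /* return 0 in a0 so CMake's try_run() reports the Zbb test as passing */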
+ ret diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index 86b3a3f6d..3495d7baa 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -43,6 +43,12 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache, randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5)); randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5); } +#ifdef XMRIG_RISCV + else if (itemCount % 4) { + randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 4)); + randomx_init_dataset(dataset, cache, startItem + itemCount - 4, 4); + } +#endif else { randomx_init_dataset(dataset, cache, startItem, itemCount); } @@ -209,7 +215,7 @@ void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages) return; } - m_memory = new VirtualMemory(maxSize(), hugePages, oneGbPages, false, m_node); + m_memory = new VirtualMemory(maxSize(), hugePages, oneGbPages, false, m_node, VirtualMemory::kDefaultHugePageSize); if (m_memory->isOneGbPages()) { m_scratchpadOffset = maxSize() + RANDOMX_CACHE_MAX_SIZE; diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index acaa25e05..6ffe210d4 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -29,9 +29,17 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so { int flags = 0; + // On RISC-V, force software AES path even if CPU reports AES capability. + // The RandomX portable intrinsics will throw at runtime when HAVE_AES is not defined + // for this architecture. Until native AES intrinsics are wired for RISC-V, avoid + // setting HARD_AES to prevent "Platform doesn't support hardware AES" aborts. +# ifndef XMRIG_RISCV if (!softAes) { flags |= RANDOMX_FLAG_HARD_AES; } +# else + (void)softAes; // unused on RISC-V to force soft AES +# endif if (dataset->get()) { flags |= RANDOMX_FLAG_FULL_MEM; diff --git a/src/net/JobResults.cpp b/src/net/JobResults.cpp index 19a1dc43c..57e4a8db0 100644 --- a/src/net/JobResults.cpp +++ b/src/net/JobResults.cpp @@ -115,7 +115,7 @@ static inline void checkHash(const JobBundle &bundle, std::vector &re static void getResults(JobBundle &bundle, std::vector &results, uint32_t &errors, bool hwAES) { const auto &algorithm = bundle.job.algorithm(); - auto memory = new VirtualMemory(algorithm.l3(), false, false, false); + auto memory = new VirtualMemory(algorithm.l3(), false, false, false, 0, VirtualMemory::kDefaultHugePageSize); alignas(16) uint8_t hash[32]{ 0 }; if (algorithm.family() == Algorithm::RANDOM_X) { diff --git a/src/version.h b/src/version.h index 9176a3d95..805fe727c 100644 --- a/src/version.h +++ b/src/version.h @@ -2,18 +2,7 @@ * Copyright (c) 2018-2025 SChernykh * Copyright (c) 2016-2025 XMRig , * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
+ * SPDX-License-Identifier: GPL-3.0-or-later */ #ifndef XMRIG_VERSION_H @@ -22,18 +11,20 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "6.24.0" +#define APP_VERSION "6.25.0-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2025 xmrig.com" #define APP_KIND "miner" #define APP_VER_MAJOR 6 -#define APP_VER_MINOR 24 +#define APP_VER_MINOR 25 #define APP_VER_PATCH 0 #ifdef _MSC_VER -# if (_MSC_VER >= 1930) +# if (_MSC_VER >= 1950) +# define MSVC_VERSION 2026 +# elif (_MSC_VER >=1930 && _MSC_VER < 1950) # define MSVC_VERSION 2022 # elif (_MSC_VER >= 1920 && _MSC_VER < 1930) # define MSVC_VERSION 2019 @@ -64,6 +55,10 @@ # define APP_OS "Linux" #elif defined XMRIG_OS_FREEBSD # define APP_OS "FreeBSD" +#elif defined XMRIG_OS_OPENBSD +# define APP_OS "OpenBSD" +#elif defined XMRIG_OS_HAIKU +# define APP_OS "Haiku" #else # define APP_OS "Unknown OS" #endif @@ -73,6 +68,8 @@ #ifdef XMRIG_ARM # define APP_ARCH "ARMv" STR2(XMRIG_ARM) +#elif defined(XMRIG_RISCV) +# define APP_ARCH "RISC-V" #else # if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) || defined(_M_AMD64) # define APP_ARCH "x86-64"
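
Note on the jump that generateDatasetInitVectorRV64() writes at the end of the generated
block: the expression 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000) is
the RISC-V J-type encoding of "jal x0, k" with the imm[20] term omitted. A minimal
stand-alone sketch of the same bit packing follows; the encode_j() helper is illustrative
only and is not part of the patch.

    #include <cstdint>

    // Encode "jal x0, offset" (an unconditional jump).
    // J-type immediate layout: imm[20] -> bit 31, imm[10:1] -> bits 30:21,
    //                          imm[11] -> bit 20, imm[19:12] -> bits 19:12.
    static uint32_t encode_j(int32_t offset)
    {
        const uint32_t k = static_cast<uint32_t>(offset);
        return 0x6F                        /* opcode JAL, rd = x0 */
            | ((k & 0x0007FE) << 20)       /* imm[10:1]  */
            | ((k & 0x0000800) << 9)       /* imm[11]    */
            |  (k & 0x00FF000)             /* imm[19:12] */
            | ((k & 0x0100000) << 11);     /* imm[20]    */
    }

The patch can drop the imm[20] term because, for the default configuration, the forward
distance to randomx_riscv64_vector_sshash_generated_instructions_end stays well below the
J-type range.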