1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-01-03 00:22:45 -05:00

Compare commits

..

21 Commits

Author SHA1 Message Date
xmrig
1bd59129c4 Merge pull request #3750 from SChernykh/dev
RISC-V: use vector hardware AES instead of scalar
2026-01-01 15:43:36 +07:00
SChernykh
8ccf7de304 RISC-V: use vector hardware AES instead of scalar 2025-12-31 23:37:55 +01:00
xmrig
30ffb9cb27 Merge pull request #3749 from SChernykh/dev
RISC-V: detect and use hardware AES
2025-12-30 14:13:44 +07:00
SChernykh
d3a84c4b52 RISC-V: detect and use hardware AES 2025-12-29 22:10:07 +01:00
xmrig
eb49237aaa Merge pull request #3748 from SChernykh/dev
RISC-V: auto-detect and use vector code for all RandomX AES functions
2025-12-28 13:12:50 +07:00
SChernykh
e1efd3dc7f RISC-V: auto-detect and use vector code for all RandomX AES functions 2025-12-27 21:30:14 +01:00
xmrig
e3d0135708 Merge pull request #3746 from SChernykh/dev
RISC-V: vectorized RandomX main loop
2025-12-27 18:40:47 +07:00
SChernykh
f661e1eb30 RISC-V: vectorized RandomX main loop 2025-12-26 22:11:39 +01:00
XMRig
99488751f1 v6.25.1-dev 2025-12-23 20:53:43 +07:00
XMRig
5fb0321c84 Merge branch 'master' into dev 2025-12-23 20:53:11 +07:00
XMRig
753859caea v6.25.0 2025-12-23 19:44:52 +07:00
XMRig
712a5a5e66 Merge branch 'dev' 2025-12-23 19:44:21 +07:00
XMRig
290a0de6e5 v6.25.0-dev 2025-12-23 19:37:24 +07:00
xmrig
e0564b5fdd Merge pull request #3743 from SChernykh/dev
Linux: added support for transparent huge pages
2025-12-12 01:20:03 +07:00
SChernykh
482a1f0b40 Linux: added support for transparent huge pages 2025-12-11 11:23:18 +01:00
xmrig
856813c1ae Merge pull request #3740 from SChernykh/dev
RISC-V: added vectorized soft AES
2025-12-06 19:39:47 +07:00
SChernykh
23da1a90f5 RISC-V: added vectorized soft AES 2025-12-05 21:09:22 +01:00
xmrig
7981e4a76a Merge pull request #3736 from SChernykh/dev
RISC-V: added vectorized dataset init
2025-12-01 10:46:03 +07:00
SChernykh
7ef5142a52 RISC-V: added vectorized dataset init (activated by setting init-avx2 to 1 in config.json) 2025-11-30 19:15:15 +01:00
xmrig
db5c6d9190 Merge pull request #3733 from void-512/master
Add detection for MSVC/2026
2025-11-13 15:52:43 +07:00
Tony Wang
e88009d575 add detection for MSVC/2026 2025-11-12 17:32:57 -05:00
39 changed files with 2987 additions and 112 deletions

1
.gitignore vendored
View File

@@ -5,3 +5,4 @@ scripts/deps
/.idea
/src/backend/opencl/cl/cn/cryptonight_gen.cl
.vscode
/.qtcreator

View File

@@ -1,3 +1,17 @@
# v6.25.0
- [#3680](https://github.com/xmrig/xmrig/pull/3680) Added `armv8l` to the list of 32-bit ARM targets.
- [#3708](https://github.com/xmrig/xmrig/pull/3708) Minor Aarch64 JIT changes (better instruction selection, don't emit instructions that add 0, etc).
- [#3718](https://github.com/xmrig/xmrig/pull/3718) Solo mining: added support for FCMP++ hardfork.
- [#3722](https://github.com/xmrig/xmrig/pull/3722) Added Zen4 (Hawk Point) CPUs detection.
- [#3725](https://github.com/xmrig/xmrig/pull/3725) Added **RISC-V** support with JIT compiler.
- [#3731](https://github.com/xmrig/xmrig/pull/3731) Added initial Haiku OS support.
- [#3733](https://github.com/xmrig/xmrig/pull/3733) Added detection for MSVC/2026.
- [#3736](https://github.com/xmrig/xmrig/pull/3736) RISC-V: added vectorized dataset init.
- [#3740](https://github.com/xmrig/xmrig/pull/3740) RISC-V: added vectorized soft AES.
- [#3743](https://github.com/xmrig/xmrig/pull/3743) Linux: added support for transparent huge pages.
- Improved LibreSSL support.
- Improved compatibility for automatically enabling huge pages on Linux systems without NUMA support.
# v6.24.0
- [#3671](https://github.com/xmrig/xmrig/pull/3671) Fixed detection of L2 cache size for some complex NUMA topologies.
- [#3674](https://github.com/xmrig/xmrig/pull/3674) Fixed ARMv7 build.

View File

@@ -51,10 +51,34 @@ if (XMRIG_RISCV)
# default build uses the RV64GC baseline
set(RVARCH "rv64gc")
# for native builds, enable Zba and Zbb if supported by the CPU
if(ARCH STREQUAL "native")
enable_language(ASM)
try_run(RANDOMX_VECTOR_RUN_FAIL
RANDOMX_VECTOR_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_vector.s
COMPILE_DEFINITIONS "-march=rv64gcv")
if (RANDOMX_VECTOR_COMPILE_OK AND NOT RANDOMX_VECTOR_RUN_FAIL)
set(RVARCH_V ON)
message(STATUS "RISC-V vector extension detected")
else()
set(RVARCH_V OFF)
endif()
try_run(RANDOMX_ZICBOP_RUN_FAIL
RANDOMX_ZICBOP_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zicbop.s
COMPILE_DEFINITIONS "-march=rv64gc_zicbop")
if (RANDOMX_ZICBOP_COMPILE_OK AND NOT RANDOMX_ZICBOP_RUN_FAIL)
set(RVARCH_ZICBOP ON)
message(STATUS "RISC-V zicbop extension detected")
else()
set(RVARCH_ZICBOP OFF)
endif()
try_run(RANDOMX_ZBA_RUN_FAIL
RANDOMX_ZBA_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
@@ -62,8 +86,10 @@ if (XMRIG_RISCV)
COMPILE_DEFINITIONS "-march=rv64gc_zba")
if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
set(RVARCH "${RVARCH}_zba")
set(RVARCH_ZBA ON)
message(STATUS "RISC-V zba extension detected")
else()
set(RVARCH_ZBA OFF)
endif()
try_run(RANDOMX_ZBB_RUN_FAIL
@@ -73,8 +99,57 @@ if (XMRIG_RISCV)
COMPILE_DEFINITIONS "-march=rv64gc_zbb")
if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
set(RVARCH "${RVARCH}_zbb")
set(RVARCH_ZBB ON)
message(STATUS "RISC-V zbb extension detected")
else()
set(RVARCH_ZBB OFF)
endif()
try_run(RANDOMX_ZVKB_RUN_FAIL
RANDOMX_ZVKB_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkb.s
COMPILE_DEFINITIONS "-march=rv64gcv_zvkb")
if (RANDOMX_ZVKB_COMPILE_OK AND NOT RANDOMX_ZVKB_RUN_FAIL)
set(RVARCH_ZVKB ON)
message(STATUS "RISC-V zvkb extension detected")
else()
set(RVARCH_ZVKB OFF)
endif()
try_run(RANDOMX_ZVKNED_RUN_FAIL
RANDOMX_ZVKNED_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkned.s
COMPILE_DEFINITIONS "-march=rv64gcv_zvkned")
if (RANDOMX_ZVKNED_COMPILE_OK AND NOT RANDOMX_ZVKNED_RUN_FAIL)
set(RVARCH_ZVKNED ON)
message(STATUS "RISC-V zvkned extension detected")
else()
set(RVARCH_ZVKNED OFF)
endif()
# for native builds, enable Zba and Zbb if supported by the CPU
if (ARCH STREQUAL "native")
if (RVARCH_V)
set(RVARCH "${RVARCH}v")
endif()
if (RVARCH_ZICBOP)
set(RVARCH "${RVARCH}_zicbop")
endif()
if (RVARCH_ZBA)
set(RVARCH "${RVARCH}_zba")
endif()
if (RVARCH_ZBB)
set(RVARCH "${RVARCH}_zbb")
endif()
if (RVARCH_ZVKB)
set(RVARCH "${RVARCH}_zvkb")
endif()
if (RVARCH_ZVKNED)
set(RVARCH "${RVARCH}_zvkned")
endif()
endif()

View File

@@ -83,10 +83,36 @@ if (WITH_RANDOMX)
elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_rv64_static.S
src/crypto/randomx/jit_compiler_rv64_vector_static.S
src/crypto/randomx/jit_compiler_rv64.cpp
src/crypto/randomx/jit_compiler_rv64_vector.cpp
src/crypto/randomx/aes_hash_rv64_vector.cpp
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)
set(RV64_VECTOR_FILE_ARCH "rv64gcv")
if (ARCH STREQUAL "native")
if (RVARCH_ZICBOP)
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zicbop")
endif()
if (RVARCH_ZBA)
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zba")
endif()
if (RVARCH_ZBB)
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zbb")
endif()
if (RVARCH_ZVKB)
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zvkb")
endif()
endif()
set_source_files_properties(src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTIES COMPILE_FLAGS "-march=${RV64_VECTOR_FILE_ARCH}")
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_vector.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}")
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zvkned.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}_zvkned")
else()
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_fallback.cpp

View File

@@ -89,11 +89,16 @@ static void print_cpu(const Config *)
{
const auto info = Cpu::info();
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %s %sAES%s",
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %s %s%sAES%s",
"CPU",
info->brand(),
info->packages(),
ICpuInfo::is64bit() ? GREEN_BOLD("64-bit") : RED_BOLD("32-bit"),
#ifdef XMRIG_RISCV
info->hasRISCV_Vector() ? GREEN_BOLD_S "RVV " : RED_BOLD_S "-RVV ",
#else
"",
#endif
info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-",
info->isVM() ? RED_BOLD_S " VM" : ""
);

View File

@@ -87,14 +87,14 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
if (!cn_heavyZen3Memory) {
// Round up number of threads to the multiple of 8
const size_t num_threads = ((m_threads + 7) / 8) * 8;
cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * num_threads, data.hugePages, false, false, node());
cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * num_threads, data.hugePages, false, false, node(), VirtualMemory::kDefaultHugePageSize);
}
m_memory = cn_heavyZen3Memory;
}
else
# endif
{
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node(), VirtualMemory::kDefaultHugePageSize);
}
# ifdef XMRIG_ALGO_GHOSTRIDER

View File

@@ -85,6 +85,7 @@ public:
FLAG_POPCNT,
FLAG_CAT_L3,
FLAG_VM,
FLAG_RISCV_VECTOR,
FLAG_MAX
};
@@ -109,6 +110,7 @@ public:
virtual bool hasOneGbPages() const = 0;
virtual bool hasXOP() const = 0;
virtual bool isVM() const = 0;
virtual bool hasRISCV_Vector() const = 0;
virtual bool jccErratum() const = 0;
virtual const char *backend() const = 0;
virtual const char *brand() const = 0;

View File

@@ -58,8 +58,8 @@
namespace xmrig {
constexpr size_t kCpuFlagsSize = 15;
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
constexpr size_t kCpuFlagsSize = 16;
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm", "rvv" };
static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX mismatch");

View File

@@ -52,6 +52,7 @@ protected:
inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); }
inline bool hasXOP() const override { return has(FLAG_XOP); }
inline bool isVM() const override { return has(FLAG_VM); }
inline bool hasRISCV_Vector() const override { return has(FLAG_RISCV_VECTOR); }
inline bool jccErratum() const override { return m_jccErratum; }
inline const char *brand() const override { return m_brand; }
inline const std::vector<int32_t> &units() const override { return m_units; }

View File

@@ -34,7 +34,7 @@ namespace xmrig {
extern String cpu_name_riscv();
extern bool has_riscv_vector();
extern bool has_riscv_crypto();
extern bool has_riscv_aes();
} // namespace xmrig
@@ -55,8 +55,11 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
strncpy(m_brand, name.data(), sizeof(m_brand) - 1);
}
// Check for crypto extensions (Zknd/Zkne/Zknh - AES and SHA)
m_flags.set(FLAG_AES, has_riscv_crypto());
// Check for vector extensions
m_flags.set(FLAG_RISCV_VECTOR, has_riscv_vector());
// Check for AES extensions (Zknd/Zkne)
m_flags.set(FLAG_AES, has_riscv_aes());
// RISC-V typically supports 1GB huge pages
m_flags.set(FLAG_PDPE1GB, std::ifstream("/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages").good());

View File

@@ -32,9 +32,9 @@ struct riscv_cpu_desc
String isa;
String uarch;
bool has_vector = false;
bool has_crypto = false;
bool has_aes = false;
inline bool isReady() const { return !model.isNull(); }
inline bool isReady() const { return !isa.isNull(); }
};
static bool lookup_riscv(char *line, const char *pattern, String &value)
@@ -81,22 +81,32 @@ static bool read_riscv_cpuinfo(riscv_cpu_desc *desc)
lookup_riscv(buf, "model name", desc->model);
if (lookup_riscv(buf, "isa", desc->isa)) {
// Check for vector extensions
if (strstr(buf, "zve") || strstr(buf, "v_")) {
desc->isa.toLower();
for (const String& s : desc->isa.split('_')) {
const char* p = s.data();
const size_t n = s.size();
if ((s.size() > 4) && (memcmp(p, "rv64", 4) == 0)) {
for (size_t i = 4; i < n; ++i) {
if (p[i] == 'v') {
desc->has_vector = true;
break;
}
}
}
else if (s == "zve64d") {
desc->has_vector = true;
}
// Check for crypto extensions (AES, SHA, etc.)
// zkn* = NIST crypto suite, zks* = SM crypto suite
// Note: zba/zbb/zbc/zbs are bit-manipulation, NOT crypto
if (strstr(buf, "zknd") || strstr(buf, "zkne") || strstr(buf, "zknh") ||
strstr(buf, "zksed") || strstr(buf, "zksh")) {
desc->has_crypto = true;
else if ((s == "zvkn") || (s == "zvknc") || (s == "zvkned") || (s == "zvkng")){
desc->has_aes = true;
}
}
}
lookup_riscv(buf, "uarch", desc->uarch);
if (desc->isReady() && !desc->isa.isNull()) {
if (desc->isReady()) {
break;
}
}
@@ -128,11 +138,11 @@ bool has_riscv_vector()
return false;
}
bool has_riscv_crypto()
bool has_riscv_aes()
{
riscv_cpu_desc desc;
if (read_riscv_cpuinfo(&desc)) {
return desc.has_crypto;
return desc.has_aes;
}
return false;
}

View File

@@ -49,7 +49,7 @@ xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node)
constexpr size_t alignment = 1 << 24;
m_memory = new VirtualMemory(size * pageSize + alignment, hugePages, false, false, node);
m_memory = new VirtualMemory(size * pageSize + alignment, hugePages, false, false, node, VirtualMemory::kDefaultHugePageSize);
m_alignOffset = (alignment - (((size_t)m_memory->scratchpad()) % alignment)) % alignment;
}

View File

@@ -75,6 +75,16 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages
}
m_scratchpad = static_cast<uint8_t*>(_mm_malloc(m_size, alignSize));
// Huge pages failed to allocate, but try to enable transparent huge pages for the range
if (alignSize >= kDefaultHugePageSize) {
if (m_scratchpad) {
adviseLargePages(m_scratchpad, m_size);
}
else {
m_scratchpad = static_cast<uint8_t*>(_mm_malloc(m_size, 64));
}
}
}

View File

@@ -65,6 +65,7 @@ public:
static void *allocateExecutableMemory(size_t size, bool hugePages);
static void *allocateLargePagesMemory(size_t size);
static void *allocateOneGbPagesMemory(size_t size);
static bool adviseLargePages(void *p, size_t size);
static void destroy();
static void flushInstructionCache(void *p, size_t size);
static void freeLargePagesMemory(void *p, size_t size);

View File

@@ -276,6 +276,16 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
}
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
{
# ifdef XMRIG_OS_LINUX
return (madvise(p, size, MADV_HUGEPAGE) == 0);
# else
return false;
# endif
}
void xmrig::VirtualMemory::freeLargePagesMemory()
{
if (m_flags.test(FLAG_LOCK)) {

View File

@@ -260,6 +260,12 @@ bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
}
bool xmrig::VirtualMemory::adviseLargePages(void *p, size_t size)
{
return false;
}
void xmrig::VirtualMemory::freeLargePagesMemory()
{
freeLargePagesMemory(m_scratchpad, m_size);

View File

@@ -38,6 +38,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/common.hpp"
#include "crypto/rx/Profiler.h"
#ifdef XMRIG_RISCV
#include "backend/cpu/Cpu.h"
#include "crypto/randomx/aes_hash_rv64_vector.hpp"
#include "crypto/randomx/aes_hash_rv64_zvkned.hpp"
#endif
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
@@ -59,7 +65,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Hashing throughput: >20 GiB/s per CPU core with hardware AES
*/
template<int softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
{
#ifdef XMRIG_RISCV
if (xmrig::Cpu::info()->hasAES()) {
hashAes1Rx4_zvkned(input, inputSize, hash);
return;
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
hashAes1Rx4_RVV<softAes>(input, inputSize, hash);
return;
}
#endif
const uint8_t* inptr = (uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
@@ -127,7 +146,20 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
calls to this function.
*/
template<int softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
void fillAes1Rx4(void *state, size_t outputSize, void *buffer)
{
#ifdef XMRIG_RISCV
if (xmrig::Cpu::info()->hasAES()) {
fillAes1Rx4_zvkned(state, outputSize, buffer);
return;
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
fillAes1Rx4_RVV<softAes>(state, outputSize, buffer);
return;
}
#endif
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
@@ -171,7 +203,20 @@ static constexpr randomx::Instruction inst{ 0xFF, 7, 7, 0xFF, 0xFFFFFFFFU };
alignas(16) static const randomx::Instruction inst_mask[2] = { inst, inst };
template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
void fillAes4Rx4(void *state, size_t outputSize, void *buffer)
{
#ifdef XMRIG_RISCV
if (xmrig::Cpu::info()->hasAES()) {
fillAes4Rx4_zvkned(state, outputSize, buffer);
return;
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
fillAes4Rx4_RVV<softAes>(state, outputSize, buffer);
return;
}
#endif
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
@@ -236,9 +281,22 @@ template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<int softAes, int unroll>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
{
PROFILE_SCOPE(RandomX_AES);
#ifdef XMRIG_RISCV
if (xmrig::Cpu::info()->hasAES()) {
hashAndFillAes1Rx4_zvkned(scratchpad, scratchpadSize, hash, fill_state);
return;
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
hashAndFillAes1Rx4_RVV<softAes, unroll>(scratchpad, scratchpadSize, hash, fill_state);
return;
}
#endif
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
@@ -387,37 +445,48 @@ hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>;
void SelectSoftAESImpl(size_t threadsCount)
{
constexpr uint64_t test_length_ms = 100;
const std::array<hashAndFillAes1Rx4_impl *, 4> impl = {
&hashAndFillAes1Rx4<1,1>,
&hashAndFillAes1Rx4<2,1>,
&hashAndFillAes1Rx4<2,2>,
&hashAndFillAes1Rx4<2,4>,
};
size_t fast_idx = 0;
double fast_speed = 0.0;
for (size_t run = 0; run < 3; ++run) {
for (size_t i = 0; i < impl.size(); ++i) {
const double t1 = xmrig::Chrono::highResolutionMSecs();
std::vector<uint32_t> count(threadsCount, 0);
std::vector<std::thread> threads;
for (size_t t = 0; t < threadsCount; ++t) {
threads.emplace_back([&, t]() {
std::vector<uint8_t> scratchpad(10 * 1024);
alignas(16) uint8_t hash[64] = {};
alignas(16) uint8_t state[64] = {};
do {
(*impl[i])(scratchpad.data(), scratchpad.size(), hash, state);
++count[t];
} while (xmrig::Chrono::highResolutionMSecs() - t1 < test_length_ms);
});
}
uint32_t total = 0;
for (size_t t = 0; t < threadsCount; ++t) {
threads[t].join();
total += count[t];
}
const double t2 = xmrig::Chrono::highResolutionMSecs();
const double speed = total * 1e3 / (t2 - t1);
if (speed > fast_speed) {
fast_idx = i;
fast_speed = speed;

View File

@@ -0,0 +1,322 @@
/*
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
Copyright (c) 2025 XMRig <support@xmrig.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <riscv_vector.h>
#include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/randomx.h"
// One table-driven ("soft") AES round applied to two 128-bit AES states packed
// into a single 8-lane u32 vector (lanes 0-3 = first state, 4-7 = second state).
//
// in   - packed pair of AES states
// key  - packed pair of round keys, XORed in at the end of the round
// i0-i3 - byte-shuffle index vectors selecting, per output column, the source
//         byte fed into each of the four lookup tables (encrypt vs decrypt
//         direction is chosen by the caller via these indices and the LUTs)
// lut0-lut3 - four 256-entry u32 tables; their XOR-combination yields
//         SubBytes+ShiftRows+MixColumns for one round (standard T-table AES)
//
// Returns the packed pair of states after one round.
static FORCE_INLINE vuint32m1_t softaes_vector_double(
vuint32m1_t in,
vuint32m1_t key,
vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3,
const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3)
{
// Reinterpret as bytes so vrgather can do the per-byte shuffle (ShiftRows +
// column/byte selection in one gather).
const vuint8m1_t in8 = __riscv_vreinterpret_v_u32m1_u8m1(in);
const vuint32m1_t index0 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i0, 32));
const vuint32m1_t index1 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i1, 32));
const vuint32m1_t index2 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i2, 32));
const vuint32m1_t index3 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i3, 32));
// Indexed (gather) loads: each gathered byte, scaled by 4 (<< 2), is a byte
// offset into the corresponding u32 lookup table.
vuint32m1_t s0 = __riscv_vluxei32_v_u32m1(lut0, __riscv_vsll_vx_u32m1(index0, 2, 8), 8);
vuint32m1_t s1 = __riscv_vluxei32_v_u32m1(lut1, __riscv_vsll_vx_u32m1(index1, 2, 8), 8);
vuint32m1_t s2 = __riscv_vluxei32_v_u32m1(lut2, __riscv_vsll_vx_u32m1(index2, 2, 8), 8);
vuint32m1_t s3 = __riscv_vluxei32_v_u32m1(lut3, __riscv_vsll_vx_u32m1(index3, 2, 8), 8);
// XOR-combine the four table lookups, then add the round key.
s0 = __riscv_vxor_vv_u32m1(s0, s1, 8);
s2 = __riscv_vxor_vv_u32m1(s2, s3, 8);
s0 = __riscv_vxor_vv_u32m1(s0, s2, 8);
return __riscv_vxor_vv_u32m1(s0, key, 8);
}
// Packed RandomX AES constants. Each 8-entry array holds two 128-bit values
// interleaved for the two-states-per-vector layout used above: lanes 0-3 are
// one state/key, lanes 4-7 the other. Presumably these match the AesHash1R /
// AesGenerator1R constants from the RandomX specification — TODO confirm
// against the scalar aes_hash.cpp definitions.
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
// Finalization keys: same 128-bit key duplicated into both vector halves.
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
// Byte-offset index vectors for vluxei32/vsuxei32: lanes 0-3 address 16
// consecutive bytes at the base pointer, lanes 4-7 address 16 bytes starting
// 32 (X2) or 64 (X4) bytes further on — i.e. two states that sit 32/64 bytes
// apart in memory are gathered into / scattered from one vector register.
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 0, 4, 8, 12, 64, 68, 72, 76 };
// RVV soft-AES version of hashAes1Rx4: hashes `inputSize` bytes (must be a
// multiple of 64; the loop consumes 64 bytes per iteration) into a 64-byte
// hash. Four AES states are processed as two packed vectors (states 0&2 and
// 1&3). Note: the softAes template parameter is not referenced in the body;
// it appears to exist only to mirror the generic hashAes1Rx4 signature —
// TODO confirm.
template<int softAes>
void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (const uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
// initial state
vuint32m1_t state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
// Gather/scatter offsets pairing two 16-byte blocks that are 32 bytes apart.
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
// Byte-shuffle index vectors for the encrypt-direction round...
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
// ...and for the decrypt-direction round; indices 0 and 2 are shared with
// the encrypt tables (aliased by reference instead of reloading).
const vuint8m1_t& lutdec_index0 = lutenc_index0;
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
const vuint8m1_t& lutdec_index2 = lutenc_index2;
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
//process 64 bytes at a time in 4 lanes
// states 0&2 absorb input via encrypt rounds, states 1&3 via decrypt rounds
// (matching the scalar hashAes1Rx4 round directions).
while (inptr < inputEnd) {
state02 = softaes_vector_double(state02, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
state13 = softaes_vector_double(state13, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
inptr += 64;
}
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
state02 = softaes_vector_double(state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
state13 = softaes_vector_double(state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
state02 = softaes_vector_double(state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
state13 = softaes_vector_double(state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
//output hash
// scatter the packed states back into the natural 4x16-byte hash layout
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
}
// Both instantiations run the same code path (softAes is unused above).
template void hashAes1Rx4_RVV<false>(const void *input, size_t inputSize, void *hash);
template void hashAes1Rx4_RVV<true>(const void *input, size_t inputSize, void *hash);
// RVV soft-AES version of fillAes1Rx4: expands the 64-byte `state` into
// `outputSize` bytes of pseudorandom output (64 bytes per iteration), then
// writes the advanced state back so subsequent calls continue the stream.
// The softAes template parameter is unused in the body (signature parity with
// the generic fillAes1Rx4 — TODO confirm).
template<int softAes>
void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
// Fixed per-call round keys for the generator.
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
// Offsets pairing 16-byte blocks 32 bytes apart (states 0&2 and 1&3).
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
// Gather the four 16-byte states into two packed vectors.
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
// Encrypt-direction shuffle indices; decrypt direction shares indices 0 and 2.
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
const vuint8m1_t& lutdec_index0 = lutenc_index0;
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
const vuint8m1_t& lutdec_index2 = lutenc_index2;
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
// One round per 64 bytes of output: states 0&2 use the decrypt direction,
// states 1&3 the encrypt direction, then both are scattered to the buffer.
while (outptr < outputEnd) {
state02 = softaes_vector_double(state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
state13 = softaes_vector_double(state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
outptr += 64;
}
// Persist the advanced generator state for the next call.
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
}
// Both instantiations run the same code path (softAes is unused above).
template void fillAes1Rx4_RVV<false>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4_RVV<true>(void *state, size_t outputSize, void *buffer);
// RVV soft-AES version of fillAes4Rx4: like fillAes1Rx4_RVV but applies FOUR
// AES rounds per 64-byte output block, with round keys taken from
// RandomX_CurrentConfig.fillAes4Rx4_Key (keys 0-3 for states 0&2, keys 4-7
// for states 1&3, gathered pairwise via the X4 stride). Unlike fillAes1Rx4_RVV,
// the final state is NOT written back to `state` (matches the scalar
// fillAes4Rx4 contract — TODO confirm). softAes is unused in the body.
template<int softAes>
void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
// X4 offsets pair 16-byte keys that sit 64 bytes apart, so keyN and keyN+4
// land in one packed vector (key04 = keys 0 & 4, etc.).
const vuint32m1_t stride4 = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
const vuint32m1_t key04 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 0), stride4, 8);
const vuint32m1_t key15 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 1), stride4, 8);
const vuint32m1_t key26 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 2), stride4, 8);
const vuint32m1_t key37 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 3), stride4, 8);
// X2 offsets pair the four 16-byte states into two vectors (0&2, 1&3).
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
// Encrypt-direction shuffle indices; decrypt direction shares indices 0 and 2.
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
const vuint8m1_t& lutdec_index0 = lutenc_index0;
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
const vuint8m1_t& lutdec_index2 = lutenc_index2;
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
// Four rounds per 64-byte block: states 0&2 take the decrypt direction,
// states 1&3 the encrypt direction, then both halves are scattered out.
while (outptr < outputEnd) {
state02 = softaes_vector_double(state02, key04, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
state13 = softaes_vector_double(state13, key04, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
state02 = softaes_vector_double(state02, key15, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
state13 = softaes_vector_double(state13, key15, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
state02 = softaes_vector_double(state02, key26, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
state13 = softaes_vector_double(state13, key26, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
state02 = softaes_vector_double(state02, key37, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
state13 = softaes_vector_double(state13, key37, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
outptr += 64;
}
}
// Both instantiations run the same code path (softAes is unused above).
template void fillAes4Rx4_RVV<false>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4_RVV<true>(void *state, size_t outputSize, void *buffer);
// Computes the RandomX AES hash of the scratchpad while simultaneously
// refilling it with fresh AES generator output, using the vectorized
// soft-AES lookup tables (RVV indexed gathers over lutEnc*/lutDec*).
//
// Template parameters (resolved at compile time; the switch below folds away):
//   softAes - processing variant; 0 takes the fixed two-block path,
//             non-zero paths honor `unroll` (exact dispatch meaning mirrors
//             the instantiations at the bottom of this file).
//   unroll  - number of 64-byte blocks handled per loop iteration.
//
// scratchpadSize is consumed in whole 64-byte blocks (the loop assumes it is
// a multiple of the per-iteration step).
template<int softAes, int unroll>
void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
// Initial hash state constants, split into the two interleaved lane pairs.
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
// Generator round keys for the fill (scratchpad refill) stream.
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
// Byte-offset index vector used by every indexed load/store below.
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
// Generator state carried across calls through `fill_state`.
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
// Shuffle-index tables driving the vectorized soft-AES LUT lookups.
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
// Decrypt index tables 0 and 2 are identical to the encrypt ones, so alias them.
const vuint8m1_t& lutdec_index0 = lutenc_index0;
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
const vuint8m1_t& lutdec_index2 = lutenc_index2;
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
// HASH_STATE(k): absorb the k-th 64-byte scratchpad block into the hash
// state (enc-table rounds on lanes 0/2, dec-table rounds on lanes 1/3).
#define HASH_STATE(k) \
hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
// FILL_STATE(k): advance the generator one round and overwrite the k-th
// 64-byte block of the scratchpad with the new stream output.
#define FILL_STATE(k) \
fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \
fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
// softAes and unroll are template constants, so only one branch survives
// compilation; the variants differ only in hash/fill interleaving and step.
switch (softAes) {
case 0:
HASH_STATE(0);
HASH_STATE(1);
FILL_STATE(0);
FILL_STATE(1);
scratchpadPtr += 128;
break;
default:
switch (unroll) {
case 4:
HASH_STATE(0);
FILL_STATE(0);
HASH_STATE(1);
FILL_STATE(1);
HASH_STATE(2);
FILL_STATE(2);
HASH_STATE(3);
FILL_STATE(3);
scratchpadPtr += 64 * 4;
break;
case 2:
HASH_STATE(0);
FILL_STATE(0);
HASH_STATE(1);
FILL_STATE(1);
scratchpadPtr += 64 * 2;
break;
default:
HASH_STATE(0);
FILL_STATE(0);
scratchpadPtr += 64;
break;
}
break;
}
}
#undef HASH_STATE
#undef FILL_STATE
// Persist the generator state for the next call.
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
//output hash
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
}
// Explicit <softAes, unroll> instantiations; the combination used at runtime
// is selected by the caller's capability/benchmark dispatch.
template void hashAndFillAes1Rx4_RVV<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<2,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<2,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<2,4>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);

View File

@@ -0,0 +1,42 @@
/*
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
Copyright (c) 2025 XMRig <support@xmrig.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// RISC-V Vector (RVV) implementations of the RandomX AES primitives.
// The softAes template parameter selects the soft-AES variant; unroll selects
// how many 64-byte blocks are processed per loop iteration.
// Computes the AES-based fingerprint of `input` into the 64-byte `hash`.
template<int softAes>
void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash);
// Fills `buffer` with one-round AES generator output, advancing `state`.
template<int softAes>
void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer);
// Fills `buffer` with four-round AES generator output (program generation).
template<int softAes>
void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer);
// Hashes the scratchpad while refilling it with generator output.
template<int softAes, int unroll>
void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View File

@@ -0,0 +1,199 @@
/*
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
Copyright (c) 2025 XMRig <support@xmrig.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/aes_hash.hpp"
#include "crypto/randomx/randomx.h"
#include "crypto/rx/Profiler.h"
#include <riscv_vector.h>
// One hardware AES encryption round over 8 uint32 lanes (two 128-bit states)
// using the Zvkned vaesem.vv (AES middle-round encryption) instruction with
// round key `b`.
static FORCE_INLINE vuint32m1_t aesenc_zvkned(vuint32m1_t a, vuint32m1_t b) { return __riscv_vaesem_vv_u32m1(a, b, 8); }
// One hardware AES decryption round: runs vaesdm.vv with an all-zero round
// key and XORs the real key `b` afterwards — presumably to reproduce the
// key-addition ordering of the x86 `aesdec` round that RandomX is defined
// against. NOTE(review): confirm against the Zvkned spec and the scalar path.
static FORCE_INLINE vuint32m1_t aesdec_zvkned(vuint32m1_t a, vuint32m1_t b, vuint32m1_t zero) { return __riscv_vxor_vv_u32m1(__riscv_vaesdm_vv_u32m1(a, zero, 8), b, 8); }
// Initial AES hash states, split into the interleaved lane pairs 0/2 and 1/3.
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
// One-round generator keys for the same lane pairs.
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
// Final-diffusion keys; the same 128-bit key is replicated for both lane pairs.
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
// Byte-offset tables for indexed loads/stores (vluxei32/vsuxei32):
// X2 gathers the 16-byte groups at offsets 0 and 32 of a 64-byte block;
// X4 gathers the groups at offsets 0 and 64 (used for the 4Rx4 key layout).
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 0, 4, 8, 12, 64, 68, 72, 76 };
// Computes the RandomX AES hash of `input` with the Zvkned hardware AES
// instructions and writes the 64-byte result to `hash`.
// inputSize is consumed in whole 64-byte blocks (expected to be a multiple
// of 64). Lanes 0/2 use encryption rounds, lanes 1/3 decryption rounds.
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash)
{
const uint8_t* inptr = (const uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
//initial state
vuint32m1_t state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
//process 64 bytes at a time in 4 lanes
while (inptr < inputEnd) {
state02 = aesenc_zvkned(state02, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 0, stride, 8));
state13 = aesdec_zvkned(state13, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 4, stride, 8), zero);
inptr += 64;
}
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
state02 = aesenc_zvkned(state02, xkey00);
state13 = aesdec_zvkned(state13, xkey00, zero);
state02 = aesenc_zvkned(state02, xkey11);
state13 = aesdec_zvkned(state13, xkey11, zero);
//output hash
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
}
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer)
{
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
while (outptr < outputEnd) {
state02 = aesdec_zvkned(state02, key02, zero);
state13 = aesenc_zvkned(state13, key13);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
outptr += 64;
}
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
}
// Four-round AES generator: fills `buffer` with outputSize bytes of stream,
// applying four rounds with the program-configured keys per 64-byte block.
// Unlike fillAes1Rx4_zvkned, the final state is intentionally NOT written
// back to `state` (same as the RVV soft-AES variant of this function).
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer)
{
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
// The X4 stride gathers key pairs 64 bytes apart, so key04 holds keys 0 and 4,
// key15 holds keys 1 and 5, etc. — one key per 128-bit lane pair.
const vuint32m1_t stride4 = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
const vuint32m1_t key04 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 0), stride4, 8);
const vuint32m1_t key15 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 1), stride4, 8);
const vuint32m1_t key26 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 2), stride4, 8);
const vuint32m1_t key37 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 3), stride4, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
while (outptr < outputEnd) {
// Four rounds (decrypt on lanes 0/2, encrypt on lanes 1/3) per output block.
state02 = aesdec_zvkned(state02, key04, zero);
state13 = aesenc_zvkned(state13, key04);
state02 = aesdec_zvkned(state02, key15, zero);
state13 = aesenc_zvkned(state13, key15);
state02 = aesdec_zvkned(state02, key26, zero);
state13 = aesenc_zvkned(state13, key26);
state02 = aesdec_zvkned(state02, key37, zero);
state13 = aesenc_zvkned(state13, key37);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
outptr += 64;
}
}
// Fused pass over the scratchpad: absorbs every 64-byte block into the AES
// hash state while simultaneously overwriting that block with one round of
// AES generator output. Writes the advanced generator state back to
// `fill_state` and the final 64-byte hash to `hash`.
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
{
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
// Initial hash state and the one-round generator keys.
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
// Generator state carried across calls via fill_state.
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
// Hash the current block, then overwrite it with fresh generator output.
hash_state02 = aesenc_zvkned(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, 8));
hash_state13 = aesdec_zvkned(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, 8), zero);
fill_state02 = aesdec_zvkned(fill_state02, key02, zero);
fill_state13 = aesenc_zvkned(fill_state13, key13);
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, fill_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, fill_state13, 8);
scratchpadPtr += 64;
}
// Persist the generator state for the next call.
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
hash_state02 = aesenc_zvkned(hash_state02, xkey00);
hash_state13 = aesdec_zvkned(hash_state13, xkey00, zero);
hash_state02 = aesenc_zvkned(hash_state02, xkey11);
hash_state13 = aesdec_zvkned(hash_state13, xkey11, zero);
//output hash
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
}

View File

@@ -0,0 +1,35 @@
/*
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
Copyright (c) 2025 XMRig <support@xmrig.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// Hardware-AES (RISC-V Zvkned) implementations of the RandomX AES primitives;
// signatures mirror the soft-AES RVV variants.
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash);
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer);
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer);
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View File

@@ -30,8 +30,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstring>
#include <climits>
#include <cassert>
#include "backend/cpu/Cpu.h"
#include "crypto/randomx/jit_compiler_rv64.hpp"
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
#include "crypto/randomx/jit_compiler_rv64_vector.h"
#include "crypto/randomx/jit_compiler_rv64_vector_static.h"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/reciprocal.h"
@@ -618,23 +621,49 @@ namespace randomx {
entryProgram = state.code + LiteralPoolSize + sizeDataInit;
//jal x1, SuperscalarHash
emitJump(state, ReturnReg, LiteralPoolSize + offsetFixDataCall, SuperScalarHashOffset);
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
vectorCodeSize = ((uint8_t*)randomx_riscv64_vector_code_end) - ((uint8_t*)randomx_riscv64_vector_code_begin);
vectorCode = static_cast<uint8_t*>(allocExecutableMemory(vectorCodeSize, hugePagesJIT && hugePagesEnable));
if (vectorCode) {
memcpy(vectorCode, reinterpret_cast<uint8_t*>(randomx_riscv64_vector_code_begin), vectorCodeSize);
entryProgramVector = vectorCode + (((uint8_t*)randomx_riscv64_vector_program_begin) - ((uint8_t*)randomx_riscv64_vector_code_begin));
}
}
}
JitCompilerRV64::~JitCompilerRV64() {
freePagedMemory(state.code, CodeSize);
if (vectorCode) {
freePagedMemory(vectorCode, vectorCodeSize);
}
}
void JitCompilerRV64::enableWriting() const
{
xmrig::VirtualMemory::protectRW(entryDataInit, ExecutableSize);
if (vectorCode) {
xmrig::VirtualMemory::protectRW(vectorCode, vectorCodeSize);
}
}
void JitCompilerRV64::enableExecution() const
{
xmrig::VirtualMemory::protectRX(entryDataInit, ExecutableSize);
if (vectorCode) {
xmrig::VirtualMemory::protectRX(vectorCode, vectorCodeSize);
}
}
void JitCompilerRV64::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t) {
if (vectorCode) {
generateProgramVectorRV64(vectorCode, prog, pcfg, inst_map, nullptr, 0);
return;
}
emitProgramPrefix(state, prog, pcfg);
int32_t fixPos = state.codePos;
state.emit(codeDataRead, sizeDataRead);
@@ -645,6 +674,11 @@ namespace randomx {
}
void JitCompilerRV64::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
if (vectorCode) {
generateProgramVectorRV64(vectorCode, prog, pcfg, inst_map, entryDataInit, datasetOffset);
return;
}
emitProgramPrefix(state, prog, pcfg);
int32_t fixPos = state.codePos;
state.emit(codeDataReadLight, sizeDataReadLight);
@@ -666,6 +700,11 @@ namespace randomx {
template<size_t N>
void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
if (vectorCode) {
entryDataInitVector = generateDatasetInitVectorRV64(vectorCode, programs, RandomX_ConfigurationBase::CacheAccesses);
// No return here because we also need the scalar dataset init function for the light mode
}
state.codePos = SuperScalarHashOffset;
state.rcpCount = 0;
state.emit(codeSshInit, sizeSshInit);
@@ -1160,5 +1199,6 @@ namespace randomx {
void JitCompilerRV64::v1_NOP(HANDLER_ARGS) {
}
InstructionGeneratorRV64 JitCompilerRV64::engine[256] = {};
alignas(64) InstructionGeneratorRV64 JitCompilerRV64::engine[256] = {};
alignas(64) uint8_t JitCompilerRV64::inst_map[256] = {};
}

View File

@@ -90,10 +90,10 @@ namespace randomx {
void generateDatasetInitCode() {}
ProgramFunc* getProgramFunc() {
return (ProgramFunc*)entryProgram;
return (ProgramFunc*)(vectorCode ? entryProgramVector : entryProgram);
}
DatasetInitFunc* getDatasetInitFunc() {
return (DatasetInitFunc*)entryDataInit;
return (DatasetInitFunc*)(vectorCode ? entryDataInitVector : entryDataInit);
}
uint8_t* getCode() {
return state.code;
@@ -104,10 +104,17 @@ namespace randomx {
void enableExecution() const;
static InstructionGeneratorRV64 engine[256];
static uint8_t inst_map[256];
private:
CompilerState state;
void* entryDataInit;
void* entryProgram;
uint8_t* vectorCode = nullptr;
size_t vectorCodeSize = 0;
void* entryDataInit = nullptr;
void* entryDataInitVector = nullptr;
void* entryProgram = nullptr;
void* entryProgramVector = nullptr;
public:
static void v1_IADD_RS(HANDLER_ARGS);

View File

@@ -0,0 +1,893 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/configuration.h"
#include "crypto/randomx/jit_compiler_rv64_vector.h"
#include "crypto/randomx/jit_compiler_rv64_vector_static.h"
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/randomx/program.hpp"
namespace randomx {
// Byte-distance helpers between labels exported by the hand-written
// assembly template (randomx_riscv64_vector_*).
#define ADDR(x) ((uint8_t*) &(x))
#define DIST(x, y) (ADDR(y) - ADDR(x))
// Generates the vectorized (RVV) dataset-initialization code by translating
// each SuperscalarHash program into RISC-V vector machine code inside the
// pre-assembled template copied into `buf`. Instruction templates below are
// patched with register/immediate fields; IMUL_RCP reciprocals go into a
// literal pool addressed via x15. Returns the entry point
// (randomx_riscv64_vector_sshash_dataset_init inside `buf`).
void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs, size_t num_programs)
{
uint8_t* p = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_generated_instructions);
uint8_t* literals = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_imul_rcp_literals);
uint8_t* cur_literal = literals;
for (size_t i = 0; i < num_programs; ++i) {
// Step 4
size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor);
memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_cache_prefetch), k);
p += k;
// Step 5
for (uint32_t j = 0; j < programs[i].size; ++j) {
// Superscalar registers 0-7 map directly onto vector registers v0-v7;
// v18 is used as a scratch register and x5 as a scalar temporary.
const uint32_t dst = programs[i].programBuffer[j].dst & 7;
const uint32_t src = programs[i].programBuffer[j].src & 7;
const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3;
const uint32_t imm32 = programs[i].programBuffer[j].imm32;
uint32_t inst;
// Patch the template opcode `data` with register/immediate fields and append it.
#define EMIT(data) inst = (data); memcpy(p, &inst, 4); p += 4
switch (static_cast<SuperscalarInstructionType>(programs[i].programBuffer[j].opcode)) {
case SuperscalarInstructionType::ISUB_R:
// 57 00 00 0A vsub.vv v0, v0, v0
EMIT(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20));
break;
case SuperscalarInstructionType::IXOR_R:
// 57 00 00 2E vxor.vv v0, v0, v0
EMIT(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20));
break;
case SuperscalarInstructionType::IADD_RS:
if (modShift == 0) {
// 57 00 00 02 vadd.vv v0, v0, v0
EMIT(0x02000057 | (dst << 7) | (src << 15) | (dst << 20));
}
else {
// 57 39 00 96 vsll.vi v18, v0, 0
// 57 00 09 02 vadd.vv v0, v0, v18
EMIT(0x96003957 | (modShift << 15) | (src << 20));
EMIT(0x02090057 | (dst << 7) | (dst << 20));
}
break;
case SuperscalarInstructionType::IMUL_R:
// 57 20 00 96 vmul.vv v0, v0, v0
EMIT(0x96002057 | (dst << 7) | (src << 15) | (dst << 20));
break;
case SuperscalarInstructionType::IROR_C:
{
// With Zvkb a single vror.vi suffices; otherwise synthesize the
// rotate from shift-right/shift-left/or (vsrl/vsll need the x5
// scalar form when the shift amount exceeds the 5-bit vi field).
#ifdef __riscv_zvkb
// 57 30 00 52 vror.vi v0, v0, 0
EMIT(0x52003057 | (dst << 7) | (dst << 20) | ((imm32 & 31) << 15) | ((imm32 & 32) << 21));
#else // __riscv_zvkb
const uint32_t shift_right = imm32 & 63;
const uint32_t shift_left = 64 - shift_right;
if (shift_right < 32) {
// 57 39 00 A2 vsrl.vi v18, v0, 0
EMIT(0xA2003957 | (shift_right << 15) | (dst << 20));
}
else {
// 93 02 00 00 li x5, 0
// 57 C9 02 A2 vsrl.vx v18, v0, x5
EMIT(0x00000293 | (shift_right << 20));
EMIT(0xA202C957 | (dst << 20));
}
if (shift_left < 32) {
// 57 30 00 96 vsll.vi v0, v0, 0
EMIT(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20));
}
else {
// 93 02 00 00 li x5, 0
// 57 C0 02 96 vsll.vx v0, v0, x5
EMIT(0x00000293 | (shift_left << 20));
EMIT(0x9602C057 | (dst << 7) | (dst << 20));
}
// 57 00 20 2B vor.vv v0, v18, v0
EMIT(0x2B200057 | (dst << 7) | (dst << 15));
#endif // __riscv_zvkb
}
break;
case SuperscalarInstructionType::IADD_C7:
case SuperscalarInstructionType::IADD_C8:
case SuperscalarInstructionType::IADD_C9:
// Materialize the sign-extended imm32 in x5 (lui+addiw; the
// (imm32 & 0x800) << 1 term compensates addiw's sign extension).
// B7 02 00 00 lui x5, 0
// 9B 82 02 00 addiw x5, x5, 0
// 57 C0 02 02 vadd.vx v0, v0, x5
EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
EMIT(0x0002829B | ((imm32 & 0x00000FFF) << 20));
EMIT(0x0202C057 | (dst << 7) | (dst << 20));
break;
case SuperscalarInstructionType::IXOR_C7:
case SuperscalarInstructionType::IXOR_C8:
case SuperscalarInstructionType::IXOR_C9:
// B7 02 00 00 lui x5, 0
// 9B 82 02 00 addiw x5, x5, 0
// 57 C0 02 2E vxor.vx v0, v0, x5
EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
EMIT(0x0002829B | ((imm32 & 0x00000FFF) << 20));
EMIT(0x2E02C057 | (dst << 7) | (dst << 20));
break;
case SuperscalarInstructionType::IMULH_R:
// 57 20 00 92 vmulhu.vv v0, v0, v0
EMIT(0x92002057 | (dst << 7) | (src << 15) | (dst << 20));
break;
case SuperscalarInstructionType::ISMULH_R:
// 57 20 00 9E vmulh.vv v0, v0, v0
EMIT(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20));
break;
case SuperscalarInstructionType::IMUL_RCP:
{
// Reciprocal literals are loaded via ld x5, offset(x15); the
// 12-bit ld offset limits one window to 2040 bytes (255 * 8),
// so advance x15 and reset the offset when the window is full.
uint32_t offset = cur_literal - literals;
if (offset == 2040) {
literals += 2040;
offset = 0;
// 93 87 87 7F addi x15, x15, 2040
EMIT(0x7F878793);
}
const uint64_t r = randomx_reciprocal_fast(imm32);
memcpy(cur_literal, &r, 8);
cur_literal += 8;
// 83 B2 07 00 ld x5, (x15)
// 57 E0 02 96 vmul.vx v0, v0, x5
EMIT(0x0007B283 | (offset << 20));
EMIT(0x9602E057 | (dst << 7) | (dst << 20));
}
break;
default:
UNREACHABLE;
}
}
// Step 6
k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_end);
memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_xor), k);
p += k;
// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5.
if (i + 1 < num_programs) {
// vmv.v.v v9, v0 + programs[i].getAddressRegister()
const uint32_t t = 0x5E0004D7 + (static_cast<uint32_t>(programs[i].getAddressRegister()) << 15);
memcpy(p, &t, 4);
p += 4;
}
}
// Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction
// (JAL x0 encoding; the generated region is small enough that the patched
// immediate bits cover the forward distance).
const uint8_t* e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_generated_instructions_end);
const uint32_t k = e - p;
const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000);
memcpy(p, &j, 4);
char* result = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_dataset_init));
#ifdef __GNUC__
// NOTE(review): the end pointer uses DIST(sshash_begin, sshash_end) added to
// `buf`, while the start uses distances from code_begin — unless sshash_begin
// equals code_begin, the flushed range may be too small. Confirm the label
// layout in the asm template.
__builtin___clear_cache(result, (char*)(buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_end)));
#endif
return result;
}
// Little-endian byte-emitter helpers: write the value (or raw array) at p
// and advance p past it.
#define emit16(value) { const uint16_t t = value; memcpy(p, &t, 2); p += 2; }
#define emit32(value) { const uint32_t t = value; memcpy(p, &t, 4); p += 4; }
#define emit64(value) { const uint64_t t = value; memcpy(p, &t, 8); p += 8; }
#define emit_data(arr) { memcpy(p, arr, sizeof(arr)); p += sizeof(arr); }
// Emits code that materializes the sign-extended 32-bit immediate `imm` into
// register x5, choosing the shortest encoding: a single addi (li), a single
// lui, or lui/c.lui followed by addiw. The (imm & 0x800) << 1 term
// compensates for addiw sign-extending its 12-bit operand.
static void imm_to_x5(uint32_t imm, uint8_t*& p)
{
const uint32_t imm_hi = (imm + ((imm & 0x800) << 1)) & 0xFFFFF000U;
const uint32_t imm_lo = imm & 0x00000FFFU;
if (imm_hi == 0) {
// li x5, imm_lo
emit32(0x00000293 + (imm_lo << 20));
return;
}
if (imm_lo == 0) {
// lui x5, imm_hi
emit32(0x000002B7 + imm_hi);
return;
}
// Small upper immediates (imm_hi bits fit in c.lui's 6-bit field) can use
// the 2-byte compressed form.
if (imm_hi < (32 << 12)) {
//c.lui x5, imm_hi
emit16(0x6281 + (imm_hi >> 10));
}
else {
// lui x5, imm_hi
emit32(0x000002B7 + imm_hi);
}
// addiw x5, x5, imm_lo
emit32(0x0002829B | (imm_lo << 20));
}
// Emits code that loads a 64-bit scratchpad word into x5.
// src == dst: the address is imm masked to the L3 range, relative to x12
//   (x12 appears to hold the scratchpad base — NOTE(review): confirm against
//   the asm template's register assignment).
// Otherwise: address = (reg[src] + sign-extended imm) masked to L1 or L2
//   depending on the mod bits; the masks appear to live in x16 (L1) and
//   x17 (L2), and integer registers r0-r7 map to x20-x27.
static void loadFromScratchpad(uint32_t src, uint32_t dst, uint32_t mod, uint32_t imm, uint8_t*& p)
{
if (src == dst) {
imm &= RandomX_CurrentConfig.ScratchpadL3Mask_Calculated;
// Pick the shortest addressing sequence the 12-bit ld offset allows.
if (imm <= 2047) {
// ld x5, imm(x12)
emit32(0x00063283 | (imm << 20));
}
else if (imm <= 2047 * 2) {
// addi x5, x12, 2047
emit32(0x7FF60293);
// ld x5, (imm - 2047)(x5)
emit32(0x0002B283 | ((imm - 2047) << 20));
}
else {
// lui x5, imm & 0xFFFFF000U
emit32(0x000002B7 | ((imm + ((imm & 0x800) << 1)) & 0xFFFFF000U));
// c.add x5, x12
emit16(0x92B2);
// ld x5, (imm & 0xFFF)(x5)
emit32(0x0002B283 | ((imm & 0xFFF) << 20));
}
return;
}
uint32_t shift = 32;
uint32_t mask_reg;
if ((mod & 3) == 0) {
shift -= RandomX_CurrentConfig.Log2_ScratchpadL2;
mask_reg = 17;
}
else {
shift -= RandomX_CurrentConfig.Log2_ScratchpadL1;
mask_reg = 16;
}
// Truncate imm to the addressed level's bit width, sign-extended.
imm = static_cast<uint32_t>(static_cast<int32_t>(imm << shift) >> shift);
// 0-0x7FF, 0xFFFFF800-0xFFFFFFFF fit into 12 bit (a single addi instruction)
if (imm - 0xFFFFF800U < 0x1000U) {
// addi x5, x20 + src, imm
emit32(0x000A0293 + (src << 15) + (imm << 20));
}
else {
imm_to_x5(imm, p);
// c.add x5, x20 + src
emit16(0x92D2 + (src << 2));
}
// and x5, x5, mask_reg
emit32(0x0002F2B3 + (mask_reg << 20));
// c.add x5, x12
emit16(0x92B2);
// ld x5, 0(x5)
emit32(0x0002B283);
}
// JIT compiler back-end for the vectorized (RVV) RandomX program loop.
//
// Patches the pre-assembled code template at `buf` (a relocated copy of the
// randomx_riscv64_vector_code_begin..end section) for one concrete program:
//  - writes runtime constants (scratchpad/dataset masks, CBRANCH jump mask),
//  - substitutes the readReg0..readReg3 registers chosen in `pcfg` into the
//    spAddr/mx XOR instructions of the main loop template,
//  - translates every program instruction into RISC-V machine code emitted
//    into the main-loop placeholder region,
//  - fills the IMUL_RCP reciprocal literal table as literals are encountered.
//
// buf                 - writable, executable copy of the vector code template
// prog                - the RandomX program to translate
// pcfg                - program configuration (readReg0..readReg3 indices)
// inst_map            - opcode -> InstructionType lookup table
// entryDataInitScalar - non-null selects light mode: pointer to the scalar
//                       dataset-init routine used instead of a full dataset
// datasetOffset       - dataset byte offset forwarded to light mode
//
// Returns a pointer to the patched program entry point
// (randomx_riscv64_vector_program_begin inside `buf`).
void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguration& pcfg, const uint8_t (&inst_map)[256], void* entryDataInitScalar, uint32_t datasetOffset)
{
// Patch runtime constants read by the template (the randomx_masks table)
uint64_t* params = (uint64_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_params));
params[0] = RandomX_CurrentConfig.ScratchpadL1_Size - 8;
params[1] = RandomX_CurrentConfig.ScratchpadL2_Size - 8;
params[2] = RandomX_CurrentConfig.ScratchpadL3_Size - 8;
params[3] = RandomX_CurrentConfig.DatasetBaseSize - 64;
params[4] = (1 << RandomX_ConfigurationBase::JumpBits) - 1;
// IMUL_RCP reciprocals are appended to this table as they are found below
uint64_t* imul_rcp_literals = (uint64_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_imul_rcp_literals));
uint64_t* cur_literal = imul_rcp_literals;
// Template instructions whose source registers depend on pcfg's readReg choices
uint32_t* spaddr_xor = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_spaddr_xor));
uint32_t* spaddr_xor2 = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_scratchpad_prefetch));
uint32_t* mx_xor = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_mx_xor));
uint32_t* mx_xor_light = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_mx_xor_light_mode));
*spaddr_xor = 0x014A47B3 + (pcfg.readReg0 << 15) + (pcfg.readReg1 << 20); // xor x15, readReg0, readReg1
*spaddr_xor2 = 0x014A42B3 + (pcfg.readReg0 << 15) + (pcfg.readReg1 << 20); // xor x5, readReg0, readReg1
const uint32_t mx_xor_value = 0x014A42B3 + (pcfg.readReg2 << 15) + (pcfg.readReg3 << 20); // xor x5, readReg2, readReg3
*mx_xor = mx_xor_value;
*mx_xor_light = mx_xor_value;
// Light mode: store the scalar dataset-init entry point and the dataset offset
// where the template's light-mode epilogue expects to find them
if (entryDataInitScalar) {
void* light_mode_data = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_light_mode_data);
const uint64_t data[2] = { reinterpret_cast<uint64_t>(entryDataInitScalar), datasetOffset };
memcpy(light_mode_data, &data, sizeof(data));
}
// p = write cursor into the main-loop instruction placeholder
uint8_t* p = (uint8_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions));
// 57C8025E vmv.v.x v16, x5
// 57A9034B vsext.vf2 v18, v16
// 5798214B vfcvt.f.x.v v16, v18
static constexpr uint8_t group_f_convert[] = {
0x57, 0xC8, 0x02, 0x5E, 0x57, 0xA9, 0x03, 0x4B, 0x57, 0x98, 0x21, 0x4B
};
// 57080627 vand.vv v16, v16, v12
// 5788062B vor.vv v16, v16, v13
static constexpr uint8_t group_e_post_process[] = { 0x57, 0x08, 0x06, 0x27, 0x57, 0x88, 0x06, 0x2B };
// Code position right after the most recent write to each integer register;
// CBRANCH branches back to this point for its destination register
uint8_t* last_modified[RegistersCount] = { p, p, p, p, p, p, p, p };
// Marks the two registers whose XOR forms the next iteration's spAddr0/spAddr1
uint8_t readReg01[RegistersCount] = {};
readReg01[pcfg.readReg0] = 1;
readReg01[pcfg.readReg1] = 1;
// Scan the program backwards for the last instruction that can still change
// readReg0/readReg1 (or any CBRANCH, which can re-run earlier instructions).
// After that instruction executes, the next iteration's scratchpad address is
// final, so the prefetch sequence can be emitted right behind it.
uint32_t scratchpad_prefetch_pos = 0;
for (int32_t i = static_cast<int32_t>(prog.getSize()) - 1; i >= 0; --i) {
Instruction instr = prog(i);
const InstructionType inst_type = static_cast<InstructionType>(inst_map[instr.opcode]);
if (inst_type == InstructionType::CBRANCH) {
scratchpad_prefetch_pos = i;
break;
}
if (inst_type < InstructionType::FSWAP_R) {
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
if ((inst_type == InstructionType::ISWAP_R) && (src != dst) && (readReg01[src] || readReg01[dst])) {
scratchpad_prefetch_pos = i;
break;
}
// IMUL_RCP with a zero/power-of-2 immediate emits no code, so it can't modify dst
if ((inst_type == InstructionType::IMUL_RCP) && readReg01[dst] && !isZeroOrPowerOf2(instr.getImm32())) {
scratchpad_prefetch_pos = i;
break;
}
if (readReg01[dst]) {
scratchpad_prefetch_pos = i;
break;
}
}
}
// Translate the program, one instruction at a time
for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
Instruction instr = prog(i);
uint32_t src = instr.src % RegistersCount;
uint32_t dst = instr.dst % RegistersCount;
const uint32_t shift = instr.getModShift();
uint32_t imm = instr.getImm32();
const uint32_t mod = instr.mod;
switch (static_cast<InstructionType>(inst_map[instr.opcode])) {
case InstructionType::IADD_RS:
if (shift == 0) {
// c.add x20 + dst, x20 + src
emit16(0x9A52 + (src << 2) + (dst << 7));
}
else {
#ifdef __riscv_zba
// sh{shift}add x20 + dst, x20 + src, x20 + dst
emit32(0x214A0A33 + (shift << 13) + (dst << 7) + (src << 15) + (dst << 20));
#else // __riscv_zba
// slli x5, x20 + src, shift
emit32(0x000A1293 + (src << 15) + (shift << 20));
// c.add x20 + dst, x5
emit16(0x9A16 + (dst << 7));
#endif // __riscv_zba
}
if (dst == RegisterNeedsDisplacement) {
imm_to_x5(imm, p);
// c.add x20 + dst, x5
emit16(0x9A16 + (dst << 7));
}
last_modified[dst] = p;
break;
case InstructionType::IADD_M:
loadFromScratchpad(src, dst, mod, imm, p);
// c.add x20 + dst, x5
emit16(0x9A16 + (dst << 7));
last_modified[dst] = p;
break;
case InstructionType::ISUB_R:
if (src != dst) {
// sub x20 + dst, x20 + dst, x20 + src
emit32(0x414A0A33 + (dst << 7) + (dst << 15) + (src << 20));
}
else {
// src == dst degenerates to subtracting the immediate
imm_to_x5(-imm, p);
// c.add x20 + dst, x5
emit16(0x9A16 + (dst << 7));
}
last_modified[dst] = p;
break;
case InstructionType::ISUB_M:
loadFromScratchpad(src, dst, mod, imm, p);
// sub x20 + dst, x20 + dst, x5
emit32(0x405A0A33 + (dst << 7) + (dst << 15));
last_modified[dst] = p;
break;
case InstructionType::IMUL_R:
if (src != dst) {
// mul x20 + dst, x20 + dst, x20 + src
emit32(0x034A0A33 + (dst << 7) + (dst << 15) + (src << 20));
}
else {
// src == dst degenerates to multiplying by the immediate
imm_to_x5(imm, p);
// mul x20 + dst, x20 + dst, x5
emit32(0x025A0A33 + (dst << 7) + (dst << 15));
}
last_modified[dst] = p;
break;
case InstructionType::IMUL_M:
loadFromScratchpad(src, dst, mod, imm, p);
// mul x20 + dst, x20 + dst, x5
emit32(0x025A0A33 + (dst << 7) + (dst << 15));
last_modified[dst] = p;
break;
case InstructionType::IMULH_R:
// mulhu x20 + dst, x20 + dst, x20 + src
emit32(0x034A3A33 + (dst << 7) + (dst << 15) + (src << 20));
last_modified[dst] = p;
break;
case InstructionType::IMULH_M:
loadFromScratchpad(src, dst, mod, imm, p);
// mulhu x20 + dst, x20 + dst, x5
emit32(0x025A3A33 + (dst << 7) + (dst << 15));
last_modified[dst] = p;
break;
case InstructionType::ISMULH_R:
// mulh x20 + dst, x20 + dst, x20 + src
emit32(0x034A1A33 + (dst << 7) + (dst << 15) + (src << 20));
last_modified[dst] = p;
break;
case InstructionType::ISMULH_M:
loadFromScratchpad(src, dst, mod, imm, p);
// mulh x20 + dst, x20 + dst, x5
emit32(0x025A1A33 + (dst << 7) + (dst << 15));
last_modified[dst] = p;
break;
case InstructionType::IMUL_RCP:
// A zero or power-of-2 immediate makes the reciprocal a no-op: emit nothing
if (!isZeroOrPowerOf2(imm)) {
const uint64_t offset = (cur_literal - imul_rcp_literals) * 8;
*(cur_literal++) = randomx_reciprocal_fast(imm);
// The first 6 literals live in spare integer registers, the next 20 in
// FP registers (both preloaded by the template prologue); further
// literals are loaded from the literal table pointed to by x18
static constexpr uint32_t rcp_regs[26] = {
/* Integer */ 8, 10, 28, 29, 30, 31,
/* Float */ 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 28, 29, 30, 31
};
if (offset < 6 * 8) {
// mul x20 + dst, x20 + dst, rcp_reg
emit32(0x020A0A33 + (dst << 7) + (dst << 15) + (rcp_regs[offset / 8] << 20));
}
else if (offset < 26 * 8) {
// fmv.x.d x5, rcp_reg
emit32(0xE20002D3 + (rcp_regs[offset / 8] << 15));
// mul x20 + dst, x20 + dst, x5
emit32(0x025A0A33 + (dst << 7) + (dst << 15));
}
else {
// ld x5, offset(x18)
emit32(0x00093283 + (offset << 20));
// mul x20 + dst, x20 + dst, x5
emit32(0x025A0A33 + (dst << 7) + (dst << 15));
}
last_modified[dst] = p;
}
break;
case InstructionType::INEG_R:
// sub x20 + dst, x0, x20 + dst
emit32(0x41400A33 + (dst << 7) + (dst << 20));
last_modified[dst] = p;
break;
case InstructionType::IXOR_R:
if (src != dst) {
// xor x20 + dst, x20 + dst, x20 + src
emit32(0x014A4A33 + (dst << 7) + (dst << 15) + (src << 20));
}
else {
// src == dst degenerates to XOR with the immediate
imm_to_x5(imm, p);
// xor x20 + dst, x20 + dst, x5
emit32(0x005A4A33 + (dst << 7) + (dst << 15));
}
last_modified[dst] = p;
break;
case InstructionType::IXOR_M:
loadFromScratchpad(src, dst, mod, imm, p);
// xor x20 + dst, x20 + dst, x5
emit32(0x005A4A33 + (dst << 7) + (dst << 15));
last_modified[dst] = p;
break;
#ifdef __riscv_zbb
// Zbb provides native rotates
case InstructionType::IROR_R:
if (src != dst) {
// ror x20 + dst, x20 + dst, x20 + src
emit32(0x614A5A33 + (dst << 7) + (dst << 15) + (src << 20));
}
else {
// rori x20 + dst, x20 + dst, imm
emit32(0x600A5A13 + (dst << 7) + (dst << 15) + ((imm & 63) << 20));
}
last_modified[dst] = p;
break;
case InstructionType::IROL_R:
if (src != dst) {
// rol x20 + dst, x20 + dst, x20 + src
emit32(0x614A1A33 + (dst << 7) + (dst << 15) + (src << 20));
}
else {
// rotate left by imm == rotate right by -imm
// rori x20 + dst, x20 + dst, -imm
emit32(0x600A5A13 + (dst << 7) + (dst << 15) + ((-imm & 63) << 20));
}
last_modified[dst] = p;
break;
#else // __riscv_zbb
// No Zbb: synthesize rotates from shift pairs + OR
case InstructionType::IROR_R:
if (src != dst) {
// sub x5, x0, x20 + src
emit32(0x414002B3 + (src << 20));
// srl x6, x20 + dst, x20 + src
emit32(0x014A5333 + (dst << 15) + (src << 20));
// sll x20 + dst, x20 + dst, x5
emit32(0x005A1A33 + (dst << 7) + (dst << 15));
// or x20 + dst, x20 + dst, x6
emit32(0x006A6A33 + (dst << 7) + (dst << 15));
}
else {
// srli x5, x20 + dst, imm
emit32(0x000A5293 + (dst << 15) + ((imm & 63) << 20));
// slli x6, x20 + dst, -imm
emit32(0x000A1313 + (dst << 15) + ((-imm & 63) << 20));
// or x20 + dst, x5, x6
emit32(0x0062EA33 + (dst << 7));
}
last_modified[dst] = p;
break;
case InstructionType::IROL_R:
if (src != dst) {
// sub x5, x0, x20 + src
emit32(0x414002B3 + (src << 20));
// sll x6, x20 + dst, x20 + src
emit32(0x014A1333 + (dst << 15) + (src << 20));
// srl x20 + dst, x20 + dst, x5
emit32(0x005A5A33 + (dst << 7) + (dst << 15));
// or x20 + dst, x20 + dst, x6
emit32(0x006A6A33 + (dst << 7) + (dst << 15));
}
else {
// srli x5, x20 + dst, -imm
emit32(0x000A5293 + (dst << 15) + ((-imm & 63) << 20));
// slli x6, x20 + dst, imm
emit32(0x000A1313 + (dst << 15) + ((imm & 63) << 20));
// or x20 + dst, x5, x6
emit32(0x0062EA33 + (dst << 7));
}
last_modified[dst] = p;
break;
#endif // __riscv_zbb
case InstructionType::ISWAP_R:
if (src != dst) {
// c.mv x5, x20 + dst
emit16(0x82D2 + (dst << 2));
// c.mv x20 + dst, x20 + src
emit16(0x8A52 + (src << 2) + (dst << 7));
// c.mv x20 + src, x5
emit16(0x8A16 + (src << 7));
last_modified[src] = p;
last_modified[dst] = p;
}
break;
case InstructionType::FSWAP_R:
// Swap the two 64-bit lanes of the vector register (rotate by one element)
// vmv.x.s x5, v0 + dst
emit32(0x420022D7 + (dst << 20));
// vslide1down.vx v0 + dst, v0 + dst, x5
emit32(0x3E02E057 + (dst << 7) + (dst << 20));
break;
case InstructionType::FADD_R:
src %= RegisterCountFlt;
dst %= RegisterCountFlt;
// vfadd.vv v0 + dst, v0 + dst, v8 + src
emit32(0x02041057 + (dst << 7) + (src << 15) + (dst << 20));
break;
case InstructionType::FADD_M:
dst %= RegisterCountFlt;
loadFromScratchpad(src, RegistersCount, mod, imm, p);
emit_data(group_f_convert);
// vfadd.vv v0 + dst, v0 + dst, v16
emit32(0x02081057 + (dst << 7) + (dst << 20));
break;
case InstructionType::FSUB_R:
src %= RegisterCountFlt;
dst %= RegisterCountFlt;
// vfsub.vv v0 + dst, v0 + dst, v8 + src
emit32(0x0A041057 + (dst << 7) + (src << 15) + (dst << 20));
break;
case InstructionType::FSUB_M:
dst %= RegisterCountFlt;
loadFromScratchpad(src, RegistersCount, mod, imm, p);
emit_data(group_f_convert);
// vfsub.vv v0 + dst, v0 + dst, v16
emit32(0x0A081057 + (dst << 7) + (dst << 20));
break;
case InstructionType::FSCAL_R:
dst %= RegisterCountFlt;
// vxor.vv v0 + dst, v0 + dst, v14 (v14 = scale mask)
emit32(0x2E070057 + (dst << 7) + (dst << 20));
break;
case InstructionType::FMUL_R:
src %= RegisterCountFlt;
dst %= RegisterCountFlt;
// vfmul.vv v4 + dst, v4 + dst, v8 + src
emit32(0x92441257 + (dst << 7) + (src << 15) + (dst << 20));
break;
case InstructionType::FDIV_M:
dst %= RegisterCountFlt;
loadFromScratchpad(src, RegistersCount, mod, imm, p);
emit_data(group_f_convert);
emit_data(group_e_post_process);
// vfdiv.vv v4 + dst, v4 + dst, v16
emit32(0x82481257 + (dst << 7) + (dst << 20));
break;
case InstructionType::FSQRT_R:
dst %= RegisterCountFlt;
// vfsqrt.v v4 + dst, v4 + dst
emit32(0x4E401257 + (dst << 7) + (dst << 20));
break;
case InstructionType::CBRANCH:
{
const uint32_t shift = (mod >> 4) + RandomX_ConfigurationBase::JumpOffset;
imm |= (1UL << shift);
if (RandomX_ConfigurationBase::JumpOffset > 0 || shift > 0) {
imm &= ~(1UL << (shift - 1));
}
// slli x6, x7, shift
// x6 = branchMask
emit32(0x00039313 + (shift << 20));
// x5 = imm
imm_to_x5(imm, p);
// c.add x20 + dst, x5
emit16(0x9A16 + (dst << 7));
// and x5, x20 + dst, x6
emit32(0x006A72B3 + (dst << 15));
// Branch target: right after the last instruction that modified dst
const int offset = static_cast<int>(last_modified[dst] - p);
if (offset >= -4096) {
// Target is within the conditional-branch range
// beqz x5, offset
const uint32_t k = static_cast<uint32_t>(offset);
emit32(0x80028063 | ((k & 0x1E) << 7) | ((k & 0x7E0) << 20) | ((k & 0x800) >> 4));
}
else {
// Out of beqz range: invert the condition and use a long jump
// bnez x5, 8
emit32(0x00029463);
// j offset
const uint32_t k = static_cast<uint32_t>(offset - 4);
emit32(0x8000006F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000));
}
// Everything before a CBRANCH may be re-executed, so all registers
// count as modified at this point
for (uint32_t j = 0; j < RegistersCount; ++j) {
last_modified[j] = p;
}
}
break;
case InstructionType::CFROUND:
if ((imm - 1) & 63) {
#ifdef __riscv_zbb
// rori x5, x20 + src, imm - 1
emit32(0x600A5293 + (src << 15) + (((imm - 1) & 63) << 20));
#else // __riscv_zbb
// srli x5, x20 + src, imm - 1
emit32(0x000A5293 + (src << 15) + (((imm - 1) & 63) << 20));
// slli x6, x20 + src, 1 - imm
emit32(0x000A1313 + (src << 15) + (((1 - imm) & 63) << 20));
// or x5, x5, x6
emit32(0x0062E2B3);
#endif // __riscv_zbb
// andi x5, x5, 6
emit32(0x0062F293);
}
else {
// Rotation by 0: just mask the rounding-mode bits directly
// andi x5, x20 + src, 6
emit32(0x006A7293 + (src << 15));
}
// Translate the RandomX rounding mode to the RISC-V frm encoding
// li x6, 01111000b
// x6 = CFROUND lookup table
emit32(0x07800313);
// srl x5, x6, x5
emit32(0x005352B3);
// andi x5, x5, 3
emit32(0x0032F293);
// csrw frm, x5
emit32(0x00229073);
break;
case InstructionType::ISTORE:
{
uint32_t mask_reg;
uint32_t shift = 32;
// Select the scratchpad level (and its address mask register) from mod
if ((mod >> 4) >= 14) {
shift -= RandomX_CurrentConfig.Log2_ScratchpadL3;
mask_reg = 1; // x1 = L3 mask
}
else {
if ((mod & 3) == 0) {
shift -= RandomX_CurrentConfig.Log2_ScratchpadL2;
mask_reg = 17; // x17 = L2 mask
}
else {
shift -= RandomX_CurrentConfig.Log2_ScratchpadL1;
mask_reg = 16; // x16 = L1 mask
}
}
// Sign-extend the immediate within the selected scratchpad level's range
imm = static_cast<uint32_t>(static_cast<int32_t>(imm << shift) >> shift);
imm_to_x5(imm, p);
// c.add x5, x20 + dst
emit16(0x92D2 + (dst << 2));
// and x5, x5, x0 + mask_reg
emit32(0x0002F2B3 + (mask_reg << 20));
// c.add x5, x12
emit16(0x92B2);
// sd x20 + src, 0(x5)
emit32(0x0142B023 + (src << 20));
}
break;
default:
UNREACHABLE;
}
// Prefetch scratchpad lines for the next main loop iteration
// scratchpad_prefetch_pos is a conservative estimate of the earliest place in the code where we can do it
if (i == scratchpad_prefetch_pos) {
// Copy the template's prefetch sequence (spaddr xor + prefetch) inline
uint8_t* e = (uint8_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_scratchpad_prefetch_end));
const size_t n = e - ((uint8_t*)spaddr_xor2);
memcpy(p, spaddr_xor2, n);
p += n;
}
}
// Terminate the generated code with a jump to the appropriate epilogue
const uint8_t* e;
if (entryDataInitScalar) {
// Emit "J randomx_riscv64_vector_program_main_loop_instructions_end_light_mode" instruction
e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions_end_light_mode);
}
else {
// Emit "J randomx_riscv64_vector_program_main_loop_instructions_end" instruction
e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions_end);
}
// JAL x0 with a forward offset of k bytes
const uint32_t k = e - p;
emit32(0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000));
#ifdef __GNUC__
// Make the freshly written code visible to the instruction fetch unit
char* p1 = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_params));
char* p2 = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_end));
__builtin___clear_cache(p1, p2);
#endif
return buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_begin);
}
} // namespace randomx

View File

@@ -0,0 +1,45 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <cstdlib>
namespace randomx {
class SuperscalarProgram;
struct ProgramConfiguration;
class Program;
// JIT-compiles the SuperscalarHash programs into the RISC-V vector (RVV)
// dataset-init code template at `buf`; returns the dataset-init entry point.
void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs, size_t num_programs);
// JIT-compiles a RandomX program into the RISC-V vector (RVV) main-loop code
// template at `buf`. `pcfg` supplies the readReg0..readReg3 choices,
// `inst_map` maps opcodes to instruction types. A non-null
// `entryDataInitScalar` selects light mode (dataset items are computed on the
// fly, starting at `datasetOffset`). Returns the program entry point.
void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguration& pcfg, const uint8_t (&inst_map)[256], void* entryDataInitScalar, uint32_t datasetOffset);
} // namespace randomx

View File

@@ -0,0 +1,874 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "configuration.h"
// Compatibility macros
#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES)
#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES
#endif
#if defined(RANDOMX_ARGON_MEMORY)
#define RANDOMX_CACHE_MASK RANDOMX_ARGON_MEMORY * 1024 / 64 - 1
#elif defined(RANDOMX_CACHE_MAX_SIZE)
#define RANDOMX_CACHE_MASK RANDOMX_CACHE_MAX_SIZE / 64 - 1
#endif
#define DECL(x) x
.text
#ifndef __riscv_v
#error This file requires rv64gcv
#endif
.option pic
.global DECL(randomx_riscv64_vector_code_begin)
.global DECL(randomx_riscv64_vector_sshash_begin)
.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals)
.global DECL(randomx_riscv64_vector_sshash_dataset_init)
.global DECL(randomx_riscv64_vector_sshash_generated_instructions)
.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end)
.global DECL(randomx_riscv64_vector_sshash_cache_prefetch)
.global DECL(randomx_riscv64_vector_sshash_xor)
.global DECL(randomx_riscv64_vector_sshash_end)
.global DECL(randomx_riscv64_vector_program_params)
.global DECL(randomx_riscv64_vector_program_imul_rcp_literals)
.global DECL(randomx_riscv64_vector_program_begin)
.global DECL(randomx_riscv64_vector_program_main_loop_instructions)
.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end)
.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor)
.global DECL(randomx_riscv64_vector_program_main_loop_spaddr_xor)
.global DECL(randomx_riscv64_vector_program_main_loop_light_mode_data)
.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode)
.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode)
.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch)
.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch_end)
.global DECL(randomx_riscv64_vector_program_end)
.global DECL(randomx_riscv64_vector_code_end)
.balign 8
DECL(randomx_riscv64_vector_code_begin):
DECL(randomx_riscv64_vector_sshash_begin):
// SuperscalarHash initialization constants; broadcast into v10-v17 below
sshash_constant_0: .dword 6364136223846793005
sshash_constant_1: .dword 9298411001130361340
sshash_constant_2: .dword 12065312585734608966
sshash_constant_3: .dword 9306329213124626780
sshash_constant_4: .dword 5281919268842080866
sshash_constant_5: .dword 10536153434571861004
sshash_constant_6: .dword 3398623926847679864
sshash_constant_7: .dword 9549104520008361294
// Per-lane item numbers (itemNumber + 0..3) and per-item store strides
sshash_offsets: .dword 0,1,2,3
store_offsets: .dword 0,64,128,192
// Reciprocal literal table for IMUL_RCP, filled in by the JIT compiler
DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0
/*
Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation
Register layout
---------------
x5 = temporary
x10 = randomx cache
x11 = output buffer
x12 = startBlock
x13 = endBlock
x14 = cache mask
x15 = imul_rcp literal pointer
v0-v7 = r0-r7
v8 = itemNumber
v9 = cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory
v10-v17 = sshash constants
v18 = temporary
v19 = dataset item store offsets
*/
DECL(randomx_riscv64_vector_sshash_dataset_init):
// Process 4 64-bit values at a time
vsetivli zero, 4, e64, m1, ta, ma
// Load cache->memory pointer
ld x10, (x10)
// Init cache mask
li x14, RANDOMX_CACHE_MASK
// Init dataset item store offsets
lla x5, store_offsets
vle64.v v19, (x5)
// Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3)
lla x5, sshash_offsets
vle64.v v8, (x5)
vadd.vx v8, v8, x12
// Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector)
lla x5, sshash_constant_0
vlse64.v v10, (x5), x0
lla x5, sshash_constant_1
vlse64.v v11, (x5), x0
lla x5, sshash_constant_2
vlse64.v v12, (x5), x0
lla x5, sshash_constant_3
vlse64.v v13, (x5), x0
lla x5, sshash_constant_4
vlse64.v v14, (x5), x0
lla x5, sshash_constant_5
vlse64.v v15, (x5), x0
lla x5, sshash_constant_6
vlse64.v v16, (x5), x0
lla x5, sshash_constant_7
vlse64.v v17, (x5), x0
// Calculate the end pointer for dataset init
sub x13, x13, x12
slli x13, x13, 6
add x13, x13, x11
// Main loop: computes 4 dataset items (4 x 64 bytes) per iteration
init_item:
// Step 1. Init r0-r7
// r0 = (itemNumber + 1) * 6364136223846793005
vmv.v.v v0, v8
vmadd.vv v0, v10, v10
// r_i = r0 ^ c_i for i = 1..7
vxor.vv v1, v0, v11
vxor.vv v2, v0, v12
vxor.vv v3, v0, v13
vxor.vv v4, v0, v14
vxor.vv v5, v0, v15
vxor.vv v6, v0, v16
vxor.vv v7, v0, v17
// Step 2. Let cacheIndex = itemNumber
vmv.v.v v9, v8
// Step 3 is implicit (all iterations are inlined, there is no "i")
// Init imul_rcp literal pointer
lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals
DECL(randomx_riscv64_vector_sshash_generated_instructions):
// Generated by JIT compiler
//
// Step 4. randomx_riscv64_vector_sshash_cache_prefetch
// Step 5. SuperscalarHash[i]
// Step 6. randomx_riscv64_vector_sshash_xor
//
// Above steps will be repeated RANDOMX_CACHE_ACCESSES times
.fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0
DECL(randomx_riscv64_vector_sshash_generated_instructions_end):
// Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data.
// (indexed stores via v19 scatter each register's 4 lanes to the 4 items)
vsuxei64.v v0, (x11), v19
add x5, x11, 8
vsuxei64.v v1, (x5), v19
add x5, x11, 16
vsuxei64.v v2, (x5), v19
add x5, x11, 24
vsuxei64.v v3, (x5), v19
add x5, x11, 32
vsuxei64.v v4, (x5), v19
add x5, x11, 40
vsuxei64.v v5, (x5), v19
add x5, x11, 48
vsuxei64.v v6, (x5), v19
add x5, x11, 56
vsuxei64.v v7, (x5), v19
// Iterate to the next 4 items
vadd.vi v8, v8, 4
add x11, x11, 256
bltu x11, x13, init_item
ret
// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache.
// (template copied by the JIT into the generated-instructions region;
// without Zicbop a plain ld warms the cache instead of prefetch.r)
DECL(randomx_riscv64_vector_sshash_cache_prefetch):
// v9 = convert from cacheIndex to a direct pointer into cache->memory
vand.vx v9, v9, x14
vsll.vi v9, v9, 6
vadd.vx v9, v9, x10
// Prefetch element 0
vmv.x.s x5, v9
#ifdef __riscv_zicbop
prefetch.r (x5)
#else
ld x5, (x5)
#endif
// Prefetch element 1
vslidedown.vi v18, v9, 1
vmv.x.s x5, v18
#ifdef __riscv_zicbop
prefetch.r (x5)
#else
ld x5, (x5)
#endif
// Prefetch element 2
vslidedown.vi v18, v9, 2
vmv.x.s x5, v18
#ifdef __riscv_zicbop
prefetch.r (x5)
#else
ld x5, (x5)
#endif
// Prefetch element 3
vslidedown.vi v18, v9, 3
vmv.x.s x5, v18
#ifdef __riscv_zicbop
prefetch.r (x5)
#else
ld x5, (x5)
#endif
// v9 = byte offset into cache->memory
vsub.vx v9, v9, x10
// Step 6. XOR all registers with data loaded from randomx cache
// (indexed loads via v9 gather one 8-byte column from each of the 4 cache items)
DECL(randomx_riscv64_vector_sshash_xor):
vluxei64.v v18, (x10), v9
vxor.vv v0, v0, v18
add x5, x10, 8
vluxei64.v v18, (x5), v9
vxor.vv v1, v1, v18
add x5, x10, 16
vluxei64.v v18, (x5), v9
vxor.vv v2, v2, v18
add x5, x10, 24
vluxei64.v v18, (x5), v9
vxor.vv v3, v3, v18
add x5, x10, 32
vluxei64.v v18, (x5), v9
vxor.vv v4, v4, v18
add x5, x10, 40
vluxei64.v v18, (x5), v9
vxor.vv v5, v5, v18
add x5, x10, 48
vluxei64.v v18, (x5), v9
vxor.vv v6, v6, v18
add x5, x10, 56
vluxei64.v v18, (x5), v9
vxor.vv v7, v7, v18
DECL(randomx_riscv64_vector_sshash_end):
/*
Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#46-vm-execution
C declarations:
struct RegisterFile {
uint64_t r[8];
double f[4][2];
double e[4][2];
double a[4][2];
};
struct MemoryRegisters {
uint32_t mx, ma;
uint8_t* memory; // dataset (fast mode) or cache (light mode)
};
void ProgramFunc(RegisterFile* reg, MemoryRegisters* mem, uint8_t* scratchpad, uint64_t iterations);
Register layout
---------------
x0 = zero
x1 = scratchpad L3 mask
x2 = stack pointer
x3 = global pointer (unused)
x4 = thread pointer (unused)
x5 = temporary
x6 = temporary
x7 = branch mask (unshifted)
x8 = frame pointer, also 64-bit literal inside the loop
x9 = scratchpad L3 mask (64-byte aligned)
x10 = RegisterFile* reg, also 64-bit literal inside the loop
x11 = MemoryRegisters* mem, then dataset/cache pointer
x12 = scratchpad
x13 = iterations
x14 = mx, ma (always stored with dataset mask applied)
x15 = spAddr0, spAddr1
x16 = scratchpad L1 mask
x17 = scratchpad L2 mask
x18 = IMUL_RCP literals pointer
x19 = dataset mask
x20-x27 = r0-r7
x28-x31 = 64-bit literals
f0-f7 = 64-bit literals
f10-f17 = 64-bit literals
f28-f31 = 64-bit literals
v0-v3 = f0-f3
v4-v7 = e0-e3
v8-v11 = a0-a3
v12 = E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
v13 = E 'or' mask = 0x3*00000000******'3*00000000******
v14 = scale mask = 0x80f0000000000000'80f0000000000000
v15 = unused
v16 = temporary
v17 = unused
v18 = temporary
v19-v31 = unused
*/
.balign 8
DECL(randomx_riscv64_vector_program_params):
// JIT compiler will adjust these values for different RandomX variants
// (L1 mask, L2 mask, L3 mask, dataset mask, branch mask)
randomx_masks: .dword 16376, 262136, 2097144, 2147483584, 255
DECL(randomx_riscv64_vector_program_imul_rcp_literals):
// Reciprocal literal table for IMUL_RCP, filled in by the JIT compiler;
// the first 26 entries are preloaded into registers in the prologue below
imul_rcp_literals: .fill RANDOMX_PROGRAM_MAX_SIZE, 8, 0
// ProgramFunc entry point: prologue saves callee-saved state and sets up the
// register layout documented above
DECL(randomx_riscv64_vector_program_begin):
addi sp, sp, -112
sd x8, 96(sp) // save old frame pointer
addi x8, sp, 112 // setup new frame pointer
sd x1, 104(sp) // save return address
// Save callee-saved registers
sd x9, 0(sp)
sd x18, 8(sp)
sd x19, 16(sp)
sd x20, 24(sp)
sd x21, 32(sp)
sd x22, 40(sp)
sd x23, 48(sp)
sd x24, 56(sp)
sd x25, 64(sp)
sd x26, 72(sp)
sd x27, 80(sp)
// Save x10 as it will be used as an IMUL_RCP literal
sd x10, 88(sp)
// Load mx, ma and dataset pointer
ld x14, (x11)
ld x11, 8(x11)
// Initialize spAddr0-spAddr1
mv x15, x14
// Set registers r0-r7 to zero
li x20, 0
li x21, 0
li x22, 0
li x23, 0
li x24, 0
li x25, 0
li x26, 0
li x27, 0
// Load masks
lla x5, randomx_masks
ld x16, 0(x5)
ld x17, 8(x5)
ld x1, 16(x5)
ld x19, 24(x5)
ld x7, 32(x5)
// x9 = L3 mask rounded down to a 64-byte line boundary
addi x9, x1, -56
// Set vector registers to 2x64 bit
vsetivli zero, 2, e64, m1, ta, ma
// Apply dataset mask to mx, ma
slli x5, x19, 32
or x5, x5, x19
and x14, x14, x5
// Load group A registers
addi x5, x10, 192
vle64.v v8, (x5)
addi x5, x10, 208
vle64.v v9, (x5)
addi x5, x10, 224
vle64.v v10, (x5)
addi x5, x10, 240
vle64.v v11, (x5)
// Load E 'and' mask
vmv.v.i v12, -1
vsrl.vi v12, v12, 8
// Load E 'or' mask (stored in reg.f[0])
addi x5, x10, 64
vle64.v v13, (x5)
// Load scale mask
lui x5, 0x80f00
slli x5, x5, 32
vmv.v.x v14, x5
// IMUL_RCP literals pointer
lla x18, imul_rcp_literals
// Load IMUL_RCP literals
// (the JIT uses x8, x10, x28-x31 and f0-f7, f10-f17, f28-f31 for the
// first 26 literals; further ones are loaded from the table via x18)
ld x8, 0(x18)
ld x10, 8(x18)
ld x28, 16(x18)
ld x29, 24(x18)
ld x30, 32(x18)
ld x31, 40(x18)
fld f0, 48(x18)
fld f1, 56(x18)
fld f2, 64(x18)
fld f3, 72(x18)
fld f4, 80(x18)
fld f5, 88(x18)
fld f6, 96(x18)
fld f7, 104(x18)
fld f10, 112(x18)
fld f11, 120(x18)
fld f12, 128(x18)
fld f13, 136(x18)
fld f14, 144(x18)
fld f15, 152(x18)
fld f16, 160(x18)
fld f17, 168(x18)
fld f28, 176(x18)
fld f29, 184(x18)
fld f30, 192(x18)
fld f31, 200(x18)
randomx_riscv64_vector_program_main_loop:
and x5, x15, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
add x5, x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
// read a 64-byte line from scratchpad (indexed by spAddr0) and XOR it with r0-r7
ld x6, 0(x5)
xor x20, x20, x6
ld x6, 8(x5)
xor x21, x21, x6
ld x6, 16(x5)
xor x22, x22, x6
ld x6, 24(x5)
xor x23, x23, x6
ld x6, 32(x5)
xor x24, x24, x6
ld x6, 40(x5)
xor x25, x25, x6
ld x6, 48(x5)
xor x26, x26, x6
ld x6, 56(x5)
xor x27, x27, x6
srli x5, x15, 32 // x5 = spAddr1
and x5, x5, x9 // x5 = spAddr1 & 64-byte aligned L3 mask
add x5, x5, x12 // x5 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
// read a 64-byte line from scratchpad (indexed by spAddr1) and initialize f0-f3, e0-e3 registers
// Set vector registers to 2x32 bit
vsetivli zero, 2, e32, m1, ta, ma
// load f0
vle32.v v16, (x5)
vfwcvt.f.x.v v0, v16
// load f1
addi x6, x5, 8
vle32.v v1, (x6)
// Use v16 as an intermediary register because vfwcvt accepts only registers with even numbers here
vfwcvt.f.x.v v16, v1
vmv1r.v v1, v16
// load f2
addi x6, x5, 16
vle32.v v16, (x6)
vfwcvt.f.x.v v2, v16
// load f3
addi x6, x5, 24
vle32.v v3, (x6)
vfwcvt.f.x.v v16, v3
vmv1r.v v3, v16
// load e0
addi x6, x5, 32
vle32.v v16, (x6)
vfwcvt.f.x.v v4, v16
// load e1
addi x6, x5, 40
vle32.v v5, (x6)
vfwcvt.f.x.v v16, v5
vmv1r.v v5, v16
// load e2
addi x6, x5, 48
vle32.v v16, (x6)
vfwcvt.f.x.v v6, v16
// load e3
addi x6, x5, 56
vle32.v v7, (x6)
vfwcvt.f.x.v v16, v7
vmv1r.v v7, v16
// Set vector registers back to 2x64 bit
vsetivli zero, 2, e64, m1, ta, ma
// post-process e0-e3 (clear exponent bits, then force the E-group exponent)
vand.vv v4, v4, v12
vand.vv v5, v5, v12
vand.vv v6, v6, v12
vand.vv v7, v7, v12
vor.vv v4, v4, v13
vor.vv v5, v5, v13
vor.vv v6, v6, v13
vor.vv v7, v7, v13
DECL(randomx_riscv64_vector_program_main_loop_instructions):
// Generated by JIT compiler
// FDIV_M can generate up to 50 bytes of code (round it up to 52 - a multiple of 4)
// +32 bytes for the scratchpad prefetch and the final jump instruction
.fill RANDOMX_PROGRAM_MAX_SIZE * 52 + 32, 1, 0
DECL(randomx_riscv64_vector_program_main_loop_instructions_end):
// Calculate dataset pointer for dataset read
// Do it here to break false dependency from readReg2 and readReg3 (see below)
srli x6, x14, 32 // x6 = ma & dataset mask
DECL(randomx_riscv64_vector_program_main_loop_mx_xor):
xor x5, x24, x26 // x5 = readReg2 ^ readReg3 (JIT compiler will substitute the actual registers)
and x5, x5, x19 // x5 = (readReg2 ^ readReg3) & dataset mask
xor x14, x14, x5 // mx ^= (readReg2 ^ readReg3) & dataset mask
and x5, x14, x19 // x5 = mx & dataset mask
add x5, x5, x11 // x5 = &dataset[mx & dataset mask]
// prefetch the next dataset line (plain ld when Zicbop is unavailable)
#ifdef __riscv_zicbop
prefetch.r (x5)
#else
ld x5, (x5)
#endif
add x5, x6, x11 // x5 = &dataset[ma & dataset mask]
// read a 64-byte line from dataset and XOR it with r0-r7
ld x6, 0(x5)
xor x20, x20, x6
ld x6, 8(x5)
xor x21, x21, x6
ld x6, 16(x5)
xor x22, x22, x6
ld x6, 24(x5)
xor x23, x23, x6
ld x6, 32(x5)
xor x24, x24, x6
ld x6, 40(x5)
xor x25, x25, x6
ld x6, 48(x5)
xor x26, x26, x6
ld x6, 56(x5)
xor x27, x27, x6
randomx_riscv64_vector_program_main_loop_swap_mx_ma:
// swap mx <-> ma
#ifdef __riscv_zbb
rori x14, x14, 32
#else
srli x5, x14, 32
slli x14, x14, 32
or x14, x14, x5
#endif
srli x5, x15, 32 // x5 = spAddr1
and x5, x5, x9 // x5 = spAddr1 & 64-byte aligned L3 mask
add x5, x5, x12 // x5 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
// store registers r0-r7 to the scratchpad
sd x20, 0(x5)
sd x21, 8(x5)
sd x22, 16(x5)
sd x23, 24(x5)
sd x24, 32(x5)
sd x25, 40(x5)
sd x26, 48(x5)
sd x27, 56(x5)
and x5, x15, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
add x5, x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
DECL(randomx_riscv64_vector_program_main_loop_spaddr_xor):
xor x15, x20, x22 // spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
// store registers f0-f3 to the scratchpad (f0-f3 are first combined with e0-e3)
vxor.vv v0, v0, v4
vxor.vv v1, v1, v5
vxor.vv v2, v2, v6
vxor.vv v3, v3, v7
vse64.v v0, (x5)
addi x6, x5, 16
vse64.v v1, (x6)
addi x6, x5, 32
vse64.v v2, (x6)
addi x6, x5, 48
vse64.v v3, (x6)
// loop until the iteration counter reaches zero
addi x13, x13, -1
beqz x13, randomx_riscv64_vector_program_main_loop_end
j randomx_riscv64_vector_program_main_loop
// Epilogue: write the final register file back to *reg and restore state
randomx_riscv64_vector_program_main_loop_end:
// Restore x8 and x10
addi x8, sp, 112
ld x10, 88(sp)
// Store integer registers
sd x20, 0(x10)
sd x21, 8(x10)
sd x22, 16(x10)
sd x23, 24(x10)
sd x24, 32(x10)
sd x25, 40(x10)
sd x26, 48(x10)
sd x27, 56(x10)
// Store FP registers
addi x5, x10, 64
vse64.v v0, (x5)
addi x5, x10, 80
vse64.v v1, (x5)
addi x5, x10, 96
vse64.v v2, (x5)
addi x5, x10, 112
vse64.v v3, (x5)
addi x5, x10, 128
vse64.v v4, (x5)
addi x5, x10, 144
vse64.v v5, (x5)
addi x5, x10, 160
vse64.v v6, (x5)
addi x5, x10, 176
vse64.v v7, (x5)
// Restore callee-saved registers
ld x9, 0(sp)
ld x18, 8(sp)
ld x19, 16(sp)
ld x20, 24(sp)
ld x21, 32(sp)
ld x22, 40(sp)
ld x23, 48(sp)
ld x24, 56(sp)
ld x25, 64(sp)
ld x26, 72(sp)
ld x27, 80(sp)
ld x8, 96(sp) // old frame pointer
ld x1, 104(sp) // return address
addi sp, sp, 112
ret
DECL(randomx_riscv64_vector_program_main_loop_light_mode_data):
// Two 64-bit literal slots used only in light mode. The zeros below are
// placeholders, presumably patched by the JIT compiler before execution
// (they are loaded via "lla ... light_mode_data" further down) — TODO confirm.
// 1) Pointer to the scalar dataset init function
// 2) Dataset offset
.dword 0, 0
DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode):
// Light mode: there is no precomputed dataset, so the 64-byte dataset line
// is generated on the fly by calling a scalar dataset init function whose
// address is stored in ..._light_mode_data above.
// Calculate dataset pointer for dataset read
// Do it here to break false dependency from readReg2 and readReg3 (see below)
srli x6, x14, 32 // x6 = ma (upper half of x14); no AND here — assumes ma is already dataset-masked (NOTE(review): confirm the JIT keeps mx/ma masked)
DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode):
xor x5, x24, x26 // x5 = readReg2 ^ readReg3 (JIT compiler will substitute the actual registers)
and x5, x5, x19 // x5 = (readReg2 ^ readReg3) & dataset mask
xor x14, x14, x5 // mx ^= (readReg2 ^ readReg3) & dataset mask
// Save all registers modified when calling dataset_init_scalar_func_ptr
addi sp, sp, -192
// bytes [0, 127] - saved caller-visible registers (slot 8(sp) is unused; x5/x6 are scratch and not preserved)
// bytes [128, 191] - output buffer for the generated dataset line
sd x1, 0(sp) // return address — clobbered by the jalr below
sd x7, 16(sp)
sd x10, 24(sp)
sd x11, 32(sp)
sd x12, 40(sp)
sd x13, 48(sp)
sd x14, 56(sp)
sd x15, 64(sp)
sd x16, 72(sp)
sd x17, 80(sp)
sd x28, 88(sp)
sd x29, 96(sp)
sd x30, 104(sp)
sd x31, 112(sp)
// setup randomx_riscv64_vector_sshash_dataset_init's parameters
// x10 = pointer to pointer to cache memory
// pointer to cache memory was saved in "sd x11, 32(sp)", so x10 = sp + 32
addi x10, sp, 32
// x11 = output buffer (64 bytes)
addi x11, sp, 128
// x12 = start block = (dataset offset + ma) / 64
lla x5, randomx_riscv64_vector_program_main_loop_light_mode_data
ld x12, 8(x5) // second slot of light_mode_data = dataset offset
add x12, x12, x6
srli x12, x12, 6 // divide by the 64-byte block size
// x13 = end block (exactly one block is generated)
addi x13, x12, 1
ld x5, 0(x5) // first slot of light_mode_data = dataset init function pointer
jalr x1, 0(x5)
// restore registers
ld x1, 0(sp)
ld x7, 16(sp)
ld x10, 24(sp)
ld x11, 32(sp)
ld x12, 40(sp)
ld x13, 48(sp)
ld x14, 56(sp)
ld x15, 64(sp)
ld x16, 72(sp)
ld x17, 80(sp)
ld x28, 88(sp)
ld x29, 96(sp)
ld x30, 104(sp)
ld x31, 112(sp)
// read the freshly generated 64-byte line from the output buffer and XOR it with r0-r7
ld x5, 128(sp)
xor x20, x20, x5
ld x5, 136(sp)
xor x21, x21, x5
ld x5, 144(sp)
xor x22, x22, x5
ld x5, 152(sp)
xor x23, x23, x5
ld x5, 160(sp)
xor x24, x24, x5
ld x5, 168(sp)
xor x25, x25, x5
ld x5, 176(sp)
xor x26, x26, x5
ld x5, 184(sp)
xor x27, x27, x5
addi sp, sp, 192
// rejoin the common (full mode) code path
j randomx_riscv64_vector_program_main_loop_swap_mx_ma
DECL(randomx_riscv64_vector_program_scratchpad_prefetch):
// Warm up the two scratchpad cache lines that will be accessed next,
// using addresses derived from two of the integer registers.
xor x5, x20, x22 // spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
srli x6, x5, 32 // x6 = spAddr1 (upper 32 bits)
and x5, x5, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
and x6, x6, x9 // x6 = spAddr1 & 64-byte aligned L3 mask
c.add x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
c.add x6, x12 // x6 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
#ifdef __riscv_zicbop
prefetch.r (x5)
prefetch.r (x6)
#else
// No cache-block prefetch instruction available: plain loads (results are
// discarded — x5/x6 are scratch) pull both lines into the cache instead
ld x5, (x5)
ld x6, (x6)
#endif
DECL(randomx_riscv64_vector_program_scratchpad_prefetch_end):
DECL(randomx_riscv64_vector_program_end):
DECL(randomx_riscv64_vector_code_end):

View File

@@ -0,0 +1,75 @@
/*
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#if defined(__cplusplus)
#include <cstdint>
#else
#include <stdint.h>
#endif

#if defined(__cplusplus)
extern "C" {
#endif

struct randomx_cache;

/*
 * The symbols below are labels defined in the RISC-V vector assembly code.
 * Apart from randomx_riscv64_vector_sshash_dataset_init, they appear to be
 * address markers delimiting code ranges for the JIT compiler, not functions
 * meant to be called directly (NOTE(review): inferred from the DECL() labels
 * in the assembly — confirm against the .S file).
 *
 * Fix: the duplicate declaration of
 * randomx_riscv64_vector_sshash_cache_prefetch() was removed.
 */

/* Begin marker of the whole vector code blob */
void randomx_riscv64_vector_code_begin();

/* Dataset (SuperscalarHash) initialization */
void randomx_riscv64_vector_sshash_begin();
void randomx_riscv64_vector_sshash_imul_rcp_literals();
void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock);
void randomx_riscv64_vector_sshash_cache_prefetch();
void randomx_riscv64_vector_sshash_generated_instructions();
void randomx_riscv64_vector_sshash_generated_instructions_end();
void randomx_riscv64_vector_sshash_xor();
void randomx_riscv64_vector_sshash_end();

/* RandomX program main loop */
void randomx_riscv64_vector_program_params();
void randomx_riscv64_vector_program_imul_rcp_literals();
void randomx_riscv64_vector_program_begin();
void randomx_riscv64_vector_program_main_loop_instructions();
void randomx_riscv64_vector_program_main_loop_instructions_end();
void randomx_riscv64_vector_program_main_loop_mx_xor();
void randomx_riscv64_vector_program_main_loop_spaddr_xor();
void randomx_riscv64_vector_program_main_loop_light_mode_data();
void randomx_riscv64_vector_program_main_loop_instructions_end_light_mode();
void randomx_riscv64_vector_program_main_loop_mx_xor_light_mode();
void randomx_riscv64_vector_program_end();
void randomx_riscv64_vector_program_scratchpad_prefetch();
void randomx_riscv64_vector_program_scratchpad_prefetch_end();

/* End marker of the whole vector code blob */
void randomx_riscv64_vector_code_end();

#if defined(__cplusplus)
}
#endif

View File

@@ -282,7 +282,10 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
Log2_ScratchpadL2 = Log2(ScratchpadL2_Size);
Log2_ScratchpadL3 = Log2(ScratchpadL3_Size);
#define JIT_HANDLE(x, prev) randomx::JitCompilerRV64::engine[k] = &randomx::JitCompilerRV64::v1_##x
#define JIT_HANDLE(x, prev) do { \
randomx::JitCompilerRV64::engine[k] = &randomx::JitCompilerRV64::v1_##x; \
randomx::JitCompilerRV64::inst_map[k] = static_cast<uint8_t>(randomx::InstructionType::x); \
} while (0)
#else
#define JIT_HANDLE(x, prev)

View File

@@ -73,8 +73,20 @@ uint64_t randomx_reciprocal(uint64_t divisor) {
#if !RANDOMX_HAVE_FAST_RECIPROCAL
#ifdef __GNUC__
uint64_t randomx_reciprocal_fast(uint64_t divisor)
{
	// Computes floor(2^(63 + n) / divisor), where n is the bit length of
	// divisor. Done in two steps so intermediates stay within 64 bits:
	//   quot = floor(2^63 / divisor), rem = 2^63 mod divisor
	//   result = (quot << n) + floor((rem << n) / divisor)
	// Precondition: divisor != 0 (division and __builtin_clzll are
	// undefined for 0, same as the original implementation).
	const uint64_t half_range = 1ULL << 63;
	const uint64_t quot = half_range / divisor;
	const uint64_t rem = half_range % divisor;
	const uint64_t n = 64 - __builtin_clzll(divisor);
	return (quot << n) + ((rem << n) / divisor);
}
#else
uint64_t randomx_reciprocal_fast(uint64_t divisor)
{
	// No bit-scan intrinsic available on this compiler: defer to the
	// portable reference implementation.
	return randomx_reciprocal(divisor);
}
#endif
#endif

View File

@@ -39,6 +39,9 @@ alignas(64) uint32_t lutDec1[256];
alignas(64) uint32_t lutDec2[256];
alignas(64) uint32_t lutDec3[256];
alignas(64) uint8_t lutEncIndex[4][32];
alignas(64) uint8_t lutDecIndex[4][32];
static uint32_t mul_gf2(uint32_t b, uint32_t c)
{
uint32_t s = 0;
@@ -115,5 +118,49 @@ static struct SAESInitializer
lutDec2[i] = w; w = (w << 8) | (w >> 24);
lutDec3[i] = w;
}
memset(lutEncIndex, -1, sizeof(lutEncIndex));
memset(lutDecIndex, -1, sizeof(lutDecIndex));
lutEncIndex[0][ 0] = 0;
lutEncIndex[0][ 4] = 4;
lutEncIndex[0][ 8] = 8;
lutEncIndex[0][12] = 12;
lutEncIndex[1][ 0] = 5;
lutEncIndex[1][ 4] = 9;
lutEncIndex[1][ 8] = 13;
lutEncIndex[1][12] = 1;
lutEncIndex[2][ 0] = 10;
lutEncIndex[2][ 4] = 14;
lutEncIndex[2][ 8] = 2;
lutEncIndex[2][12] = 6;
lutEncIndex[3][ 0] = 15;
lutEncIndex[3][ 4] = 3;
lutEncIndex[3][ 8] = 7;
lutEncIndex[3][12] = 11;
lutDecIndex[0][ 0] = 0;
lutDecIndex[0][ 4] = 4;
lutDecIndex[0][ 8] = 8;
lutDecIndex[0][12] = 12;
lutDecIndex[1][ 0] = 13;
lutDecIndex[1][ 4] = 1;
lutDecIndex[1][ 8] = 5;
lutDecIndex[1][12] = 9;
lutDecIndex[2][ 0] = 10;
lutDecIndex[2][ 4] = 14;
lutDecIndex[2][ 8] = 2;
lutDecIndex[2][12] = 6;
lutDecIndex[3][ 0] = 7;
lutDecIndex[3][ 4] = 11;
lutDecIndex[3][ 8] = 15;
lutDecIndex[3][12] = 3;
for (uint32_t i = 0; i < 4; ++i) {
for (uint32_t j = 0; j < 16; j += 4) {
lutEncIndex[i][j + 16] = lutEncIndex[i][j] + 16;
lutDecIndex[i][j + 16] = lutDecIndex[i][j] + 16;
}
}
}
} aes_initializer;

View File

@@ -41,6 +41,9 @@ extern uint32_t lutDec1[256];
extern uint32_t lutDec2[256];
extern uint32_t lutDec3[256];
extern uint8_t lutEncIndex[4][32];
extern uint8_t lutDecIndex[4][32];
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);

View File

@@ -0,0 +1,12 @@
/* RISC-V - test if the vector extension is present */
.text
.option arch, rv64gcv
.global main
main:
	li t0, 4                        /* request AVL = 4 elements */
	vsetvli t1, t0, e64, m1, ta, ma /* t1 = granted vl for e64/m1 */
	vxor.vv v0, v0, v0              /* execute one vector instruction */
	sub a0, t0, t1                  /* exit code 0 iff vl == 4 was granted */
	ret

View File

@@ -0,0 +1,11 @@
/* RISC-V - test if the prefetch instruction is present */
.text
.option arch, rv64gc_zicbop
.global main
main:
	lla t0, main    /* any valid address serves as the prefetch target */
	prefetch.r (t0) /* the instruction under test */
	li a0, 0        /* report success if we got this far */
	ret

View File

@@ -0,0 +1,13 @@
/* RISC-V - test if the vector bit manipulation extension is present */
.text
.option arch, rv64gcv_zvkb
.global main
main:
	vsetivli zero, 8, e32, m1, ta, ma
	vror.vv v0, v0, v0 /* exercise all three vror encoding forms */
	vror.vx v0, v0, t0
	vror.vi v0, v0, 1
	li a0, 0           /* report success if we got this far */
	ret

View File

@@ -0,0 +1,12 @@
/* RISC-V - test if the vector AES extension (Zvkned) is present */
/* (comment fixed: this probes Zvkned, not the bit manipulation extension) */
.text
.option arch, rv64gcv_zvkned
.global main
main:
vsetivli zero, 8, e32, m1, ta, ma
/* one AES encrypt and one AES decrypt middle-round instruction */
vaesem.vv v0, v0
vaesdm.vv v0, v0
li x10, 0
ret

View File

@@ -58,7 +58,7 @@ namespace randomx {
void CompiledVm<softAes>::execute() {
PROFILE_SCOPE(RandomX_JIT_execute);
# ifdef XMRIG_ARM
# if defined(XMRIG_ARM) || defined(XMRIG_RISCV)
memcpy(reg.f, config.eMask, sizeof(config.eMask));
# endif
compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations);

View File

@@ -43,6 +43,12 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache,
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5));
randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5);
}
#ifdef XMRIG_RISCV
else if (itemCount % 4) {
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 4));
randomx_init_dataset(dataset, cache, startItem + itemCount - 4, 4);
}
#endif
else {
randomx_init_dataset(dataset, cache, startItem, itemCount);
}
@@ -209,7 +215,7 @@ void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages)
return;
}
m_memory = new VirtualMemory(maxSize(), hugePages, oneGbPages, false, m_node);
m_memory = new VirtualMemory(maxSize(), hugePages, oneGbPages, false, m_node, VirtualMemory::kDefaultHugePageSize);
if (m_memory->isOneGbPages()) {
m_scratchpadOffset = maxSize() + RANDOMX_CACHE_MAX_SIZE;

View File

@@ -115,7 +115,7 @@ static inline void checkHash(const JobBundle &bundle, std::vector<JobResult> &re
static void getResults(JobBundle &bundle, std::vector<JobResult> &results, uint32_t &errors, bool hwAES)
{
const auto &algorithm = bundle.job.algorithm();
auto memory = new VirtualMemory(algorithm.l3(), false, false, false);
auto memory = new VirtualMemory(algorithm.l3(), false, false, false, 0, VirtualMemory::kDefaultHugePageSize);
alignas(16) uint8_t hash[32]{ 0 };
if (algorithm.family() == Algorithm::RANDOM_X) {

View File

@@ -2,18 +2,7 @@
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#ifndef XMRIG_VERSION_H
@@ -22,18 +11,20 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
#define APP_VERSION "6.24.1-dev"
#define APP_VERSION "6.25.1-dev"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2025 xmrig.com"
#define APP_KIND "miner"
#define APP_VER_MAJOR 6
#define APP_VER_MINOR 24
#define APP_VER_MINOR 25
#define APP_VER_PATCH 1
#ifdef _MSC_VER
# if (_MSC_VER >= 1930)
# if (_MSC_VER >= 1950)
# define MSVC_VERSION 2026
# elif (_MSC_VER >=1930 && _MSC_VER < 1950)
# define MSVC_VERSION 2022
# elif (_MSC_VER >= 1920 && _MSC_VER < 1930)
# define MSVC_VERSION 2019