mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-07 16:05:05 -05:00
Compare commits
9 Commits
a44b21cef3
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
856813c1ae | ||
|
|
23da1a90f5 | ||
|
|
7981e4a76a | ||
|
|
7ef5142a52 | ||
|
|
db5c6d9190 | ||
|
|
e88009d575 | ||
|
|
5115597e7f | ||
|
|
4cdc35f966 | ||
|
|
b02519b9f5 |
@@ -55,6 +55,18 @@ if (XMRIG_RISCV)
|
|||||||
if(ARCH STREQUAL "native")
|
if(ARCH STREQUAL "native")
|
||||||
enable_language(ASM)
|
enable_language(ASM)
|
||||||
|
|
||||||
|
try_run(RANDOMX_VECTOR_RUN_FAIL
|
||||||
|
RANDOMX_VECTOR_COMPILE_OK
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_vector.s
|
||||||
|
COMPILE_DEFINITIONS "-march=rv64gcv_zicbop")
|
||||||
|
|
||||||
|
if (RANDOMX_VECTOR_COMPILE_OK AND NOT RANDOMX_VECTOR_RUN_FAIL)
|
||||||
|
set(RVARCH "${RVARCH}v_zicbop")
|
||||||
|
add_definitions(-DXMRIG_RVV_ENABLED)
|
||||||
|
message(STATUS "RISC-V vector extension detected")
|
||||||
|
endif()
|
||||||
|
|
||||||
try_run(RANDOMX_ZBA_RUN_FAIL
|
try_run(RANDOMX_ZBA_RUN_FAIL
|
||||||
RANDOMX_ZBA_COMPILE_OK
|
RANDOMX_ZBA_COMPILE_OK
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/
|
${CMAKE_CURRENT_BINARY_DIR}/
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
|||||||
else()
|
else()
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware")
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static -Wl,--large-address-aware")
|
||||||
endif()
|
endif()
|
||||||
|
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ else()
|
|||||||
set(XMRIG_OS_FREEBSD ON)
|
set(XMRIG_OS_FREEBSD ON)
|
||||||
elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD)
|
elseif(CMAKE_SYSTEM_NAME STREQUAL OpenBSD)
|
||||||
set(XMRIG_OS_OPENBSD ON)
|
set(XMRIG_OS_OPENBSD ON)
|
||||||
|
elseif(CMAKE_SYSTEM_NAME STREQUAL "Haiku")
|
||||||
|
set(XMRIG_OS_HAIKU ON)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@@ -47,6 +49,8 @@ elseif(XMRIG_OS_UNIX)
|
|||||||
add_definitions(-DXMRIG_OS_FREEBSD)
|
add_definitions(-DXMRIG_OS_FREEBSD)
|
||||||
elseif (XMRIG_OS_OPENBSD)
|
elseif (XMRIG_OS_OPENBSD)
|
||||||
add_definitions(-DXMRIG_OS_OPENBSD)
|
add_definitions(-DXMRIG_OS_OPENBSD)
|
||||||
|
elseif (XMRIG_OS_HAIKU)
|
||||||
|
add_definitions(-DXMRIG_OS_HAIKU)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|||||||
@@ -83,10 +83,13 @@ if (WITH_RANDOMX)
|
|||||||
elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
list(APPEND SOURCES_CRYPTO
|
list(APPEND SOURCES_CRYPTO
|
||||||
src/crypto/randomx/jit_compiler_rv64_static.S
|
src/crypto/randomx/jit_compiler_rv64_static.S
|
||||||
|
src/crypto/randomx/jit_compiler_rv64_vector_static.S
|
||||||
src/crypto/randomx/jit_compiler_rv64.cpp
|
src/crypto/randomx/jit_compiler_rv64.cpp
|
||||||
|
src/crypto/randomx/jit_compiler_rv64_vector.cpp
|
||||||
)
|
)
|
||||||
# cheat because cmake and ccache hate each other
|
# cheat because cmake and ccache hate each other
|
||||||
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
|
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
|
||||||
|
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)
|
||||||
else()
|
else()
|
||||||
list(APPEND SOURCES_CRYPTO
|
list(APPEND SOURCES_CRYPTO
|
||||||
src/crypto/randomx/jit_compiler_fallback.cpp
|
src/crypto/randomx/jit_compiler_fallback.cpp
|
||||||
|
|||||||
2
src/3rdparty/libethash/endian.h
vendored
2
src/3rdparty/libethash/endian.h
vendored
@@ -31,7 +31,7 @@
|
|||||||
#include <libkern/OSByteOrder.h>
|
#include <libkern/OSByteOrder.h>
|
||||||
#define ethash_swap_u32(input_) OSSwapInt32(input_)
|
#define ethash_swap_u32(input_) OSSwapInt32(input_)
|
||||||
#define ethash_swap_u64(input_) OSSwapInt64(input_)
|
#define ethash_swap_u64(input_) OSSwapInt64(input_)
|
||||||
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
|
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) || defined(__HAIKU__)
|
||||||
#define ethash_swap_u32(input_) bswap32(input_)
|
#define ethash_swap_u32(input_) bswap32(input_)
|
||||||
#define ethash_swap_u64(input_) bswap64(input_)
|
#define ethash_swap_u64(input_) bswap64(input_)
|
||||||
#elif defined(__OpenBSD__)
|
#elif defined(__OpenBSD__)
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ char *xmrig::Platform::createUserAgent()
|
|||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_FEATURE_HWLOC
|
#ifndef XMRIG_FEATURE_HWLOC
|
||||||
#if defined(__DragonFly__) || defined(XMRIG_OS_OPENBSD)
|
#if defined(__DragonFly__) || defined(XMRIG_OS_OPENBSD) || defined(XMRIG_OS_HAIKU)
|
||||||
|
|
||||||
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -35,15 +35,69 @@ constexpr size_t twoMiB = 2U * 1024U * 1024U;
|
|||||||
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
|
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
|
||||||
|
|
||||||
|
|
||||||
static inline std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
|
static bool sysfs_write(const std::string &path, uint64_t value)
|
||||||
|
{
|
||||||
|
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
|
||||||
|
if (!file.is_open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
file << value;
|
||||||
|
file.flush();
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int64_t sysfs_read(const std::string &path)
|
||||||
|
{
|
||||||
|
std::ifstream file(path);
|
||||||
|
if (!file.is_open()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t value = 0;
|
||||||
|
file >> value;
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
|
||||||
{
|
{
|
||||||
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
|
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) { return LinuxMemory::write(sysfs_path(node, hugePageSize, true).c_str(), count); }
|
static std::string sysfs_path(size_t hugePageSize, bool nr)
|
||||||
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, false).c_str()); }
|
{
|
||||||
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, true).c_str()); }
|
return fmt::format("/sys/kernel/mm/hugepages/hugepages-{}kB/{}_hugepages", hugePageSize / 1024, nr ? "nr" : "free");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count)
|
||||||
|
{
|
||||||
|
if (sysfs_write(sysfs_path(node, hugePageSize, true), count)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sysfs_write(sysfs_path(hugePageSize, true), count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int64_t sysfs_read_hugepages(uint32_t node, size_t hugePageSize, bool nr)
|
||||||
|
{
|
||||||
|
const int64_t value = sysfs_read(sysfs_path(node, hugePageSize, nr));
|
||||||
|
if (value >= 0) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sysfs_read(sysfs_path(hugePageSize, nr));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, false); }
|
||||||
|
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return sysfs_read_hugepages(node, hugePageSize, true); }
|
||||||
|
|
||||||
|
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
@@ -62,31 +116,3 @@ bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, size_t hugePageSize
|
|||||||
|
|
||||||
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
|
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool xmrig::LinuxMemory::write(const char *path, uint64_t value)
|
|
||||||
{
|
|
||||||
std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
file << value;
|
|
||||||
file.flush();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t xmrig::LinuxMemory::read(const char *path)
|
|
||||||
{
|
|
||||||
std::ifstream file(path);
|
|
||||||
if (!file.is_open()) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t value = 0;
|
|
||||||
file >> value;
|
|
||||||
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/* XMRig
|
/* XMRig
|
||||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
* Copyright (c) 2018-2025 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright (c) 2016-2025 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -31,13 +31,10 @@ class LinuxMemory
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
|
static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
|
||||||
|
|
||||||
static bool write(const char *path, uint64_t value);
|
|
||||||
static int64_t read(const char *path);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_LINUXMEMORY_H */
|
#endif // XMRIG_LINUXMEMORY_H
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ bool xmrig::VirtualMemory::isHugepagesAvailable()
|
|||||||
{
|
{
|
||||||
# ifdef XMRIG_OS_LINUX
|
# ifdef XMRIG_OS_LINUX
|
||||||
return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good();
|
return std::ifstream("/proc/sys/vm/nr_hugepages").good() || std::ifstream("/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages").good();
|
||||||
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM)
|
# elif defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM) || defined(XMRIG_OS_HAIKU)
|
||||||
return false;
|
return false;
|
||||||
# else
|
# else
|
||||||
return true;
|
return true;
|
||||||
@@ -156,7 +156,8 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
|
|||||||
if (!mem) {
|
if (!mem) {
|
||||||
mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
mem = mmap(0, size, PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
}
|
}
|
||||||
|
# elif defined(XMRIG_OS_HAIKU)
|
||||||
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
# else
|
# else
|
||||||
|
|
||||||
void *mem = nullptr;
|
void *mem = nullptr;
|
||||||
@@ -181,6 +182,8 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
|
|||||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||||
# elif defined(XMRIG_OS_FREEBSD)
|
# elif defined(XMRIG_OS_FREEBSD)
|
||||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
||||||
|
# elif defined(XMRIG_OS_HAIKU)
|
||||||
|
void *mem = nullptr;
|
||||||
# else
|
# else
|
||||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
|
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
|
||||||
# endif
|
# endif
|
||||||
|
|||||||
@@ -235,6 +235,131 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
|
#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
|
||||||
|
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
|
||||||
|
|
||||||
|
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
|
||||||
|
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
|
||||||
|
|
||||||
|
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
|
||||||
|
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
|
||||||
|
|
||||||
|
static constexpr uint32_t AES_HASH_STRIDE[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
|
||||||
|
|
||||||
|
template<int softAes, int unroll>
|
||||||
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
|
PROFILE_SCOPE(RandomX_AES);
|
||||||
|
|
||||||
|
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||||
|
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||||
|
|
||||||
|
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||||
|
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||||
|
|
||||||
|
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||||
|
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||||
|
|
||||||
|
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE, 8);
|
||||||
|
|
||||||
|
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
|
||||||
|
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
|
||||||
|
|
||||||
|
const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
|
||||||
|
const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
|
||||||
|
const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
|
||||||
|
const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
|
||||||
|
|
||||||
|
const vuint8m1_t& lutdec_index0 = lutenc_index0;
|
||||||
|
const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
|
||||||
|
const vuint8m1_t& lutdec_index2 = lutenc_index2;
|
||||||
|
const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
|
||||||
|
|
||||||
|
//process 64 bytes at a time in 4 lanes
|
||||||
|
while (scratchpadPtr < scratchpadEnd) {
|
||||||
|
#define HASH_STATE(k) \
|
||||||
|
hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
|
||||||
|
hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||||
|
|
||||||
|
#define FILL_STATE(k) \
|
||||||
|
fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \
|
||||||
|
fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
|
||||||
|
|
||||||
|
switch (softAes) {
|
||||||
|
case 0:
|
||||||
|
HASH_STATE(0);
|
||||||
|
HASH_STATE(1);
|
||||||
|
|
||||||
|
FILL_STATE(0);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
|
scratchpadPtr += 128;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
switch (unroll) {
|
||||||
|
case 4:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
|
||||||
|
HASH_STATE(1);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
|
HASH_STATE(2);
|
||||||
|
FILL_STATE(2);
|
||||||
|
|
||||||
|
HASH_STATE(3);
|
||||||
|
FILL_STATE(3);
|
||||||
|
|
||||||
|
scratchpadPtr += 64 * 4;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
|
||||||
|
HASH_STATE(1);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
|
scratchpadPtr += 64 * 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
|
||||||
|
scratchpadPtr += 64;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef HASH_STATE
|
||||||
|
#undef FILL_STATE
|
||||||
|
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
|
||||||
|
|
||||||
|
//two extra rounds to achieve full diffusion
|
||||||
|
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||||
|
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||||
|
|
||||||
|
hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||||
|
hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||||
|
|
||||||
|
hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
|
||||||
|
hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
|
||||||
|
|
||||||
|
//output hash
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
|
||||||
|
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
|
||||||
template<int softAes, int unroll>
|
template<int softAes, int unroll>
|
||||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
PROFILE_SCOPE(RandomX_AES);
|
PROFILE_SCOPE(RandomX_AES);
|
||||||
@@ -375,6 +500,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
|||||||
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
||||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||||
}
|
}
|
||||||
|
#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
|
||||||
template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include "crypto/randomx/jit_compiler_rv64.hpp"
|
#include "crypto/randomx/jit_compiler_rv64.hpp"
|
||||||
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
|
#include "crypto/randomx/jit_compiler_rv64_static.hpp"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_vector.h"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_vector_static.h"
|
||||||
#include "crypto/randomx/superscalar.hpp"
|
#include "crypto/randomx/superscalar.hpp"
|
||||||
#include "crypto/randomx/program.hpp"
|
#include "crypto/randomx/program.hpp"
|
||||||
#include "crypto/randomx/reciprocal.h"
|
#include "crypto/randomx/reciprocal.h"
|
||||||
@@ -618,20 +620,32 @@ namespace randomx {
|
|||||||
entryProgram = state.code + LiteralPoolSize + sizeDataInit;
|
entryProgram = state.code + LiteralPoolSize + sizeDataInit;
|
||||||
//jal x1, SuperscalarHash
|
//jal x1, SuperscalarHash
|
||||||
emitJump(state, ReturnReg, LiteralPoolSize + offsetFixDataCall, SuperScalarHashOffset);
|
emitJump(state, ReturnReg, LiteralPoolSize + offsetFixDataCall, SuperScalarHashOffset);
|
||||||
|
|
||||||
|
vectorCodeSize = ((uint8_t*)randomx_riscv64_vector_sshash_end) - ((uint8_t*)randomx_riscv64_vector_sshash_begin);
|
||||||
|
vectorCode = static_cast<uint8_t*>(allocExecutableMemory(vectorCodeSize, hugePagesJIT && hugePagesEnable));
|
||||||
}
|
}
|
||||||
|
|
||||||
JitCompilerRV64::~JitCompilerRV64() {
|
JitCompilerRV64::~JitCompilerRV64() {
|
||||||
freePagedMemory(state.code, CodeSize);
|
freePagedMemory(state.code, CodeSize);
|
||||||
|
freePagedMemory(vectorCode, vectorCodeSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerRV64::enableWriting() const
|
void JitCompilerRV64::enableWriting() const
|
||||||
{
|
{
|
||||||
xmrig::VirtualMemory::protectRW(entryDataInit, ExecutableSize);
|
xmrig::VirtualMemory::protectRW(entryDataInit, ExecutableSize);
|
||||||
|
|
||||||
|
if (vectorCode) {
|
||||||
|
xmrig::VirtualMemory::protectRW(vectorCode, vectorCodeSize);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerRV64::enableExecution() const
|
void JitCompilerRV64::enableExecution() const
|
||||||
{
|
{
|
||||||
xmrig::VirtualMemory::protectRX(entryDataInit, ExecutableSize);
|
xmrig::VirtualMemory::protectRX(entryDataInit, ExecutableSize);
|
||||||
|
|
||||||
|
if (vectorCode) {
|
||||||
|
xmrig::VirtualMemory::protectRX(vectorCode, vectorCodeSize);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerRV64::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t) {
|
void JitCompilerRV64::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t) {
|
||||||
@@ -666,6 +680,11 @@ namespace randomx {
|
|||||||
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
|
void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
|
||||||
|
if (optimizedDatasetInit > 0) {
|
||||||
|
entryDataInitOptimized = generateDatasetInitVectorRV64(vectorCode, vectorCodeSize, programs, RandomX_ConfigurationBase::CacheAccesses);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
state.codePos = SuperScalarHashOffset;
|
state.codePos = SuperScalarHashOffset;
|
||||||
state.rcpCount = 0;
|
state.rcpCount = 0;
|
||||||
state.emit(codeSshInit, sizeSshInit);
|
state.emit(codeSshInit, sizeSshInit);
|
||||||
@@ -703,6 +722,10 @@ namespace randomx {
|
|||||||
|
|
||||||
template void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&)[RANDOMX_CACHE_MAX_ACCESSES]);
|
template void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&)[RANDOMX_CACHE_MAX_ACCESSES]);
|
||||||
|
|
||||||
|
DatasetInitFunc* JitCompilerRV64::getDatasetInitFunc() {
|
||||||
|
return (DatasetInitFunc*)((optimizedDatasetInit > 0) ? entryDataInitOptimized : entryDataInit);
|
||||||
|
}
|
||||||
|
|
||||||
void JitCompilerRV64::v1_IADD_RS(HANDLER_ARGS) {
|
void JitCompilerRV64::v1_IADD_RS(HANDLER_ARGS) {
|
||||||
state.registerUsage[isn.dst] = i;
|
state.registerUsage[isn.dst] = i;
|
||||||
int shift = isn.getModShift();
|
int shift = isn.getModShift();
|
||||||
|
|||||||
@@ -92,9 +92,7 @@ namespace randomx {
|
|||||||
ProgramFunc* getProgramFunc() {
|
ProgramFunc* getProgramFunc() {
|
||||||
return (ProgramFunc*)entryProgram;
|
return (ProgramFunc*)entryProgram;
|
||||||
}
|
}
|
||||||
DatasetInitFunc* getDatasetInitFunc() {
|
DatasetInitFunc* getDatasetInitFunc();
|
||||||
return (DatasetInitFunc*)entryDataInit;
|
|
||||||
}
|
|
||||||
uint8_t* getCode() {
|
uint8_t* getCode() {
|
||||||
return state.code;
|
return state.code;
|
||||||
}
|
}
|
||||||
@@ -106,7 +104,12 @@ namespace randomx {
|
|||||||
static InstructionGeneratorRV64 engine[256];
|
static InstructionGeneratorRV64 engine[256];
|
||||||
private:
|
private:
|
||||||
CompilerState state;
|
CompilerState state;
|
||||||
|
|
||||||
|
uint8_t* vectorCode;
|
||||||
|
size_t vectorCodeSize;
|
||||||
|
|
||||||
void* entryDataInit;
|
void* entryDataInit;
|
||||||
|
void* entryDataInitOptimized;
|
||||||
void* entryProgram;
|
void* entryProgram;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|||||||
207
src/crypto/randomx/jit_compiler_rv64_vector.cpp
Normal file
207
src/crypto/randomx/jit_compiler_rv64_vector.cpp
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "crypto/randomx/configuration.h"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_vector.h"
|
||||||
|
#include "crypto/randomx/jit_compiler_rv64_vector_static.h"
|
||||||
|
#include "crypto/randomx/reciprocal.h"
|
||||||
|
#include "crypto/randomx/superscalar.hpp"
|
||||||
|
|
||||||
|
namespace randomx {
|
||||||
|
|
||||||
|
#define ADDR(x) ((uint8_t*) &(x))
|
||||||
|
#define DIST(x, y) (ADDR(y) - ADDR(x))
|
||||||
|
|
||||||
|
void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs)
|
||||||
|
{
|
||||||
|
memcpy(buf, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_begin), buf_size);
|
||||||
|
|
||||||
|
uint8_t* p = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions);
|
||||||
|
|
||||||
|
uint8_t* literals = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_imul_rcp_literals);
|
||||||
|
uint8_t* cur_literal = literals;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_programs; ++i) {
|
||||||
|
// Step 4
|
||||||
|
size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor);
|
||||||
|
memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_cache_prefetch), k);
|
||||||
|
p += k;
|
||||||
|
|
||||||
|
// Step 5
|
||||||
|
for (uint32_t j = 0; j < programs[i].size; ++j) {
|
||||||
|
const uint32_t dst = programs[i].programBuffer[j].dst & 7;
|
||||||
|
const uint32_t src = programs[i].programBuffer[j].src & 7;
|
||||||
|
const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3;
|
||||||
|
const uint32_t imm32 = programs[i].programBuffer[j].imm32;
|
||||||
|
|
||||||
|
uint32_t inst;
|
||||||
|
#define EMIT(data) inst = (data); memcpy(p, &inst, 4); p += 4
|
||||||
|
|
||||||
|
switch (static_cast<SuperscalarInstructionType>(programs[i].programBuffer[j].opcode)) {
|
||||||
|
case SuperscalarInstructionType::ISUB_R:
|
||||||
|
// 57 00 00 0A vsub.vv v0, v0, v0
|
||||||
|
EMIT(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IXOR_R:
|
||||||
|
// 57 00 00 2E vxor.vv v0, v0, v0
|
||||||
|
EMIT(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IADD_RS:
|
||||||
|
// 57 39 00 96 vsll.vi v18, v0, 0
|
||||||
|
// 57 00 09 02 vadd.vv v0, v0, v18
|
||||||
|
EMIT(0x96003957 | (modShift << 15) | (src << 20));
|
||||||
|
EMIT(0x02090057 | (dst << 7) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IMUL_R:
|
||||||
|
// 57 20 00 96 vmul.vv v0, v0, v0
|
||||||
|
EMIT(0x96002057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IROR_C:
|
||||||
|
{
|
||||||
|
const uint32_t shift_right = imm32 & 63;
|
||||||
|
const uint32_t shift_left = 64 - shift_right;
|
||||||
|
|
||||||
|
if (shift_right < 32) {
|
||||||
|
// 57 39 00 A2 vsrl.vi v18, v0, 0
|
||||||
|
EMIT(0xA2003957 | (shift_right << 15) | (dst << 20));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// 93 02 00 00 li x5, 0
|
||||||
|
// 57 C9 02 A2 vsrl.vx v18, v0, x5
|
||||||
|
EMIT(0x00000293 | (shift_right << 20));
|
||||||
|
EMIT(0xA202C957 | (dst << 20));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shift_left < 32) {
|
||||||
|
// 57 30 00 96 vsll.vi v0, v0, 0
|
||||||
|
EMIT(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// 93 02 00 00 li x5, 0
|
||||||
|
// 57 C0 02 96 vsll.vx v0, v0, x5
|
||||||
|
EMIT(0x00000293 | (shift_left << 20));
|
||||||
|
EMIT(0x9602C057 | (dst << 7) | (dst << 20));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 57 00 20 2B vor.vv v0, v18, v0
|
||||||
|
EMIT(0x2B200057 | (dst << 7) | (dst << 15));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IADD_C7:
|
||||||
|
case SuperscalarInstructionType::IADD_C8:
|
||||||
|
case SuperscalarInstructionType::IADD_C9:
|
||||||
|
// B7 02 00 00 lui x5, 0
|
||||||
|
// 9B 82 02 00 addiw x5, x5, 0
|
||||||
|
// 57 C0 02 02 vadd.vx v0, v0, x5
|
||||||
|
EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
|
||||||
|
EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20);
|
||||||
|
EMIT(0x0202C057 | (dst << 7) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IXOR_C7:
|
||||||
|
case SuperscalarInstructionType::IXOR_C8:
|
||||||
|
case SuperscalarInstructionType::IXOR_C9:
|
||||||
|
// B7 02 00 00 lui x5, 0
|
||||||
|
// 9B 82 02 00 addiw x5, x5, 0
|
||||||
|
// 57 C0 02 2E vxor.vx v0, v0, x5
|
||||||
|
EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
|
||||||
|
EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20);
|
||||||
|
EMIT(0x2E02C057 | (dst << 7) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IMULH_R:
|
||||||
|
// 57 20 00 92 vmulhu.vv v0, v0, v0
|
||||||
|
EMIT(0x92002057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::ISMULH_R:
|
||||||
|
// 57 20 00 9E vmulh.vv v0, v0, v0
|
||||||
|
EMIT(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SuperscalarInstructionType::IMUL_RCP:
|
||||||
|
{
|
||||||
|
uint32_t offset = cur_literal - literals;
|
||||||
|
|
||||||
|
if (offset == 2040) {
|
||||||
|
literals += 2040;
|
||||||
|
offset = 0;
|
||||||
|
|
||||||
|
// 93 87 87 7F add x15, x15, 2040
|
||||||
|
EMIT(0x7F878793);
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint64_t r = randomx_reciprocal_fast(imm32);
|
||||||
|
memcpy(cur_literal, &r, 8);
|
||||||
|
cur_literal += 8;
|
||||||
|
|
||||||
|
// 83 B2 07 00 ld x5, (x15)
|
||||||
|
// 57 E0 02 96 vmul.vx v0, v0, x5
|
||||||
|
EMIT(0x0007B283 | (offset << 20));
|
||||||
|
EMIT(0x9602E057 | (dst << 7) | (dst << 20));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 6
|
||||||
|
k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_set_cache_index);
|
||||||
|
memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_xor), k);
|
||||||
|
p += k;
|
||||||
|
|
||||||
|
// Step 7
|
||||||
|
if (i + 1 < num_programs) {
|
||||||
|
memcpy(p, reinterpret_cast<uint8_t*>(randomx_riscv64_vector_sshash_set_cache_index) + programs[i].getAddressRegister() * 4, 4);
|
||||||
|
p += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction
|
||||||
|
const uint8_t* e = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions_end);
|
||||||
|
const uint32_t k = e - p;
|
||||||
|
const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000);
|
||||||
|
memcpy(p, &j, 4);
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
__builtin___clear_cache((char*) buf, (char*)(buf + buf_size));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_dataset_init);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace randomx
|
||||||
42
src/crypto/randomx/jit_compiler_rv64_vector.h
Normal file
42
src/crypto/randomx/jit_compiler_rv64_vector.h
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
namespace randomx {
|
||||||
|
|
||||||
|
class SuperscalarProgram;
|
||||||
|
|
||||||
|
void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs);
|
||||||
|
|
||||||
|
} // namespace randomx
|
||||||
296
src/crypto/randomx/jit_compiler_rv64_vector_static.S
Normal file
296
src/crypto/randomx/jit_compiler_rv64_vector_static.S
Normal file
@@ -0,0 +1,296 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "configuration.h"
|
||||||
|
|
||||||
|
// Compatibility macros
|
||||||
|
|
||||||
|
#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES)
|
||||||
|
#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(RANDOMX_ARGON_MEMORY)
|
||||||
|
#define RANDOMX_CACHE_MASK RANDOMX_ARGON_MEMORY * 1024 / 64 - 1
|
||||||
|
#elif defined(RANDOMX_CACHE_MAX_SIZE)
|
||||||
|
#define RANDOMX_CACHE_MASK RANDOMX_CACHE_MAX_SIZE / 64 - 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define DECL(x) x
|
||||||
|
|
||||||
|
.text
|
||||||
|
|
||||||
|
.option arch, rv64gcv_zicbop
|
||||||
|
.option pic
|
||||||
|
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_begin)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_dataset_init)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_generated_instructions)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_cache_prefetch)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_xor)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_set_cache_index)
|
||||||
|
.global DECL(randomx_riscv64_vector_sshash_end)
|
||||||
|
|
||||||
|
.balign 8
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_begin):
|
||||||
|
|
||||||
|
sshash_constant_0: .dword 6364136223846793005
|
||||||
|
sshash_constant_1: .dword 9298411001130361340
|
||||||
|
sshash_constant_2: .dword 12065312585734608966
|
||||||
|
sshash_constant_3: .dword 9306329213124626780
|
||||||
|
sshash_constant_4: .dword 5281919268842080866
|
||||||
|
sshash_constant_5: .dword 10536153434571861004
|
||||||
|
sshash_constant_6: .dword 3398623926847679864
|
||||||
|
sshash_constant_7: .dword 9549104520008361294
|
||||||
|
sshash_offsets: .dword 0,1,2,3
|
||||||
|
store_offsets: .dword 0,64,128,192
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0
|
||||||
|
|
||||||
|
/*
|
||||||
|
Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation
|
||||||
|
|
||||||
|
Register layout
|
||||||
|
---------------
|
||||||
|
x5 = temporary
|
||||||
|
|
||||||
|
x10 = randomx cache
|
||||||
|
x11 = output buffer
|
||||||
|
x12 = startBlock
|
||||||
|
x13 = endBlock
|
||||||
|
|
||||||
|
x14 = cache mask
|
||||||
|
x15 = imul_rcp literal pointer
|
||||||
|
|
||||||
|
v0-v7 = r0-r7
|
||||||
|
v8 = itemNumber
|
||||||
|
v9 = cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory
|
||||||
|
|
||||||
|
v10-v17 = sshash constants
|
||||||
|
|
||||||
|
v18 = temporary
|
||||||
|
|
||||||
|
v19 = dataset item store offsets
|
||||||
|
*/
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_dataset_init):
|
||||||
|
// Process 4 64-bit values at a time
|
||||||
|
li x5, 4
|
||||||
|
vsetvli x5, x5, e64, m1, ta, ma
|
||||||
|
|
||||||
|
// Load cache->memory pointer
|
||||||
|
ld x10, (x10)
|
||||||
|
|
||||||
|
// Init cache mask
|
||||||
|
li x14, RANDOMX_CACHE_MASK
|
||||||
|
|
||||||
|
// Init dataset item store offsets
|
||||||
|
lla x5, store_offsets
|
||||||
|
vle64.v v19, (x5)
|
||||||
|
|
||||||
|
// Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3)
|
||||||
|
lla x5, sshash_offsets
|
||||||
|
vle64.v v8, (x5)
|
||||||
|
vadd.vx v8, v8, x12
|
||||||
|
|
||||||
|
// Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector)
|
||||||
|
lla x5, sshash_constant_0
|
||||||
|
vlse64.v v10, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_1
|
||||||
|
vlse64.v v11, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_2
|
||||||
|
vlse64.v v12, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_3
|
||||||
|
vlse64.v v13, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_4
|
||||||
|
vlse64.v v14, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_5
|
||||||
|
vlse64.v v15, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_6
|
||||||
|
vlse64.v v16, (x5), x0
|
||||||
|
|
||||||
|
lla x5, sshash_constant_7
|
||||||
|
vlse64.v v17, (x5), x0
|
||||||
|
|
||||||
|
// Calculate the end pointer for dataset init
|
||||||
|
sub x13, x13, x12
|
||||||
|
slli x13, x13, 6
|
||||||
|
add x13, x13, x11
|
||||||
|
|
||||||
|
init_item:
|
||||||
|
// Step 1. Init r0-r7
|
||||||
|
|
||||||
|
// r0 = (itemNumber + 1) * 6364136223846793005
|
||||||
|
vmv.v.v v0, v8
|
||||||
|
vmadd.vv v0, v10, v10
|
||||||
|
|
||||||
|
// r_i = r0 ^ c_i for i = 1..7
|
||||||
|
vxor.vv v1, v0, v11
|
||||||
|
vxor.vv v2, v0, v12
|
||||||
|
vxor.vv v3, v0, v13
|
||||||
|
vxor.vv v4, v0, v14
|
||||||
|
vxor.vv v5, v0, v15
|
||||||
|
vxor.vv v6, v0, v16
|
||||||
|
vxor.vv v7, v0, v17
|
||||||
|
|
||||||
|
// Step 2. Let cacheIndex = itemNumber
|
||||||
|
vmv.v.v v9, v8
|
||||||
|
|
||||||
|
// Step 3 is implicit (all iterations are inlined, there is no "i")
|
||||||
|
|
||||||
|
// Init imul_rcp literal pointer
|
||||||
|
lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_generated_instructions):
|
||||||
|
// Generated by JIT compiler
|
||||||
|
//
|
||||||
|
// Step 4. randomx_riscv64_vector_sshash_cache_prefetch
|
||||||
|
// Step 5. SuperscalarHash[i]
|
||||||
|
// Step 6. randomx_riscv64_vector_sshash_xor
|
||||||
|
// Step 7. randomx_riscv64_vector_sshash_set_cache_index
|
||||||
|
//
|
||||||
|
// Above steps will be repeated RANDOMX_CACHE_ACCESSES times
|
||||||
|
.fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_generated_instructions_end):
|
||||||
|
// Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data.
|
||||||
|
vsuxei64.v v0, (x11), v19
|
||||||
|
|
||||||
|
add x5, x11, 8
|
||||||
|
vsuxei64.v v1, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 16
|
||||||
|
vsuxei64.v v2, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 24
|
||||||
|
vsuxei64.v v3, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 32
|
||||||
|
vsuxei64.v v4, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 40
|
||||||
|
vsuxei64.v v5, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 48
|
||||||
|
vsuxei64.v v6, (x5), v19
|
||||||
|
|
||||||
|
add x5, x11, 56
|
||||||
|
vsuxei64.v v7, (x5), v19
|
||||||
|
|
||||||
|
// Iterate to the next 4 items
|
||||||
|
vadd.vi v8, v8, 4
|
||||||
|
add x11, x11, 256
|
||||||
|
bltu x11, x13, init_item
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
||||||
|
// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache.
|
||||||
|
DECL(randomx_riscv64_vector_sshash_cache_prefetch):
|
||||||
|
// v9 = convert from cacheIndex to a direct pointer into cache->memory
|
||||||
|
vand.vx v9, v9, x14
|
||||||
|
vsll.vi v9, v9, 6
|
||||||
|
vadd.vx v9, v9, x10
|
||||||
|
|
||||||
|
// Prefetch element 0
|
||||||
|
vmv.x.s x5, v9
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// Prefetch element 1
|
||||||
|
vslidedown.vi v18, v9, 1
|
||||||
|
vmv.x.s x5, v18
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// Prefetch element 2
|
||||||
|
vslidedown.vi v18, v9, 2
|
||||||
|
vmv.x.s x5, v18
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// Prefetch element 3
|
||||||
|
vslidedown.vi v18, v9, 3
|
||||||
|
vmv.x.s x5, v18
|
||||||
|
prefetch.r (x5)
|
||||||
|
|
||||||
|
// v9 = byte offset into cache->memory
|
||||||
|
vsub.vx v9, v9, x10
|
||||||
|
|
||||||
|
// Step 6. XOR all registers with data loaded from randomx cache
|
||||||
|
DECL(randomx_riscv64_vector_sshash_xor):
|
||||||
|
vluxei64.v v18, (x10), v9
|
||||||
|
vxor.vv v0, v0, v18
|
||||||
|
|
||||||
|
add x5, x10, 8
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v1, v1, v18
|
||||||
|
|
||||||
|
add x5, x10, 16
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v2, v2, v18
|
||||||
|
|
||||||
|
add x5, x10, 24
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v3, v3, v18
|
||||||
|
|
||||||
|
add x5, x10, 32
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v4, v4, v18
|
||||||
|
|
||||||
|
add x5, x10, 40
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v5, v5, v18
|
||||||
|
|
||||||
|
add x5, x10, 48
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v6, v6, v18
|
||||||
|
|
||||||
|
add x5, x10, 56
|
||||||
|
vluxei64.v v18, (x5), v9
|
||||||
|
vxor.vv v7, v7, v18
|
||||||
|
|
||||||
|
// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5.
|
||||||
|
DECL(randomx_riscv64_vector_sshash_set_cache_index):
|
||||||
|
// JIT compiler will pick a single instruction reading from the required register
|
||||||
|
vmv.v.v v9, v0
|
||||||
|
vmv.v.v v9, v1
|
||||||
|
vmv.v.v v9, v2
|
||||||
|
vmv.v.v v9, v3
|
||||||
|
vmv.v.v v9, v4
|
||||||
|
vmv.v.v v9, v5
|
||||||
|
vmv.v.v v9, v6
|
||||||
|
vmv.v.v v9, v7
|
||||||
|
|
||||||
|
DECL(randomx_riscv64_vector_sshash_end):
|
||||||
58
src/crypto/randomx/jit_compiler_rv64_vector_static.h
Normal file
58
src/crypto/randomx/jit_compiler_rv64_vector_static.h
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||||
|
Copyright (c) 2019-2021, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
Copyright (c) 2025, SChernykh <https://github.com/SChernykh>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
#include <cstdint>
|
||||||
|
#else
|
||||||
|
#include <stdint.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct randomx_cache;
|
||||||
|
|
||||||
|
void randomx_riscv64_vector_sshash_begin();
|
||||||
|
void randomx_riscv64_vector_sshash_imul_rcp_literals();
|
||||||
|
void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock);
|
||||||
|
void randomx_riscv64_vector_sshash_cache_prefetch();
|
||||||
|
void randomx_riscv64_vector_sshash_generated_instructions();
|
||||||
|
void randomx_riscv64_vector_sshash_generated_instructions_end();
|
||||||
|
void randomx_riscv64_vector_sshash_cache_prefetch();
|
||||||
|
void randomx_riscv64_vector_sshash_xor();
|
||||||
|
void randomx_riscv64_vector_sshash_set_cache_index();
|
||||||
|
void randomx_riscv64_vector_sshash_end();
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
@@ -73,8 +73,20 @@ uint64_t randomx_reciprocal(uint64_t divisor) {
|
|||||||
|
|
||||||
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
uint64_t randomx_reciprocal_fast(uint64_t divisor)
|
||||||
|
{
|
||||||
|
const uint64_t q = (1ULL << 63) / divisor;
|
||||||
|
const uint64_t r = (1ULL << 63) % divisor;
|
||||||
|
|
||||||
|
const uint64_t shift = 64 - __builtin_clzll(divisor);
|
||||||
|
|
||||||
|
return (q << shift) + ((r << shift) / divisor);
|
||||||
|
}
|
||||||
|
#else
|
||||||
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
|
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
|
||||||
return randomx_reciprocal(divisor);
|
return randomx_reciprocal(divisor);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -39,6 +39,9 @@ alignas(64) uint32_t lutDec1[256];
|
|||||||
alignas(64) uint32_t lutDec2[256];
|
alignas(64) uint32_t lutDec2[256];
|
||||||
alignas(64) uint32_t lutDec3[256];
|
alignas(64) uint32_t lutDec3[256];
|
||||||
|
|
||||||
|
alignas(64) uint8_t lutEncIndex[4][32];
|
||||||
|
alignas(64) uint8_t lutDecIndex[4][32];
|
||||||
|
|
||||||
static uint32_t mul_gf2(uint32_t b, uint32_t c)
|
static uint32_t mul_gf2(uint32_t b, uint32_t c)
|
||||||
{
|
{
|
||||||
uint32_t s = 0;
|
uint32_t s = 0;
|
||||||
@@ -115,5 +118,49 @@ static struct SAESInitializer
|
|||||||
lutDec2[i] = w; w = (w << 8) | (w >> 24);
|
lutDec2[i] = w; w = (w << 8) | (w >> 24);
|
||||||
lutDec3[i] = w;
|
lutDec3[i] = w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(lutEncIndex, -1, sizeof(lutEncIndex));
|
||||||
|
memset(lutDecIndex, -1, sizeof(lutDecIndex));
|
||||||
|
|
||||||
|
lutEncIndex[0][ 0] = 0;
|
||||||
|
lutEncIndex[0][ 4] = 4;
|
||||||
|
lutEncIndex[0][ 8] = 8;
|
||||||
|
lutEncIndex[0][12] = 12;
|
||||||
|
lutEncIndex[1][ 0] = 5;
|
||||||
|
lutEncIndex[1][ 4] = 9;
|
||||||
|
lutEncIndex[1][ 8] = 13;
|
||||||
|
lutEncIndex[1][12] = 1;
|
||||||
|
lutEncIndex[2][ 0] = 10;
|
||||||
|
lutEncIndex[2][ 4] = 14;
|
||||||
|
lutEncIndex[2][ 8] = 2;
|
||||||
|
lutEncIndex[2][12] = 6;
|
||||||
|
lutEncIndex[3][ 0] = 15;
|
||||||
|
lutEncIndex[3][ 4] = 3;
|
||||||
|
lutEncIndex[3][ 8] = 7;
|
||||||
|
lutEncIndex[3][12] = 11;
|
||||||
|
|
||||||
|
lutDecIndex[0][ 0] = 0;
|
||||||
|
lutDecIndex[0][ 4] = 4;
|
||||||
|
lutDecIndex[0][ 8] = 8;
|
||||||
|
lutDecIndex[0][12] = 12;
|
||||||
|
lutDecIndex[1][ 0] = 13;
|
||||||
|
lutDecIndex[1][ 4] = 1;
|
||||||
|
lutDecIndex[1][ 8] = 5;
|
||||||
|
lutDecIndex[1][12] = 9;
|
||||||
|
lutDecIndex[2][ 0] = 10;
|
||||||
|
lutDecIndex[2][ 4] = 14;
|
||||||
|
lutDecIndex[2][ 8] = 2;
|
||||||
|
lutDecIndex[2][12] = 6;
|
||||||
|
lutDecIndex[3][ 0] = 7;
|
||||||
|
lutDecIndex[3][ 4] = 11;
|
||||||
|
lutDecIndex[3][ 8] = 15;
|
||||||
|
lutDecIndex[3][12] = 3;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
for (uint32_t j = 0; j < 16; j += 4) {
|
||||||
|
lutEncIndex[i][j + 16] = lutEncIndex[i][j] + 16;
|
||||||
|
lutDecIndex[i][j + 16] = lutDecIndex[i][j] + 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} aes_initializer;
|
} aes_initializer;
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ extern uint32_t lutDec1[256];
|
|||||||
extern uint32_t lutDec2[256];
|
extern uint32_t lutDec2[256];
|
||||||
extern uint32_t lutDec3[256];
|
extern uint32_t lutDec3[256];
|
||||||
|
|
||||||
|
extern uint8_t lutEncIndex[4][32];
|
||||||
|
extern uint8_t lutDecIndex[4][32];
|
||||||
|
|
||||||
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
|
|
||||||
@@ -147,3 +150,32 @@ template<>
|
|||||||
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
return rx_aesdec_vec_i128(in, key);
|
return rx_aesdec_vec_i128(in, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
|
||||||
|
FORCE_INLINE vuint32m1_t softaes_vector_double(
|
||||||
|
vuint32m1_t in,
|
||||||
|
vuint32m1_t key,
|
||||||
|
vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3,
|
||||||
|
const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3)
|
||||||
|
{
|
||||||
|
const vuint8m1_t in8 = __riscv_vreinterpret_v_u32m1_u8m1(in);
|
||||||
|
|
||||||
|
const vuint32m1_t index0 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i0, 32));
|
||||||
|
const vuint32m1_t index1 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i1, 32));
|
||||||
|
const vuint32m1_t index2 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i2, 32));
|
||||||
|
const vuint32m1_t index3 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(in8, i3, 32));
|
||||||
|
|
||||||
|
vuint32m1_t s0 = __riscv_vluxei32_v_u32m1(lut0, __riscv_vsll_vx_u32m1(index0, 2, 8), 8);
|
||||||
|
vuint32m1_t s1 = __riscv_vluxei32_v_u32m1(lut1, __riscv_vsll_vx_u32m1(index1, 2, 8), 8);
|
||||||
|
vuint32m1_t s2 = __riscv_vluxei32_v_u32m1(lut2, __riscv_vsll_vx_u32m1(index2, 2, 8), 8);
|
||||||
|
vuint32m1_t s3 = __riscv_vluxei32_v_u32m1(lut3, __riscv_vsll_vx_u32m1(index3, 2, 8), 8);
|
||||||
|
|
||||||
|
s0 = __riscv_vxor_vv_u32m1(s0, s1, 8);
|
||||||
|
s2 = __riscv_vxor_vv_u32m1(s2, s3, 8);
|
||||||
|
s0 = __riscv_vxor_vv_u32m1(s0, s2, 8);
|
||||||
|
|
||||||
|
return __riscv_vxor_vv_u32m1(s0, key, 8);
|
||||||
|
}
|
||||||
|
#endif // defined(XMRIG_RISCV) && defined(XMRIG_RVV_ENABLED)
|
||||||
|
|||||||
14
src/crypto/randomx/tests/riscv64_vector.s
Normal file
14
src/crypto/randomx/tests/riscv64_vector.s
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/* RISC-V - test if the vector extension and prefetch instruction are present */
|
||||||
|
|
||||||
|
.text
|
||||||
|
.option arch, rv64gcv_zicbop
|
||||||
|
.global main
|
||||||
|
|
||||||
|
main:
|
||||||
|
lla x5, main
|
||||||
|
prefetch.r (x5)
|
||||||
|
li x5, 4
|
||||||
|
vsetvli x6, x5, e64, m1, ta, ma
|
||||||
|
vxor.vv v0, v0, v0
|
||||||
|
sub x10, x5, x6
|
||||||
|
ret
|
||||||
@@ -43,6 +43,12 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache,
|
|||||||
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5));
|
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5));
|
||||||
randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5);
|
randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5);
|
||||||
}
|
}
|
||||||
|
#ifdef XMRIG_RISCV
|
||||||
|
else if (itemCount % 4) {
|
||||||
|
randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 4));
|
||||||
|
randomx_init_dataset(dataset, cache, startItem + itemCount - 4, 4);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
else {
|
else {
|
||||||
randomx_init_dataset(dataset, cache, startItem, itemCount);
|
randomx_init_dataset(dataset, cache, startItem, itemCount);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,9 @@
|
|||||||
#define APP_VER_PATCH 1
|
#define APP_VER_PATCH 1
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# if (_MSC_VER >= 1930)
|
# if (_MSC_VER >= 1950)
|
||||||
|
# define MSVC_VERSION 2026
|
||||||
|
# elif (_MSC_VER >=1930 && _MSC_VER < 1950)
|
||||||
# define MSVC_VERSION 2022
|
# define MSVC_VERSION 2022
|
||||||
# elif (_MSC_VER >= 1920 && _MSC_VER < 1930)
|
# elif (_MSC_VER >= 1920 && _MSC_VER < 1930)
|
||||||
# define MSVC_VERSION 2019
|
# define MSVC_VERSION 2019
|
||||||
@@ -66,6 +68,8 @@
|
|||||||
# define APP_OS "FreeBSD"
|
# define APP_OS "FreeBSD"
|
||||||
#elif defined XMRIG_OS_OPENBSD
|
#elif defined XMRIG_OS_OPENBSD
|
||||||
# define APP_OS "OpenBSD"
|
# define APP_OS "OpenBSD"
|
||||||
|
#elif defined XMRIG_OS_HAIKU
|
||||||
|
# define APP_OS "Haiku"
|
||||||
#else
|
#else
|
||||||
# define APP_OS "Unknown OS"
|
# define APP_OS "Unknown OS"
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user