diff --git a/cmake/randomx.cmake b/cmake/randomx.cmake index 5aa20b807..c15024c97 100644 --- a/cmake/randomx.cmake +++ b/cmake/randomx.cmake @@ -83,10 +83,13 @@ if (WITH_RANDOMX) elseif (XMRIG_RISCV AND CMAKE_SIZEOF_VOID_P EQUAL 8) list(APPEND SOURCES_CRYPTO src/crypto/randomx/jit_compiler_rv64_static.S + src/crypto/randomx/jit_compiler_rv64_vector_static.S src/crypto/randomx/jit_compiler_rv64.cpp + src/crypto/randomx/jit_compiler_rv64_vector.cpp ) # cheat because cmake and ccache hate each other set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C) + set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C) else() list(APPEND SOURCES_CRYPTO src/crypto/randomx/jit_compiler_fallback.cpp diff --git a/src/crypto/randomx/jit_compiler_rv64.cpp b/src/crypto/randomx/jit_compiler_rv64.cpp index 130cf9015..161343471 100644 --- a/src/crypto/randomx/jit_compiler_rv64.cpp +++ b/src/crypto/randomx/jit_compiler_rv64.cpp @@ -32,6 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "crypto/randomx/jit_compiler_rv64.hpp" #include "crypto/randomx/jit_compiler_rv64_static.hpp" +#include "crypto/randomx/jit_compiler_rv64_vector.h" +#include "crypto/randomx/jit_compiler_rv64_vector_static.h" #include "crypto/randomx/superscalar.hpp" #include "crypto/randomx/program.hpp" #include "crypto/randomx/reciprocal.h" @@ -618,20 +620,32 @@ namespace randomx { entryProgram = state.code + LiteralPoolSize + sizeDataInit; //jal x1, SuperscalarHash emitJump(state, ReturnReg, LiteralPoolSize + offsetFixDataCall, SuperScalarHashOffset); + + vectorCodeSize = ((uint8_t*)randomx_riscv64_vector_sshash_end) - ((uint8_t*)randomx_riscv64_vector_sshash_begin); + vectorCode = static_cast(allocExecutableMemory(vectorCodeSize, hugePagesJIT && hugePagesEnable)); } JitCompilerRV64::~JitCompilerRV64() { freePagedMemory(state.code, CodeSize); + freePagedMemory(vectorCode, vectorCodeSize); } void JitCompilerRV64::enableWriting() const { xmrig::VirtualMemory::protectRW(entryDataInit, ExecutableSize); + + if (vectorCode) { + xmrig::VirtualMemory::protectRW(vectorCode, vectorCodeSize); + } } void JitCompilerRV64::enableExecution() const { xmrig::VirtualMemory::protectRX(entryDataInit, ExecutableSize); + + if (vectorCode) { + xmrig::VirtualMemory::protectRX(vectorCode, vectorCodeSize); + } } void JitCompilerRV64::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t) { @@ -666,6 +680,11 @@ namespace randomx { template void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) { + if (optimizedDatasetInit > 0) { + entryDataInitOptimized = generateDatasetInitVectorRV64(vectorCode, vectorCodeSize, programs, RandomX_ConfigurationBase::CacheAccesses); + return; + } + state.codePos = SuperScalarHashOffset; state.rcpCount = 0; state.emit(codeSshInit, sizeSshInit); @@ -703,6 +722,10 @@ namespace randomx { template void JitCompilerRV64::generateSuperscalarHash(SuperscalarProgram(&)[RANDOMX_CACHE_MAX_ACCESSES]); + DatasetInitFunc* JitCompilerRV64::getDatasetInitFunc() { + return (DatasetInitFunc*)((optimizedDatasetInit > 0) ? entryDataInitOptimized : entryDataInit); + } + void JitCompilerRV64::v1_IADD_RS(HANDLER_ARGS) { state.registerUsage[isn.dst] = i; int shift = isn.getModShift(); diff --git a/src/crypto/randomx/jit_compiler_rv64.hpp b/src/crypto/randomx/jit_compiler_rv64.hpp index 3eac10a2d..dbad88e1b 100644 --- a/src/crypto/randomx/jit_compiler_rv64.hpp +++ b/src/crypto/randomx/jit_compiler_rv64.hpp @@ -92,9 +92,7 @@ namespace randomx { ProgramFunc* getProgramFunc() { return (ProgramFunc*)entryProgram; } - DatasetInitFunc* getDatasetInitFunc() { - return (DatasetInitFunc*)entryDataInit; - } + DatasetInitFunc* getDatasetInitFunc(); uint8_t* getCode() { return state.code; } @@ -106,7 +104,12 @@ namespace randomx { static InstructionGeneratorRV64 engine[256]; private: CompilerState state; + + uint8_t* vectorCode; + size_t vectorCodeSize; + void* entryDataInit; + void* entryDataInitOptimized; void* entryProgram; public: diff --git a/src/crypto/randomx/jit_compiler_rv64_vector.cpp b/src/crypto/randomx/jit_compiler_rv64_vector.cpp new file mode 100644 index 000000000..8dc95613e --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector.cpp @@ -0,0 +1,207 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "crypto/randomx/configuration.h" +#include "crypto/randomx/jit_compiler_rv64_vector.h" +#include "crypto/randomx/jit_compiler_rv64_vector_static.h" +#include "crypto/randomx/reciprocal.h" +#include "crypto/randomx/superscalar.hpp" + +namespace randomx { + +#define ADDR(x) ((uint8_t*) &(x)) +#define DIST(x, y) (ADDR(y) - ADDR(x)) + +void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs) +{ + memcpy(buf, reinterpret_cast(randomx_riscv64_vector_sshash_begin), buf_size); + + uint8_t* p = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions); + + uint8_t* literals = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_imul_rcp_literals); + uint8_t* cur_literal = literals; + + for (size_t i = 0; i < num_programs; ++i) { + // Step 4 + size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor); + memcpy(p, reinterpret_cast(randomx_riscv64_vector_sshash_cache_prefetch), k); + p += k; + + // Step 5 + for (uint32_t j = 0; j < programs[i].size; ++j) { + const uint32_t dst = programs[i].programBuffer[j].dst & 7; + const uint32_t src = programs[i].programBuffer[j].src & 7; + const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3; + const uint32_t imm32 = programs[i].programBuffer[j].imm32; + + uint32_t inst; + #define EMIT(data) inst = (data); memcpy(p, &inst, 4); p += 4 + + switch (static_cast(programs[i].programBuffer[j].opcode)) { + case SuperscalarInstructionType::ISUB_R: + // 57 00 00 0A vsub.vv v0, v0, v0 + EMIT(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IXOR_R: + // 57 00 00 2E vxor.vv v0, v0, v0 + EMIT(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IADD_RS: + // 57 39 00 96 vsll.vi v18, v0, 0 + // 57 00 09 02 vadd.vv v0, v0, v18 + EMIT(0x96003957 | (modShift << 15) | (src << 20)); + EMIT(0x02090057 | (dst << 7) | (dst << 20)); + break; + + case SuperscalarInstructionType::IMUL_R: + // 57 20 00 96 vmul.vv v0, v0, v0 + EMIT(0x96002057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IROR_C: + { + const uint32_t shift_right = imm32 & 63; + const uint32_t shift_left = 64 - shift_right; + + if (shift_right < 32) { + // 57 39 00 A2 vsrl.vi v18, v0, 0 + EMIT(0xA2003957 | (shift_right << 15) | (dst << 20)); + } + else { + // 93 02 00 00 li x5, 0 + // 57 C9 02 A2 vsrl.vx v18, v0, x5 + EMIT(0x00000293 | (shift_right << 20)); + EMIT(0xA202C957 | (dst << 20)); + } + + if (shift_left < 32) { + // 57 30 00 96 vsll.vi v0, v0, 0 + EMIT(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20)); + } + else { + // 93 02 00 00 li x5, 0 + // 57 C0 02 96 vsll.vx v0, v0, x5 + EMIT(0x00000293 | (shift_left << 20)); + EMIT(0x9602C057 | (dst << 7) | (dst << 20)); + } + + // 57 00 20 2B vor.vv v0, v18, v0 + EMIT(0x2B200057 | (dst << 7) | (dst << 15)); + } + break; + + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: + // B7 02 00 00 lui x5, 0 + // 9B 82 02 00 addiw x5, x5, 0 + // 57 C0 02 02 vadd.vx v0, v0, x5 + EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000)); + EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20); + EMIT(0x0202C057 | (dst << 7) | (dst << 20)); + break; + + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: + // B7 02 00 00 lui x5, 0 + // 9B 82 02 00 addiw x5, x5, 0 + // 57 C0 02 2E vxor.vx v0, v0, x5 + EMIT(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000)); + EMIT(0x0002829B | ((imm32 & 0x00000FFF)) << 20); + EMIT(0x2E02C057 | (dst << 7) | (dst << 20)); + break; + + case SuperscalarInstructionType::IMULH_R: + // 57 20 00 92 vmulhu.vv v0, v0, v0 + EMIT(0x92002057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::ISMULH_R: + // 57 20 00 9E vmulh.vv v0, v0, v0 + EMIT(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20)); + break; + + case SuperscalarInstructionType::IMUL_RCP: + { + uint32_t offset = cur_literal - literals; + + if (offset == 2040) { + literals += 2040; + offset = 0; + + // 93 87 87 7F add x15, x15, 2040 + EMIT(0x7F878793); + } + + const uint64_t r = randomx_reciprocal_fast(imm32); + memcpy(cur_literal, &r, 8); + cur_literal += 8; + + // 83 B2 07 00 ld x5, (x15) + // 57 E0 02 96 vmul.vx v0, v0, x5 + EMIT(0x0007B283 | (offset << 20)); + EMIT(0x9602E057 | (dst << 7) | (dst << 20)); + } + break; + + default: + break; + } + } + + // Step 6 + k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_set_cache_index); + memcpy(p, reinterpret_cast(randomx_riscv64_vector_sshash_xor), k); + p += k; + + // Step 7 + if (i + 1 < num_programs) { + memcpy(p, reinterpret_cast(randomx_riscv64_vector_sshash_set_cache_index) + programs[i].getAddressRegister() * 4, 4); + p += 4; + } + } + + // Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction + const uint8_t* e = buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_generated_instructions_end); + const uint32_t k = e - p; + const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000); + memcpy(p, &j, 4); + +#ifdef __GNUC__ + __builtin___clear_cache((char*) buf, (char*)(buf + buf_size)); +#endif + + return buf + DIST(randomx_riscv64_vector_sshash_begin, randomx_riscv64_vector_sshash_dataset_init); +} + +} // namespace randomx diff --git a/src/crypto/randomx/jit_compiler_rv64_vector.h b/src/crypto/randomx/jit_compiler_rv64_vector.h new file mode 100644 index 000000000..ea06862e5 --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include +#include + +namespace randomx { + +class SuperscalarProgram; + +void* generateDatasetInitVectorRV64(uint8_t* buf, size_t buf_size, SuperscalarProgram* programs, size_t num_programs); + +} // namespace randomx diff --git a/src/crypto/randomx/jit_compiler_rv64_vector_static.S b/src/crypto/randomx/jit_compiler_rv64_vector_static.S new file mode 100644 index 000000000..ac63c625f --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector_static.S @@ -0,0 +1,296 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "configuration.h" + +// Compatibility macros + +#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES) +#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES +#endif + +#if defined(RANDOMX_ARGON_MEMORY) +#define RANDOMX_CACHE_MASK RANDOMX_ARGON_MEMORY * 1024 / 64 - 1 +#elif defined(RANDOMX_CACHE_MAX_SIZE) +#define RANDOMX_CACHE_MASK RANDOMX_CACHE_MAX_SIZE / 64 - 1 +#endif + +#define DECL(x) x + +.text + +.option arch, rv64gcv_zicbop +.option pic + +.global DECL(randomx_riscv64_vector_sshash_begin) +.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals) +.global DECL(randomx_riscv64_vector_sshash_dataset_init) +.global DECL(randomx_riscv64_vector_sshash_generated_instructions) +.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end) +.global DECL(randomx_riscv64_vector_sshash_cache_prefetch) +.global DECL(randomx_riscv64_vector_sshash_xor) +.global DECL(randomx_riscv64_vector_sshash_set_cache_index) +.global DECL(randomx_riscv64_vector_sshash_end) + +.balign 8 + +DECL(randomx_riscv64_vector_sshash_begin): + +sshash_constant_0: .dword 6364136223846793005 +sshash_constant_1: .dword 9298411001130361340 +sshash_constant_2: .dword 12065312585734608966 +sshash_constant_3: .dword 9306329213124626780 +sshash_constant_4: .dword 5281919268842080866 +sshash_constant_5: .dword 10536153434571861004 +sshash_constant_6: .dword 3398623926847679864 +sshash_constant_7: .dword 9549104520008361294 +sshash_offsets: .dword 0,1,2,3 +store_offsets: .dword 0,64,128,192 + +DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0 + +/* +Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation + +Register layout +--------------- +x5 = temporary + +x10 = randomx cache +x11 = output buffer +x12 = startBlock +x13 = endBlock + +x14 = cache mask +x15 = imul_rcp literal pointer + +v0-v7 = r0-r7 +v8 = itemNumber +v9 = cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory + +v10-v17 = sshash constants + +v18 = temporary + +v19 = dataset item store offsets +*/ + +DECL(randomx_riscv64_vector_sshash_dataset_init): + // Process 4 64-bit values at a time + li x5, 4 + vsetvli x5, x5, e64, m1, ta, ma + + // Load cache->memory pointer + ld x10, (x10) + + // Init cache mask + li x14, RANDOMX_CACHE_MASK + + // Init dataset item store offsets + lla x5, store_offsets + vle64.v v19, (x5) + + // Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3) + lla x5, sshash_offsets + vle64.v v8, (x5) + vadd.vx v8, v8, x12 + + // Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector) + lla x5, sshash_constant_0 + vlse64.v v10, (x5), x0 + + lla x5, sshash_constant_1 + vlse64.v v11, (x5), x0 + + lla x5, sshash_constant_2 + vlse64.v v12, (x5), x0 + + lla x5, sshash_constant_3 + vlse64.v v13, (x5), x0 + + lla x5, sshash_constant_4 + vlse64.v v14, (x5), x0 + + lla x5, sshash_constant_5 + vlse64.v v15, (x5), x0 + + lla x5, sshash_constant_6 + vlse64.v v16, (x5), x0 + + lla x5, sshash_constant_7 + vlse64.v v17, (x5), x0 + + // Calculate the end pointer for dataset init + sub x13, x13, x12 + slli x13, x13, 6 + add x13, x13, x11 + +init_item: + // Step 1. Init r0-r7 + + // r0 = (itemNumber + 1) * 6364136223846793005 + vmv.v.v v0, v8 + vmadd.vv v0, v10, v10 + + // r_i = r0 ^ c_i for i = 1..7 + vxor.vv v1, v0, v11 + vxor.vv v2, v0, v12 + vxor.vv v3, v0, v13 + vxor.vv v4, v0, v14 + vxor.vv v5, v0, v15 + vxor.vv v6, v0, v16 + vxor.vv v7, v0, v17 + + // Step 2. Let cacheIndex = itemNumber + vmv.v.v v9, v8 + + // Step 3 is implicit (all iterations are inlined, there is no "i") + + // Init imul_rcp literal pointer + lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals + +DECL(randomx_riscv64_vector_sshash_generated_instructions): + // Generated by JIT compiler + // + // Step 4. randomx_riscv64_vector_sshash_cache_prefetch + // Step 5. SuperscalarHash[i] + // Step 6. randomx_riscv64_vector_sshash_xor + // Step 7. randomx_riscv64_vector_sshash_set_cache_index + // + // Above steps will be repeated RANDOMX_CACHE_ACCESSES times + .fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0 + +DECL(randomx_riscv64_vector_sshash_generated_instructions_end): + // Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data. + vsuxei64.v v0, (x11), v19 + + add x5, x11, 8 + vsuxei64.v v1, (x5), v19 + + add x5, x11, 16 + vsuxei64.v v2, (x5), v19 + + add x5, x11, 24 + vsuxei64.v v3, (x5), v19 + + add x5, x11, 32 + vsuxei64.v v4, (x5), v19 + + add x5, x11, 40 + vsuxei64.v v5, (x5), v19 + + add x5, x11, 48 + vsuxei64.v v6, (x5), v19 + + add x5, x11, 56 + vsuxei64.v v7, (x5), v19 + + // Iterate to the next 4 items + vadd.vi v8, v8, 4 + add x11, x11, 256 + bltu x11, x13, init_item + + ret + +// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache. +DECL(randomx_riscv64_vector_sshash_cache_prefetch): + // v9 = convert from cacheIndex to a direct pointer into cache->memory + vand.vx v9, v9, x14 + vsll.vi v9, v9, 6 + vadd.vx v9, v9, x10 + + // Prefetch element 0 + vmv.x.s x5, v9 + prefetch.r (x5) + + // Prefetch element 1 + vslidedown.vi v18, v9, 1 + vmv.x.s x5, v18 + prefetch.r (x5) + + // Prefetch element 2 + vslidedown.vi v18, v9, 2 + vmv.x.s x5, v18 + prefetch.r (x5) + + // Prefetch element 3 + vslidedown.vi v18, v9, 3 + vmv.x.s x5, v18 + prefetch.r (x5) + + // v9 = byte offset into cache->memory + vsub.vx v9, v9, x10 + +// Step 6. XOR all registers with data loaded from randomx cache +DECL(randomx_riscv64_vector_sshash_xor): + vluxei64.v v18, (x10), v9 + vxor.vv v0, v0, v18 + + add x5, x10, 8 + vluxei64.v v18, (x5), v9 + vxor.vv v1, v1, v18 + + add x5, x10, 16 + vluxei64.v v18, (x5), v9 + vxor.vv v2, v2, v18 + + add x5, x10, 24 + vluxei64.v v18, (x5), v9 + vxor.vv v3, v3, v18 + + add x5, x10, 32 + vluxei64.v v18, (x5), v9 + vxor.vv v4, v4, v18 + + add x5, x10, 40 + vluxei64.v v18, (x5), v9 + vxor.vv v5, v5, v18 + + add x5, x10, 48 + vluxei64.v v18, (x5), v9 + vxor.vv v6, v6, v18 + + add x5, x10, 56 + vluxei64.v v18, (x5), v9 + vxor.vv v7, v7, v18 + +// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5. +DECL(randomx_riscv64_vector_sshash_set_cache_index): + // JIT compiler will pick a single instruction reading from the required register + vmv.v.v v9, v0 + vmv.v.v v9, v1 + vmv.v.v v9, v2 + vmv.v.v v9, v3 + vmv.v.v v9, v4 + vmv.v.v v9, v5 + vmv.v.v v9, v6 + vmv.v.v v9, v7 + +DECL(randomx_riscv64_vector_sshash_end): diff --git a/src/crypto/randomx/jit_compiler_rv64_vector_static.h b/src/crypto/randomx/jit_compiler_rv64_vector_static.h new file mode 100644 index 000000000..09bab597e --- /dev/null +++ b/src/crypto/randomx/jit_compiler_rv64_vector_static.h @@ -0,0 +1,58 @@ +/* +Copyright (c) 2018-2020, tevador +Copyright (c) 2019-2021, XMRig , +Copyright (c) 2025, SChernykh + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#if defined(__cplusplus) +#include +#else +#include +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +struct randomx_cache; + +void randomx_riscv64_vector_sshash_begin(); +void randomx_riscv64_vector_sshash_imul_rcp_literals(); +void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock); +void randomx_riscv64_vector_sshash_cache_prefetch(); +void randomx_riscv64_vector_sshash_generated_instructions(); +void randomx_riscv64_vector_sshash_generated_instructions_end(); +void randomx_riscv64_vector_sshash_cache_prefetch(); +void randomx_riscv64_vector_sshash_xor(); +void randomx_riscv64_vector_sshash_set_cache_index(); +void randomx_riscv64_vector_sshash_end(); + +#if defined(__cplusplus) +} +#endif diff --git a/src/crypto/randomx/reciprocal.c b/src/crypto/randomx/reciprocal.c index 87cda2677..ebd7662ca 100644 --- a/src/crypto/randomx/reciprocal.c +++ b/src/crypto/randomx/reciprocal.c @@ -73,8 +73,20 @@ uint64_t randomx_reciprocal(uint64_t divisor) { #if !RANDOMX_HAVE_FAST_RECIPROCAL +#ifdef __GNUC__ +uint64_t randomx_reciprocal_fast(uint64_t divisor) +{ + const uint64_t q = (1ULL << 63) / divisor; + const uint64_t r = (1ULL << 63) % divisor; + + const uint64_t shift = 64 - __builtin_clzll(divisor); + + return (q << shift) + ((r << shift) / divisor); +} +#else uint64_t randomx_reciprocal_fast(uint64_t divisor) { return randomx_reciprocal(divisor); } +#endif #endif diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index 86b3a3f6d..9c41dd43f 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -43,6 +43,12 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache, randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 5)); randomx_init_dataset(dataset, cache, startItem + itemCount - 5, 5); } +#ifdef XMRIG_RISCV + else if (itemCount % 4) { + randomx_init_dataset(dataset, cache, startItem, itemCount - (itemCount % 4)); + randomx_init_dataset(dataset, cache, startItem + itemCount - 4, 4); + } +#endif else { randomx_init_dataset(dataset, cache, startItem, itemCount); }