1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-02-01 09:43:03 -05:00

Merge pull request #3774 from SChernykh/dev

RandomX v2 (RISC-V)
This commit is contained in:
xmrig
2026-02-01 12:16:08 +07:00
committed by GitHub
10 changed files with 398 additions and 103 deletions

View File

@@ -110,7 +110,7 @@ if (WITH_RANDOMX)
endif()
endif()
set_source_files_properties(src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTIES COMPILE_FLAGS "-march=${RV64_VECTOR_FILE_ARCH}")
set_source_files_properties(src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTIES COMPILE_FLAGS "-march=${RV64_VECTOR_FILE_ARCH}_zvkned")
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_vector.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}")
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zvkned.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}_zvkned")
else()

View File

@@ -75,7 +75,7 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
hashAes1Rx4_RVV<softAes>(input, inputSize, hash);
hashAes1Rx4_RVV(input, inputSize, hash);
return;
}
#endif
@@ -156,7 +156,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer)
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
fillAes1Rx4_RVV<softAes>(state, outputSize, buffer);
fillAes1Rx4_RVV(state, outputSize, buffer);
return;
}
#endif
@@ -213,7 +213,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer)
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
fillAes4Rx4_RVV<softAes>(state, outputSize, buffer);
fillAes4Rx4_RVV(state, outputSize, buffer);
return;
}
#endif
@@ -297,7 +297,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
}
if (xmrig::Cpu::info()->hasRISCV_Vector()) {
hashAndFillAes1Rx4_RVV<softAes, unroll>(scratchpad, scratchpadSize, hash, fill_state);
hashAndFillAes1Rx4_RVV(scratchpad, scratchpadSize, hash, fill_state);
return;
}
#endif

View File

@@ -69,7 +69,16 @@ static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 0, 4, 8, 12, 64, 68, 72, 76 };
template<int softAes>
#define lutEnc0 lutEnc[0]
#define lutEnc1 lutEnc[1]
#define lutEnc2 lutEnc[2]
#define lutEnc3 lutEnc[3]
#define lutDec0 lutDec[0]
#define lutDec1 lutDec[1]
#define lutDec2 lutDec[2]
#define lutDec3 lutDec[3]
void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (const uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
@@ -113,10 +122,6 @@ void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash) {
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
}
template void hashAes1Rx4_RVV<false>(const void *input, size_t inputSize, void *hash);
template void hashAes1Rx4_RVV<true>(const void *input, size_t inputSize, void *hash);
template<int softAes>
void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
@@ -153,10 +158,6 @@ void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer) {
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
}
template void fillAes1Rx4_RVV<false>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4_RVV<true>(void *state, size_t outputSize, void *buffer);
template<int softAes>
void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
@@ -203,10 +204,6 @@ void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer) {
}
}
template void fillAes4Rx4_RVV<false>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4_RVV<true>(void *state, size_t outputSize, void *buffer);
template<int softAes, int unroll>
void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
@@ -244,54 +241,13 @@ void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash,
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
switch (softAes) {
case 0:
HASH_STATE(0);
HASH_STATE(1);
HASH_STATE(0);
HASH_STATE(1);
FILL_STATE(0);
FILL_STATE(1);
FILL_STATE(0);
FILL_STATE(1);
scratchpadPtr += 128;
break;
default:
switch (unroll) {
case 4:
HASH_STATE(0);
FILL_STATE(0);
HASH_STATE(1);
FILL_STATE(1);
HASH_STATE(2);
FILL_STATE(2);
HASH_STATE(3);
FILL_STATE(3);
scratchpadPtr += 64 * 4;
break;
case 2:
HASH_STATE(0);
FILL_STATE(0);
HASH_STATE(1);
FILL_STATE(1);
scratchpadPtr += 64 * 2;
break;
default:
HASH_STATE(0);
FILL_STATE(0);
scratchpadPtr += 64;
break;
}
break;
}
scratchpadPtr += 128;
}
#undef HASH_STATE
@@ -314,9 +270,3 @@ void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash,
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
}
template void hashAndFillAes1Rx4_RVV<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<2,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<2,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
template void hashAndFillAes1Rx4_RVV<2,4>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);

View File

@@ -29,14 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
template<int softAes>
void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash);
template<int softAes>
void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer);
template<int softAes>
void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer);
template<int softAes, int unroll>
void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View File

@@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/virtual_memory.hpp"
#include "crypto/randomx/soft_aes.h"
#include "crypto/common/VirtualMemory.h"
@@ -253,10 +254,12 @@ namespace randomx {
static const uint8_t* codePrologue = (uint8_t*)&randomx_riscv64_prologue;
static const uint8_t* codeLoopBegin = (uint8_t*)&randomx_riscv64_loop_begin;
static const uint8_t* codeDataRead = (uint8_t*)&randomx_riscv64_data_read;
static const uint8_t* codeDataRead2 = (uint8_t*)&randomx_riscv64_data_read_v2_tweak;
static const uint8_t* codeDataReadLight = (uint8_t*)&randomx_riscv64_data_read_light;
static const uint8_t* codeDataReadLight1 = (uint8_t*)&randomx_riscv64_data_read_light_v1;
static const uint8_t* codeDataReadLight2 = (uint8_t*)&randomx_riscv64_data_read_light_v2;
static const uint8_t* codeFixLoopCall = (uint8_t*)&randomx_riscv64_fix_loop_call;
static const uint8_t* codeSpadStore = (uint8_t*)&randomx_riscv64_spad_store;
static const uint8_t* codeSpadStoreHardAes = (uint8_t*)&randomx_riscv64_spad_store_hardaes;
static const uint8_t* codeSpadStoreSoftAes = (uint8_t*)&randomx_riscv64_spad_store_softaes;
static const uint8_t* codeLoopEnd = (uint8_t*)&randomx_riscv64_loop_end;
static const uint8_t* codeFixContinueLoop = (uint8_t*)&randomx_riscv64_fix_continue_loop;
@@ -272,9 +275,13 @@ namespace randomx {
static const int32_t sizeDataInit = codePrologue - codeDataInit;
static const int32_t sizePrologue = codeLoopBegin - codePrologue;
static const int32_t sizeLoopBegin = codeDataRead - codeLoopBegin;
static const int32_t sizeDataRead = codeDataReadLight - codeDataRead;
static const int32_t sizeDataReadLight = codeSpadStore - codeDataReadLight;
static const int32_t sizeSpadStore = codeSpadStoreHardAes - codeSpadStore;
static const int32_t sizeDataRead = codeDataRead2 - codeDataRead;
static const int32_t sizeDataRead2 = codeDataReadLight - codeDataRead2;
static const int32_t sizeDataReadLight = codeDataReadLight1 - codeDataReadLight;
static const int32_t sizeDataReadLight1 = codeDataReadLight2 - codeDataReadLight1;
static const int32_t sizeDataReadLight2 = codeFixLoopCall - codeDataReadLight2;
static const int32_t sizeFixLoopCall = codeSpadStore - codeFixLoopCall;
static const int32_t sizeSpadStore = codeSpadStoreSoftAes - codeSpadStore;
static const int32_t sizeSpadStoreSoftAes = codeLoopEnd - codeSpadStoreSoftAes;
static const int32_t sizeLoopEnd = codeEpilogue - codeLoopEnd;
static const int32_t sizeEpilogue = codeSoftAes - codeEpilogue;
@@ -284,7 +291,6 @@ namespace randomx {
static const int32_t sizeSshPrefetch = codeSshEnd - codeSshPrefetch;
static const int32_t offsetFixDataCall = codeFixDataCall - codeDataInit;
static const int32_t offsetFixLoopCall = codeFixLoopCall - codeDataReadLight;
static const int32_t offsetFixContinueLoop = codeFixContinueLoop - codeLoopEnd;
static const int32_t LoopTopPos = LiteralPoolSize + sizeDataInit + sizePrologue;
@@ -479,8 +485,15 @@ namespace randomx {
static void emitProgramPrefix(CompilerState& state, Program& prog, ProgramConfiguration& pcfg) {
state.codePos = RandomXCodePos;
state.rcpCount = 0;
state.emitAt(LiteralPoolOffset + sizeLiterals, pcfg.eMask[0]);
state.emitAt(LiteralPoolOffset + sizeLiterals + 8, pcfg.eMask[1]);
if (RandomX_CurrentConfig.Tweak_V2_AES) {
state.emitAt(LiteralPoolOffset + sizeLiterals + 16, (uint64_t) &lutEnc[2][0]);
state.emitAt(LiteralPoolOffset + sizeLiterals + 24, (uint64_t) &lutDec[2][0]);
}
for (unsigned i = 0; i < RegistersCount; ++i) {
state.registerUsage[i] = -1;
}
@@ -493,7 +506,13 @@ namespace randomx {
}
static void emitProgramSuffix(CompilerState& state, ProgramConfiguration& pcfg) {
state.emit(codeSpadStore, sizeSpadStore);
if (RandomX_CurrentConfig.Tweak_V2_AES) {
state.emit(codeSpadStoreSoftAes, sizeSpadStoreSoftAes);
}
else {
state.emit(codeSpadStore, sizeSpadStore);
}
int32_t fixPos = state.codePos;
state.emit(codeLoopEnd, sizeLoopEnd);
//xor x26, x{readReg0}, x{readReg1}
@@ -502,6 +521,10 @@ namespace randomx {
//j LoopTop
emitJump(state, 0, fixPos, LoopTopPos);
state.emit(codeEpilogue, sizeEpilogue);
if (RandomX_CurrentConfig.Tweak_V2_AES) {
state.emit(codeSoftAes, sizeSoftAes);
}
}
static void generateSuperscalarCode(CodeBuffer& buf, Instruction isn, bool lastLiteral) {
@@ -669,6 +692,9 @@ namespace randomx {
state.emit(codeDataRead, sizeDataRead);
//xor x8, x{readReg2}, x{readReg3}
state.emitAt(fixPos, rvi(rv64::XOR, Tmp1Reg, regR(pcfg.readReg2), regR(pcfg.readReg3)));
int32_t fixPos2 = state.codePos;
state.emit(codeDataRead2, sizeDataRead2);
state.emitAt(fixPos2, (uint16_t)(RandomX_CurrentConfig.Tweak_V2_PREFETCH ? 0x1402 : 0x0001));
emitProgramSuffix(state, pcfg);
clearCache(state);
}
@@ -691,7 +717,14 @@ namespace randomx {
state.emitAt(fixPos + 4, rv64::LUI | (uimm << 12) | rvrd(Tmp2Reg));
//addi x9, x9, {limm}
state.emitAt(fixPos + 8, rvi(rv64::ADDI, Tmp2Reg, Tmp2Reg, limm));
fixPos += offsetFixLoopCall;
if (RandomX_CurrentConfig.Tweak_V2_PREFETCH) {
state.emit(codeDataReadLight2, sizeDataReadLight2);
}
else {
state.emit(codeDataReadLight1, sizeDataReadLight1);
}
fixPos = state.codePos;
state.emit(codeFixLoopCall, sizeFixLoopCall);
//jal x1, SuperscalarHash
emitJump(state, ReturnReg, fixPos, SuperScalarHashOffset);
emitProgramSuffix(state, pcfg);
@@ -1175,10 +1208,22 @@ namespace randomx {
//c.or x8, x9
state.emit(rvc(rv64::C_OR, Tmp1Reg + OffsetXC, Tmp2Reg + OffsetXC));
#endif
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
//andi x9, x8, 240
state.emit(rvi(rv64::ANDI, Tmp2Reg, Tmp1Reg, 240));
//c.bnez x9, +12
state.emit(uint16_t(0xE491));
}
//c.andi x8, 12
state.emit(rvc(rv64::C_ANDI, Tmp1Reg + OffsetXC, 12));
}
else {
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
//andi x9, x{src}, 240
state.emit(rvi(rv64::ANDI, Tmp2Reg, regR(isn.src), 240));
//c.bnez x9, +14
state.emit(uint16_t(0xE499));
}
//and x8, x{src}, 12
state.emit(rvi(rv64::ANDI, Tmp1Reg, regR(isn.src), 12));
}

View File

@@ -40,10 +40,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.global DECL(randomx_riscv64_prologue)
.global DECL(randomx_riscv64_loop_begin)
.global DECL(randomx_riscv64_data_read)
.global DECL(randomx_riscv64_data_read_v2_tweak)
.global DECL(randomx_riscv64_data_read_light)
.global DECL(randomx_riscv64_data_read_light_v1)
.global DECL(randomx_riscv64_data_read_light_v2)
.global DECL(randomx_riscv64_fix_loop_call)
.global DECL(randomx_riscv64_spad_store)
.global DECL(randomx_riscv64_spad_store_hardaes)
.global DECL(randomx_riscv64_spad_store_softaes)
.global DECL(randomx_riscv64_loop_end)
.global DECL(randomx_riscv64_fix_continue_loop)
@@ -408,7 +410,9 @@ DECL(randomx_riscv64_data_read):
slli x8, x8, 32
srli x8, x8, 32
#endif
/* update "mx" */
DECL(randomx_riscv64_data_read_v2_tweak):
slli x8, x8, 32 /* JIT compiler will replace it with "nop" for RandomX v1 */
/* update "mp" */
xor x25, x25, x8
/* read dataset and update registers */
ld x8, 0(x7)
@@ -456,13 +460,22 @@ DECL(randomx_riscv64_data_read_light):
slli x25, x25, 32
or x25, x25, x31
#endif
DECL(randomx_riscv64_data_read_light_v1):
slli x8, x8, 32
/* update "mx" */
/* update "mp" */
xor x25, x25, x8
/* the next dataset item */
and x7, x25, x1
srli x7, x7, 6
add x7, x7, x9
DECL(randomx_riscv64_data_read_light_v2):
/* the next dataset item */
and x7, x25, x1
srli x7, x7, 6
add x7, x7, x9
and x8, x8, x1
/* update "mp" */
xor x25, x25, x8
DECL(randomx_riscv64_fix_loop_call):
jal superscalar_hash /* JIT compiler will adjust the offset */
xor x16, x16, x8
@@ -536,9 +549,6 @@ DECL(randomx_riscv64_spad_store):
sd x30, 56(x26)
fmv.d.x f7, x30
DECL(randomx_riscv64_spad_store_hardaes):
nop /* not implemented */
DECL(randomx_riscv64_spad_store_softaes):
/* store integer registers */
sd x16, 0(x27)

View File

@@ -36,10 +36,12 @@ extern "C" {
void randomx_riscv64_prologue();
void randomx_riscv64_loop_begin();
void randomx_riscv64_data_read();
void randomx_riscv64_data_read_v2_tweak();
void randomx_riscv64_data_read_light();
void randomx_riscv64_data_read_light_v1();
void randomx_riscv64_data_read_light_v2();
void randomx_riscv64_fix_loop_call();
void randomx_riscv64_spad_store();
void randomx_riscv64_spad_store_hardaes();
void randomx_riscv64_spad_store_softaes();
void randomx_riscv64_loop_end();
void randomx_riscv64_fix_continue_loop();

View File

@@ -34,12 +34,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/soft_aes.h"
#include "backend/cpu/Cpu.h"
namespace randomx {
#define ADDR(x) ((uint8_t*) &(x))
#define DIST(x, y) (ADDR(y) - ADDR(x))
#define JUMP(offset) (0x6F | (((offset) & 0x7FE) << 20) | (((offset) & 0x800) << 9) | ((offset) & 0xFF000))
void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs, size_t num_programs)
{
uint8_t* p = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_generated_instructions);
@@ -205,8 +209,7 @@ void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs,
// Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction
const uint8_t* e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_generated_instructions_end);
const uint32_t k = e - p;
const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000);
const uint32_t j = JUMP(e - p);
memcpy(p, &j, 4);
char* result = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_dataset_init));
@@ -323,6 +326,26 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
params[3] = RandomX_CurrentConfig.DatasetBaseSize - 64;
params[4] = (1 << RandomX_ConfigurationBase::JumpBits) - 1;
const bool hasAES = xmrig::Cpu::info()->hasAES();
if (RandomX_CurrentConfig.Tweak_V2_AES && !hasAES) {
params[5] = (uint64_t) &lutEnc[2][0];
params[6] = (uint64_t) &lutDec[2][0];
params[7] = (uint64_t) lutEncIndex;
params[8] = (uint64_t) lutDecIndex;
uint32_t* p1 = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_v2_soft_aes_init));
// Restore vsetivli zero, 4, e32, m1, ta, ma
*p1 = 0xCD027057;
}
else {
uint32_t* p1 = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_v2_soft_aes_init));
// Emit "J randomx_riscv64_vector_program_main_loop" instruction
*p1 = JUMP(DIST(randomx_riscv64_vector_program_v2_soft_aes_init, randomx_riscv64_vector_program_main_loop));
}
uint64_t* imul_rcp_literals = (uint64_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_imul_rcp_literals));
uint64_t* cur_literal = imul_rcp_literals;
@@ -338,6 +361,16 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
*mx_xor = mx_xor_value;
*mx_xor_light = mx_xor_value;
// "slli x5, x5, 32" for RandomX v2, "nop" for RandomX v1
const uint16_t mp_reg_value = RandomX_CurrentConfig.Tweak_V2_PREFETCH ? 0x1282 : 0x0001;
memcpy(((uint8_t*)mx_xor) + 8, &mp_reg_value, sizeof(mp_reg_value));
memcpy(((uint8_t*)mx_xor_light) + 8, &mp_reg_value, sizeof(mp_reg_value));
// "srli x5, x14, 32" for RandomX v2, "srli x5, x14, 0" for RandomX v1
const uint32_t mp_reg_value2 = RandomX_CurrentConfig.Tweak_V2_PREFETCH ? 0x02075293 : 0x00075293;
memcpy(((uint8_t*)mx_xor) + 14, &mp_reg_value2, sizeof(mp_reg_value2));
if (entryDataInitScalar) {
void* light_mode_data = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_light_mode_data);
@@ -760,10 +793,24 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
emit32(0x0062E2B3);
#endif // __riscv_zbb
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
// andi x6, x5, 120
emit32(0x0782F313);
// bnez x6, +24
emit32(0x00031C63);
}
// andi x5, x5, 6
emit32(0x0062F293);
}
else {
if (RandomX_CurrentConfig.Tweak_V2_CFROUND) {
// andi x6, x20 + src, 120
emit32(0x078A7313 + (src << 15));
// bnez x6, +24
emit32(0x00031C63);
}
// andi x5, x20 + src, 6
emit32(0x006A7293 + (src << 15));
}
@@ -813,6 +860,9 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
}
break;
case InstructionType::NOP:
break;
default:
UNREACHABLE;
}
@@ -829,8 +879,26 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions_end);
}
const uint32_t k = e - p;
emit32(0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000));
emit32(JUMP(e - p));
if (RandomX_CurrentConfig.Tweak_V2_AES) {
uint32_t* p1 = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_fe_mix));
if (hasAES) {
// Restore vsetivli zero, 4, e32, m1, ta, ma
*p1 = 0xCD027057;
}
else {
// Emit "J randomx_riscv64_vector_program_main_loop_fe_mix_v2_soft_aes" instruction
*p1 = JUMP(DIST(randomx_riscv64_vector_program_main_loop_fe_mix, randomx_riscv64_vector_program_main_loop_fe_mix_v2_soft_aes));
}
}
else {
uint32_t* p1 = (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_fe_mix));
// Emit "J randomx_riscv64_vector_program_main_loop_fe_mix_v1" instruction
*p1 = JUMP(DIST(randomx_riscv64_vector_program_main_loop_fe_mix, randomx_riscv64_vector_program_main_loop_fe_mix_v1));
}
#ifdef __GNUC__
char* p1 = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_params));

View File

@@ -66,16 +66,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.global DECL(randomx_riscv64_vector_program_params)
.global DECL(randomx_riscv64_vector_program_imul_rcp_literals)
.global DECL(randomx_riscv64_vector_program_begin)
.global DECL(randomx_riscv64_vector_program_v2_soft_aes_init)
.global DECL(randomx_riscv64_vector_program_main_loop)
.global DECL(randomx_riscv64_vector_program_main_loop_instructions)
.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end)
.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor)
.global DECL(randomx_riscv64_vector_program_main_loop_spaddr_xor)
.global DECL(randomx_riscv64_vector_program_main_loop_fe_mix)
.global DECL(randomx_riscv64_vector_program_main_loop_light_mode_data)
.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode)
.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode)
.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch)
.global DECL(randomx_riscv64_vector_program_main_loop_fe_mix_v1)
.global DECL(randomx_riscv64_vector_program_main_loop_fe_mix_v2_soft_aes)
.global DECL(randomx_riscv64_vector_program_end)
.global DECL(randomx_riscv64_vector_code_end)
@@ -375,12 +381,21 @@ v12 = E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
v13 = E 'or' mask = 0x3*00000000******'3*00000000******
v14 = scale mask = 0x80f0000000000000'80f0000000000000
v15 = unused
v15 = all zeroes
v16 = temporary
v17 = unused
v18 = temporary
v19-v31 = unused
v19 = unused
v20 = randomx_aes_lut_enc_index[0]
v21 = randomx_aes_lut_enc_index[1]
v22 = randomx_aes_lut_enc_index[2]
v23 = randomx_aes_lut_enc_index[3]
v24 = randomx_aes_lut_dec_index[0]
v25 = randomx_aes_lut_dec_index[1]
v26 = randomx_aes_lut_dec_index[2]
v27 = randomx_aes_lut_dec_index[3]
v28-v31 = temporary in aesenc_soft/aesdec_soft
*/
.balign 8
@@ -390,6 +405,11 @@ DECL(randomx_riscv64_vector_program_params):
// JIT compiler will adjust these values for different RandomX variants
randomx_masks: .dword 16376, 262136, 2097144, 2147483584, 255
randomx_aes_lut_enc_ptr: .dword 0
randomx_aes_lut_dec_ptr: .dword 0
randomx_aes_lut_enc_index_ptr: .dword 0
randomx_aes_lut_dec_index_ptr: .dword 0
DECL(randomx_riscv64_vector_program_imul_rcp_literals):
imul_rcp_literals: .fill RANDOMX_PROGRAM_MAX_SIZE, 8, 0
@@ -507,7 +527,44 @@ DECL(randomx_riscv64_vector_program_begin):
fld f30, 192(x18)
fld f31, 200(x18)
randomx_riscv64_vector_program_main_loop:
// Set v15 to zero
vxor.vv v15, v15, v15
DECL(randomx_riscv64_vector_program_v2_soft_aes_init):
// JIT compiler will place a jump to the main loop here if needed
// Load randomx_aes_lut_enc_index/randomx_aes_lut_dec_index
vsetivli zero, 4, e32, m1, ta, ma
lla x5, randomx_aes_lut_enc_index_ptr
ld x5, (x5)
vle32.v v20, (x5)
addi x6, x5, 32
vle32.v v21, (x6)
addi x6, x5, 64
vle32.v v22, (x6)
addi x6, x5, 96
vle32.v v23, (x6)
lla x5, randomx_aes_lut_dec_index_ptr
ld x5, (x5)
vle32.v v24, (x5)
addi x6, x5, 32
vle32.v v25, (x6)
addi x6, x5, 64
vle32.v v26, (x6)
addi x6, x5, 96
vle32.v v27, (x6)
vsetivli zero, 2, e64, m1, ta, ma
DECL(randomx_riscv64_vector_program_main_loop):
and x5, x15, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
add x5, x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
@@ -609,11 +666,13 @@ DECL(randomx_riscv64_vector_program_main_loop_instructions_end):
DECL(randomx_riscv64_vector_program_main_loop_mx_xor):
xor x5, x24, x26 // x5 = readReg2 ^ readReg3 (JIT compiler will substitute the actual registers)
and x5, x5, x19 // x5 = (readReg2 ^ readReg3) & dataset mask
xor x14, x14, x5 // mx ^= (readReg2 ^ readReg3) & dataset mask
slli x5, x5, 32 // JIT compiler will replace it with "nop" for v1
xor x14, x14, x5 // mp ^= (readReg2 ^ readReg3) & dataset mask
add x5, x14, x11 // x5 = &dataset[mx & dataset mask]
srli x5, x14, 32 // JIT compiler will replace it with "srli x5, x14, 0" for v1
and x5, x5, x19 // x5 = mp & dataset mask
add x5, x5, x11 // x5 = &dataset[mp & dataset mask]
#ifdef __riscv_zicbop
prefetch.r (x5)
@@ -689,11 +748,47 @@ DECL(randomx_riscv64_vector_program_main_loop_spaddr_xor):
xor x15, x20, x22 // spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
// store registers f0-f3 to the scratchpad (f0-f3 are first combined with e0-e3)
vxor.vv v0, v0, v4
// v2 FE mix code is the main code path
// JIT compiler will place a jump to v1 or v2 soft AES code here if needed
DECL(randomx_riscv64_vector_program_main_loop_fe_mix):
vsetivli zero, 4, e32, m1, ta, ma
// f0 = aesenc(f0, e0), f1 = aesdec(f1, e0), f2 = aesenc(f2, e0), f3 = aesdec(f3, e0)
vaesem.vv v0, v4
vaesdm.vv v1, v15
vaesem.vv v2, v4
vaesdm.vv v3, v15
vxor.vv v1, v1, v4
vxor.vv v3, v3, v4
// f0 = aesenc(f0, e1), f1 = aesdec(f1, e1), f2 = aesenc(f2, e1), f3 = aesdec(f3, e1)
vaesem.vv v0, v5
vaesdm.vv v1, v15
vaesem.vv v2, v5
vaesdm.vv v3, v15
vxor.vv v1, v1, v5
vxor.vv v2, v2, v6
vxor.vv v3, v3, v5
// f0 = aesenc(f0, e2), f1 = aesdec(f1, e2), f2 = aesenc(f2, e2), f3 = aesdec(f3, e2)
vaesem.vv v0, v6
vaesdm.vv v1, v15
vaesem.vv v2, v6
vaesdm.vv v3, v15
vxor.vv v1, v1, v6
vxor.vv v3, v3, v6
// f0 = aesenc(f0, e3), f1 = aesdec(f1, e3), f2 = aesenc(f2, e3), f3 = aesdec(f3, e3)
vaesem.vv v0, v7
vaesdm.vv v1, v15
vaesem.vv v2, v7
vaesdm.vv v3, v15
vxor.vv v1, v1, v7
vxor.vv v3, v3, v7
vsetivli zero, 2, e64, m1, ta, ma
randomx_riscv64_vector_program_main_loop_fe_store:
vse64.v v0, (x5)
addi x6, x5, 16
@@ -782,6 +877,7 @@ DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode):
DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode):
xor x5, x24, x26 // x5 = readReg2 ^ readReg3 (JIT compiler will substitute the actual registers)
and x5, x5, x19 // x5 = (readReg2 ^ readReg3) & dataset mask
slli x5, x5, 32 // JIT compiler will replace it with "nop" for v1
xor x14, x14, x5 // mx ^= (readReg2 ^ readReg3) & dataset mask
// Save all registers modified when calling dataset_init_scalar_func_ptr
@@ -864,6 +960,131 @@ DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode):
j randomx_riscv64_vector_program_scratchpad_prefetch
DECL(randomx_riscv64_vector_program_main_loop_fe_mix_v1):
vxor.vv v0, v0, v4
vxor.vv v1, v1, v5
vxor.vv v2, v2, v6
vxor.vv v3, v3, v7
j randomx_riscv64_vector_program_main_loop_fe_store
/*
aesenc middle round
x5 = pointer to aesenc LUT
v16 = input and return value
*/
.macro aesenc_soft input, key
vsetivli zero, 16, e8, m1, ta, ma
vrgather.vv v28, \input, v20
vrgather.vv v29, \input, v21
vrgather.vv v30, \input, v22
vrgather.vv v31, \input, v23
vsetivli zero, 4, e32, m1, ta, ma
vsll.vi v28, v28, 2
vsll.vi v29, v29, 2
vsll.vi v30, v30, 2
vsll.vi v31, v31, 2
addi x6, x5, -2048
vluxei32.v v28, (x6), v28
addi x6, x5, -1024
vluxei32.v v29, (x6), v29
vluxei32.v v30, (x5), v30
addi x6, x5, 1024
vluxei32.v v31, (x6), v31
vxor.vv v28, v28, v29
vxor.vv v30, v30, v31
vxor.vv \input, v28, v30
vxor.vv \input, \input, \key
.endm
/*
aesdec middle round
x5 = pointer to aesdec LUT
v16 = input and return value
*/
.macro aesdec_soft input, key
vsetivli zero, 16, e8, m1, ta, ma
vrgather.vv v28, \input, v24
vrgather.vv v29, \input, v25
vrgather.vv v30, \input, v26
vrgather.vv v31, \input, v27
vsetivli zero, 4, e32, m1, ta, ma
vsll.vi v28, v28, 2
vsll.vi v29, v29, 2
vsll.vi v30, v30, 2
vsll.vi v31, v31, 2
addi x6, x5, -2048
vluxei32.v v28, (x6), v28
addi x6, x5, -1024
vluxei32.v v29, (x6), v29
vluxei32.v v30, (x5), v30
addi x6, x5, 1024
vluxei32.v v31, (x6), v31
vxor.vv v28, v28, v29
vxor.vv v30, v30, v31
vxor.vv \input, v28, v30
vxor.vv \input, \input, \key
.endm
DECL(randomx_riscv64_vector_program_main_loop_fe_mix_v2_soft_aes):
// save x5
vmv.s.x v16, x5
lla x5, randomx_aes_lut_enc_ptr
ld x5, (x5)
// f0 = aesenc(f0, e0), f0 = aesenc(f0, e1), f0 = aesenc(f0, e2), f0 = aesenc(f0, e3)
aesenc_soft v0, v4
aesenc_soft v0, v5
aesenc_soft v0, v6
aesenc_soft v0, v7
// f2 = aesenc(f2, e0), f2 = aesenc(f2, e1), f2 = aesenc(f2, e2), f2 = aesenc(f2, e3)
aesenc_soft v2, v4
aesenc_soft v2, v5
aesenc_soft v2, v6
aesenc_soft v2, v7
lla x5, randomx_aes_lut_dec_ptr
ld x5, (x5)
// f1 = aesdec(f1, e0), f1 = aesdec(f1, e1), f1 = aesdec(f1, e2), f1 = aesdec(f1, e3)
aesdec_soft v1, v4
aesdec_soft v1, v5
aesdec_soft v1, v6
aesdec_soft v1, v7
// f3 = aesdec(f3, e0), f3 = aesdec(f3, e1), f3 = aesdec(f3, e2), f3 = aesdec(f3, e3)
aesdec_soft v3, v4
aesdec_soft v3, v5
aesdec_soft v3, v6
aesdec_soft v3, v7
// Set vector registers back to 2x64 bit
vsetivli zero, 2, e64, m1, ta, ma
// restore x5
vmv.x.s x5, v16
j randomx_riscv64_vector_program_main_loop_fe_store
DECL(randomx_riscv64_vector_program_end):
DECL(randomx_riscv64_vector_code_end):

View File

@@ -57,16 +57,22 @@ void randomx_riscv64_vector_sshash_end();
void randomx_riscv64_vector_program_params();
void randomx_riscv64_vector_program_imul_rcp_literals();
void randomx_riscv64_vector_program_begin();
void randomx_riscv64_vector_program_v2_soft_aes_init();
void randomx_riscv64_vector_program_main_loop();
void randomx_riscv64_vector_program_main_loop_instructions();
void randomx_riscv64_vector_program_main_loop_instructions_end();
void randomx_riscv64_vector_program_main_loop_mx_xor();
void randomx_riscv64_vector_program_main_loop_spaddr_xor();
void randomx_riscv64_vector_program_main_loop_fe_mix();
void randomx_riscv64_vector_program_main_loop_light_mode_data();
void randomx_riscv64_vector_program_main_loop_instructions_end_light_mode();
void randomx_riscv64_vector_program_main_loop_mx_xor_light_mode();
void randomx_riscv64_vector_program_end();
void randomx_riscv64_vector_program_scratchpad_prefetch();
void randomx_riscv64_vector_program_main_loop_fe_mix_v1();
void randomx_riscv64_vector_program_main_loop_fe_mix_v2_soft_aes();
void randomx_riscv64_vector_code_end();
#if defined(__cplusplus)