1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-06-18 10:22:39 -04:00

Merge pull request #3812 from SChernykh/dev

RandomX: 2.5% faster dataset init on RISC-V
This commit is contained in:
xmrig
2026-05-07 23:08:49 +07:00
committed by GitHub
2 changed files with 18 additions and 54 deletions

View File

@@ -243,9 +243,11 @@ static void imm_to_x5(uint32_t imm, uint8_t*& p)
         return;
     }
-    if (imm_hi < (32 << 12)) {
+    const int32_t simm_hi = static_cast<int32_t>(imm_hi);
+    if ((simm_hi >= -(32 << 12)) && (simm_hi < (32 << 12))) {
         //c.lui x5, imm_hi
-        emit16(0x6281 + (imm_hi >> 10));
+        emit16(0x6281 | ((imm_hi & 0x1F000) >> 10) | ((simm_hi < 0) ? 0x1000 : 0));
     }
     else {
         // lui x5, imm_hi

View File

@@ -129,6 +129,8 @@ v10-v17 = sshash constants
 v18 = temporary
 v19 = dataset item store offsets
+v24-v31 = temporary
 */
 DECL(randomx_riscv64_vector_sshash_dataset_init):
@@ -180,6 +182,7 @@ DECL(randomx_riscv64_vector_sshash_dataset_init):
     slli x13, x13, 6
     add x13, x13, x11
+    .balign 64
 init_item:
     // Step 1. Init r0-r7
@@ -216,28 +219,7 @@ DECL(randomx_riscv64_vector_sshash_generated_instructions):
 DECL(randomx_riscv64_vector_sshash_generated_instructions_end):
     // Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data.
-    vsuxei64.v v0, (x11), v19
-    add x5, x11, 8
-    vsuxei64.v v1, (x5), v19
-    add x5, x11, 16
-    vsuxei64.v v2, (x5), v19
-    add x5, x11, 24
-    vsuxei64.v v3, (x5), v19
-    add x5, x11, 32
-    vsuxei64.v v4, (x5), v19
-    add x5, x11, 40
-    vsuxei64.v v5, (x5), v19
-    add x5, x11, 48
-    vsuxei64.v v6, (x5), v19
-    add x5, x11, 56
-    vsuxei64.v v7, (x5), v19
+    vsuxseg8ei64.v v0, (x11), v19
     // Iterate to the next 4 items
     vadd.vi v8, v8, 4
@@ -293,36 +275,15 @@ DECL(randomx_riscv64_vector_sshash_cache_prefetch):
     // Step 6. XOR all registers with data loaded from randomx cache
 DECL(randomx_riscv64_vector_sshash_xor):
-    vluxei64.v v18, (x10), v9
-    vxor.vv v0, v0, v18
-    add x5, x10, 8
-    vluxei64.v v18, (x5), v9
-    vxor.vv v1, v1, v18
-    add x5, x10, 16
-    vluxei64.v v18, (x5), v9
-    vxor.vv v2, v2, v18
-    add x5, x10, 24
-    vluxei64.v v18, (x5), v9
-    vxor.vv v3, v3, v18
-    add x5, x10, 32
-    vluxei64.v v18, (x5), v9
-    vxor.vv v4, v4, v18
-    add x5, x10, 40
-    vluxei64.v v18, (x5), v9
-    vxor.vv v5, v5, v18
-    add x5, x10, 48
-    vluxei64.v v18, (x5), v9
-    vxor.vv v6, v6, v18
-    add x5, x10, 56
-    vluxei64.v v18, (x5), v9
-    vxor.vv v7, v7, v18
+    vluxseg8ei64.v v24, (x10), v9
+    vxor.vv v0, v0, v24
+    vxor.vv v1, v1, v25
+    vxor.vv v2, v2, v26
+    vxor.vv v3, v3, v27
+    vxor.vv v4, v4, v28
+    vxor.vv v5, v5, v29
+    vxor.vv v6, v6, v30
+    vxor.vv v7, v7, v31
 DECL(randomx_riscv64_vector_sshash_end):
@@ -564,6 +525,7 @@ DECL(randomx_riscv64_vector_program_v2_soft_aes_init):
     vsetivli zero, 2, e64, m1, ta, ma
+    .balign 64
 DECL(randomx_riscv64_vector_program_main_loop):
     and x5, x15, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
     add x5, x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]