mirror of https://github.com/xmrig/xmrig.git synced 2026-01-02 16:12:45 -05:00

RISC-V: use vector hardware AES instead of scalar

SChernykh
2025-12-31 22:38:41 +01:00
parent 30ffb9cb27
commit 8ccf7de304
10 changed files with 294 additions and 231 deletions

View File

@@ -105,6 +105,32 @@ if (XMRIG_RISCV)
        set(RVARCH_ZBB OFF)
    endif()

+    try_run(RANDOMX_ZVKB_RUN_FAIL
+        RANDOMX_ZVKB_COMPILE_OK
+        ${CMAKE_CURRENT_BINARY_DIR}/
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkb.s
+        COMPILE_DEFINITIONS "-march=rv64gcv_zvkb")
+
+    if (RANDOMX_ZVKB_COMPILE_OK AND NOT RANDOMX_ZVKB_RUN_FAIL)
+        set(RVARCH_ZVKB ON)
+        message(STATUS "RISC-V zvkb extension detected")
+    else()
+        set(RVARCH_ZVKB OFF)
+    endif()
+
+    try_run(RANDOMX_ZVKNED_RUN_FAIL
+        RANDOMX_ZVKNED_COMPILE_OK
+        ${CMAKE_CURRENT_BINARY_DIR}/
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkned.s
+        COMPILE_DEFINITIONS "-march=rv64gcv_zvkned")
+
+    if (RANDOMX_ZVKNED_COMPILE_OK AND NOT RANDOMX_ZVKNED_RUN_FAIL)
+        set(RVARCH_ZVKNED ON)
+        message(STATUS "RISC-V zvkned extension detected")
+    else()
+        set(RVARCH_ZVKNED OFF)
+    endif()
+
    # for native builds, enable Zba and Zbb if supported by the CPU
    if (ARCH STREQUAL "native")
        if (RVARCH_V)
@@ -119,6 +145,12 @@ if (XMRIG_RISCV)
        if (RVARCH_ZBB)
            set(RVARCH "${RVARCH}_zbb")
        endif()
+        if (RVARCH_ZVKB)
+            set(RVARCH "${RVARCH}_zvkb")
+        endif()
+        if (RVARCH_ZVKNED)
+            set(RVARCH "${RVARCH}_zvkned")
+        endif()
    endif()

    message(STATUS "Using -march=${RVARCH}")

View File

@@ -87,33 +87,32 @@ if (WITH_RANDOMX)
            src/crypto/randomx/jit_compiler_rv64.cpp
            src/crypto/randomx/jit_compiler_rv64_vector.cpp
            src/crypto/randomx/aes_hash_rv64_vector.cpp
-            src/crypto/randomx/aes_hash_rv64_zkn.cpp
+            src/crypto/randomx/aes_hash_rv64_zvkned.cpp
        )

        # cheat because cmake and ccache hate each other
        set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
        set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)

        set(RV64_VECTOR_FILE_ARCH "rv64gcv")
-        set(RV64_AES_FILE_ARCH "rv64gc_zknd_zkne")

        if (ARCH STREQUAL "native")
            if (RVARCH_ZICBOP)
                set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zicbop")
-                set(RV64_AES_FILE_ARCH "${RV64_AES_FILE_ARCH}_zicbop")
            endif()
            if (RVARCH_ZBA)
                set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zba")
-                set(RV64_AES_FILE_ARCH "${RV64_AES_FILE_ARCH}_zba")
            endif()
            if (RVARCH_ZBB)
                set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zbb")
-                set(RV64_AES_FILE_ARCH "${RV64_AES_FILE_ARCH}_zbb")
            endif()
+            if (RVARCH_ZVKB)
+                set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zvkb")
+            endif()
        endif()

        set_source_files_properties(src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTIES COMPILE_FLAGS "-march=${RV64_VECTOR_FILE_ARCH}")
        set_source_files_properties(src/crypto/randomx/aes_hash_rv64_vector.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}")
-        set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zkn.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_AES_FILE_ARCH}")
+        set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zvkned.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}_zvkned")
    else()
        list(APPEND SOURCES_CRYPTO
            src/crypto/randomx/jit_compiler_fallback.cpp

View File

@@ -81,14 +81,26 @@ static bool read_riscv_cpuinfo(riscv_cpu_desc *desc)
    lookup_riscv(buf, "model name", desc->model);

    if (lookup_riscv(buf, "isa", desc->isa)) {
-        // Check for vector extensions
-        if (strstr(buf, "zve64d") || strstr(buf, "v_") || strstr(buf, "vh_")) {
-            desc->has_vector = true;
-        }
-        // AES support requires both zknd and zkne extensions (they can be shown as a part of "zk" or "zkn")
-        if (strstr(buf, "zk_") || strstr(buf, "zkn_") || (strstr(buf, "zknd_") && strstr(buf, "zkne_"))) {
-            desc->has_aes = true;
-        }
+        desc->isa.toLower();
+
+        for (const String& s : desc->isa.split('_')) {
+            const char* p = s.data();
+            const size_t n = s.size();
+
+            if ((s.size() > 4) && (memcmp(p, "rv64", 4) == 0)) {
+                for (size_t i = 4; i < n; ++i) {
+                    if (p[i] == 'v') {
+                        desc->has_vector = true;
+                        break;
+                    }
+                }
+            }
+            else if (s == "zve64d") {
+                desc->has_vector = true;
+            }
+            else if ((s == "zvkn") || (s == "zvknc") || (s == "zvkned") || (s == "zvkng")) {
+                desc->has_aes = true;
+            }
+        }
    }
}
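The rewritten detection no longer substring-matches the raw /proc/cpuinfo buffer: it lowercases the reported ISA string and walks it token by token, so a "v" only counts when it appears among the single-letter extensions after "rv64", and vector AES is only recognized from the zvkn* family. A minimal standalone sketch of the same idea, using std::string in place of xmrig's String class (helper and variable names here are illustrative, not the project's):

#include <cctype>
#include <cstdio>
#include <cstring>
#include <sstream>
#include <string>

// Hypothetical re-implementation of the ISA parsing, for illustration only:
// split a lowercased ISA string such as "rv64imafdcv_zicbom_zvkned" on '_'
// and flag the V extension and the vector-AES (Zvkned) family.
static void parse_riscv_isa(std::string isa, bool &has_vector, bool &has_aes)
{
    for (char &c : isa) {
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }

    std::stringstream ss(isa);
    std::string s;
    while (std::getline(ss, s, '_')) {
        if (s.size() > 4 && std::memcmp(s.data(), "rv64", 4) == 0) {
            // single-letter extensions follow "rv64"; any 'v' among them means the V extension
            if (s.find('v', 4) != std::string::npos) {
                has_vector = true;
            }
        }
        else if (s == "zve64d") {
            has_vector = true;
        }
        else if (s == "zvkn" || s == "zvknc" || s == "zvkned" || s == "zvkng") {
            has_aes = true;   // every member of this family includes the vector AES instructions
        }
    }
}

int main()
{
    bool v = false, aes = false;
    parse_riscv_isa("rv64imafdcvh_zicbom_zvkb_zvkned", v, aes);
    std::printf("vector=%d aes=%d\n", v, aes);   // prints: vector=1 aes=1
    return 0;
}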

View File

@@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef XMRIG_RISCV
#include "backend/cpu/Cpu.h"
#include "crypto/randomx/aes_hash_rv64_vector.hpp"
-#include "crypto/randomx/aes_hash_rv64_zkn.hpp"
+#include "crypto/randomx/aes_hash_rv64_zvkned.hpp"
#endif

#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
@@ -69,7 +69,7 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
{
#ifdef XMRIG_RISCV
    if (xmrig::Cpu::info()->hasAES()) {
-        hashAes1Rx4_zkn(input, inputSize, hash);
+        hashAes1Rx4_zvkned(input, inputSize, hash);
        return;
    }
@@ -150,7 +150,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer)
{
#ifdef XMRIG_RISCV
    if (xmrig::Cpu::info()->hasAES()) {
-        fillAes1Rx4_zkn(state, outputSize, buffer);
+        fillAes1Rx4_zvkned(state, outputSize, buffer);
        return;
    }
@@ -207,7 +207,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer)
{
#ifdef XMRIG_RISCV
    if (xmrig::Cpu::info()->hasAES()) {
-        fillAes4Rx4_zkn(state, outputSize, buffer);
+        fillAes4Rx4_zvkned(state, outputSize, buffer);
        return;
    }
@@ -287,7 +287,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
#ifdef XMRIG_RISCV
    if (xmrig::Cpu::info()->hasAES()) {
-        hashAndFillAes1Rx4_zkn(scratchpad, scratchpadSize, hash, fill_state);
+        hashAndFillAes1Rx4_zvkned(scratchpad, scratchpadSize, hash, fill_state);
        return;
    }
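All four entry points in aes_hash.cpp now share the same dispatch shape: a run-time check of the detected AES capability selects the Zvkned kernel, and everything else falls through to the existing code below the #ifdef block. Schematically (the stub names below are placeholders; only the _zvkned suffix matches the real symbols):

#include <cstddef>
#include <cstdio>

// Placeholder kernels standing in for the real implementations.
static void hashAes1Rx4_zvkned_stub(const void *, size_t, void *) { std::puts("Zvkned vector-AES path"); }
static void hashAes1Rx4_generic_stub(const void *, size_t, void *) { std::puts("generic fallback path"); }

// Stand-in for xmrig::Cpu::info()->hasAES(), which is populated from /proc/cpuinfo at startup.
static bool cpu_has_vector_aes() { return true; }

static void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
{
    if (cpu_has_vector_aes()) {
        hashAes1Rx4_zvkned_stub(input, inputSize, hash);
        return;
    }
    hashAes1Rx4_generic_stub(input, inputSize, hash);
}

int main()
{
    unsigned char in[64] = {}, out[64] = {};
    hashAes1Rx4(in, sizeof(in), out);
    return 0;
}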

View File

@@ -1,209 +0,0 @@
/*
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
Copyright (c) 2025 XMRig <support@xmrig.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/aes_hash.hpp"
#include "crypto/randomx/randomx.h"
#include "crypto/rx/Profiler.h"
static FORCE_INLINE uint64_t aes64esm(uint64_t a, uint64_t b) { uint64_t t; asm("aes64esm %0,%1,%2" : "=r"(t) : "r"(a), "r"(b)); return t; }
static FORCE_INLINE uint64_t aes64dsm(uint64_t a, uint64_t b) { uint64_t t; asm("aes64dsm %0,%1,%2" : "=r"(t) : "r"(a), "r"(b)); return t; }
static FORCE_INLINE void aesenc_zkn(uint64_t& in0, uint64_t& in1, uint64_t key0, uint64_t key1)
{
const uint64_t t0 = aes64esm(in0, in1);
const uint64_t t1 = aes64esm(in1, in0);
in0 = t0 ^ key0;
in1 = t1 ^ key1;
}
static FORCE_INLINE void aesdec_zkn(uint64_t& in0, uint64_t& in1, uint64_t key0, uint64_t key1)
{
const uint64_t t0 = aes64dsm(in0, in1);
const uint64_t t1 = aes64dsm(in1, in0);
in0 = t0 ^ key0;
in1 = t1 ^ key1;
}
static const uint64_t AES_HASH_1R_STATE[4][2] = {
{ 0x9fa856de92b52c0dull, 0xd7983aadcc82db47ull },
{ 0x15c7b798338d996eull, 0xace78057f59e125aull },
{ 0xae62c7d06a770017ull, 0xe8a07ce45079506bull },
{ 0x07ad828d630a240cull, 0x7e99494879a10005ull },
};
static const uint64_t AES_HASH_1R_XKEY[2][2] = {
{ 0x8b24949ff6fa8389ull, 0x0689020190dc56bfull },
{ 0x51f4e03c61b263d1ull, 0xed18f99bee1043c6ull },
};
void hashAes1Rx4_zkn(const void *input, size_t inputSize, void *hash)
{
const uint64_t* inptr = (uint64_t*)input;
const uint64_t* inputEnd = inptr + inputSize / sizeof(uint64_t);
uint64_t state[4][2];
memcpy(state, AES_HASH_1R_STATE, sizeof(state));
while (inptr < inputEnd) {
aesenc_zkn(state[0][0], state[0][1], inptr[0], inptr[1]);
aesdec_zkn(state[1][0], state[1][1], inptr[2], inptr[3]);
aesenc_zkn(state[2][0], state[2][1], inptr[4], inptr[5]);
aesdec_zkn(state[3][0], state[3][1], inptr[6], inptr[7]);
inptr += 8;
}
for (int i = 0; i < 2; ++i) {
const uint64_t xkey0 = AES_HASH_1R_XKEY[i][0];
const uint64_t xkey1 = AES_HASH_1R_XKEY[i][1];
aesenc_zkn(state[0][0], state[0][1], xkey0, xkey1);
aesdec_zkn(state[1][0], state[1][1], xkey0, xkey1);
aesenc_zkn(state[2][0], state[2][1], xkey0, xkey1);
aesdec_zkn(state[3][0], state[3][1], xkey0, xkey1);
}
memcpy(hash, state, sizeof(state));
}
static const uint64_t AES_GEN_1R_KEY[4][2] = {
{ 0x627166096daca553ull, 0xb4f44917dbb5552bull },
{ 0x846a710d6d7caf07ull, 0x0da1dc4e1725d378ull },
{ 0x9f947ec63f1262f1ull, 0x3e20e345f4c0794full },
{ 0xb1ba317c6aef8135ull, 0x4916915416314c88ull },
};
void fillAes1Rx4_zkn(void *state, size_t outputSize, void *buffer)
{
uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
uint64_t key[4][2];
memcpy(key, AES_GEN_1R_KEY, sizeof(key));
uint64_t cur_state[4][2];
memcpy(cur_state, state, sizeof(cur_state));
while (outptr < outputEnd) {
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[0][0], key[0][1]);
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[1][0], key[1][1]);
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[2][0], key[2][1]);
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[3][0], key[3][1]);
memcpy(outptr, cur_state, sizeof(cur_state));
outptr += 64;
}
memcpy(state, cur_state, sizeof(cur_state));
}
void fillAes4Rx4_zkn(void *state, size_t outputSize, void *buffer)
{
uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
uint64_t key[8][2];
memcpy(key, RandomX_CurrentConfig.fillAes4Rx4_Key, sizeof(key));
uint64_t cur_state[4][2];
memcpy(cur_state, state, sizeof(cur_state));
while (outptr < outputEnd) {
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[0][0], key[0][1]);
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[0][0], key[0][1]);
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[4][0], key[4][1]);
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[4][0], key[4][1]);
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[1][0], key[1][1]);
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[1][0], key[1][1]);
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[5][0], key[5][1]);
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[5][0], key[5][1]);
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[2][0], key[2][1]);
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[2][0], key[2][1]);
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[6][0], key[6][1]);
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[6][0], key[6][1]);
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[3][0], key[3][1]);
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[3][0], key[3][1]);
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[7][0], key[7][1]);
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[7][0], key[7][1]);
memcpy(outptr, cur_state, sizeof(cur_state));
outptr += 64;
}
}
void hashAndFillAes1Rx4_zkn(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
{
PROFILE_SCOPE(RandomX_AES);
uint64_t* scratchpadPtr = (uint64_t*)scratchpad;
const uint64_t* scratchpadEnd = scratchpadPtr + scratchpadSize / sizeof(uint64_t);
uint64_t cur_hash_state[4][2];
memcpy(cur_hash_state, AES_HASH_1R_STATE, sizeof(cur_hash_state));
uint64_t key[4][2];
memcpy(key, AES_GEN_1R_KEY, sizeof(key));
uint64_t cur_fill_state[4][2];
memcpy(cur_fill_state, fill_state, sizeof(cur_fill_state));
while (scratchpadPtr < scratchpadEnd) {
aesenc_zkn(cur_hash_state[0][0], cur_hash_state[0][1], scratchpadPtr[0], scratchpadPtr[1]);
aesdec_zkn(cur_hash_state[1][0], cur_hash_state[1][1], scratchpadPtr[2], scratchpadPtr[3]);
aesenc_zkn(cur_hash_state[2][0], cur_hash_state[2][1], scratchpadPtr[4], scratchpadPtr[5]);
aesdec_zkn(cur_hash_state[3][0], cur_hash_state[3][1], scratchpadPtr[6], scratchpadPtr[7]);
aesdec_zkn(cur_fill_state[0][0], cur_fill_state[0][1], key[0][0], key[0][1]);
aesenc_zkn(cur_fill_state[1][0], cur_fill_state[1][1], key[1][0], key[1][1]);
aesdec_zkn(cur_fill_state[2][0], cur_fill_state[2][1], key[2][0], key[2][1]);
aesenc_zkn(cur_fill_state[3][0], cur_fill_state[3][1], key[3][0], key[3][1]);
memcpy(scratchpadPtr, cur_fill_state, sizeof(cur_fill_state));
scratchpadPtr += 8;
}
memcpy(fill_state, cur_fill_state, sizeof(cur_fill_state));
for (int i = 0; i < 2; ++i) {
const uint64_t xkey0 = AES_HASH_1R_XKEY[i][0];
const uint64_t xkey1 = AES_HASH_1R_XKEY[i][1];
aesenc_zkn(cur_hash_state[0][0], cur_hash_state[0][1], xkey0, xkey1);
aesdec_zkn(cur_hash_state[1][0], cur_hash_state[1][1], xkey0, xkey1);
aesenc_zkn(cur_hash_state[2][0], cur_hash_state[2][1], xkey0, xkey1);
aesdec_zkn(cur_hash_state[3][0], cur_hash_state[3][1], xkey0, xkey1);
}
memcpy(hash, cur_hash_state, sizeof(cur_hash_state));
}

View File

@@ -0,0 +1,199 @@
/*
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
Copyright (c) 2025 XMRig <support@xmrig.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/aes_hash.hpp"
#include "crypto/randomx/randomx.h"
#include "crypto/rx/Profiler.h"
#include <riscv_vector.h>
static FORCE_INLINE vuint32m1_t aesenc_zvkned(vuint32m1_t a, vuint32m1_t b) { return __riscv_vaesem_vv_u32m1(a, b, 8); }
static FORCE_INLINE vuint32m1_t aesdec_zvkned(vuint32m1_t a, vuint32m1_t b, vuint32m1_t zero) { return __riscv_vxor_vv_u32m1(__riscv_vaesdm_vv_u32m1(a, zero, 8), b, 8); }
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 0, 4, 8, 12, 64, 68, 72, 76 };
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash)
{
const uint8_t* inptr = (const uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
// initial state
vuint32m1_t state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
//process 64 bytes at a time in 4 lanes
while (inptr < inputEnd) {
state02 = aesenc_zvkned(state02, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 0, stride, 8));
state13 = aesdec_zvkned(state13, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 4, stride, 8), zero);
inptr += 64;
}
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
state02 = aesenc_zvkned(state02, xkey00);
state13 = aesdec_zvkned(state13, xkey00, zero);
state02 = aesenc_zvkned(state02, xkey11);
state13 = aesdec_zvkned(state13, xkey11, zero);
//output hash
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
}
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer)
{
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
while (outptr < outputEnd) {
state02 = aesdec_zvkned(state02, key02, zero);
state13 = aesenc_zvkned(state13, key13);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
outptr += 64;
}
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
}
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer)
{
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
const vuint32m1_t stride4 = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
const vuint32m1_t key04 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 0), stride4, 8);
const vuint32m1_t key15 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 1), stride4, 8);
const vuint32m1_t key26 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 2), stride4, 8);
const vuint32m1_t key37 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 3), stride4, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
while (outptr < outputEnd) {
state02 = aesdec_zvkned(state02, key04, zero);
state13 = aesenc_zvkned(state13, key04);
state02 = aesdec_zvkned(state02, key15, zero);
state13 = aesenc_zvkned(state13, key15);
state02 = aesdec_zvkned(state02, key26, zero);
state13 = aesenc_zvkned(state13, key26);
state02 = aesdec_zvkned(state02, key37, zero);
state13 = aesenc_zvkned(state13, key37);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
outptr += 64;
}
}
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
{
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
const vuint32m1_t zero = {};
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
hash_state02 = aesenc_zvkned(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, 8));
hash_state13 = aesdec_zvkned(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, 8), zero);
fill_state02 = aesdec_zvkned(fill_state02, key02, zero);
fill_state13 = aesenc_zvkned(fill_state13, key13);
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, fill_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, fill_state13, 8);
scratchpadPtr += 64;
}
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
//two extra rounds to achieve full diffusion
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
hash_state02 = aesenc_zvkned(hash_state02, xkey00);
hash_state13 = aesdec_zvkned(hash_state13, xkey00, zero);
hash_state02 = aesenc_zvkned(hash_state02, xkey11);
hash_state13 = aesdec_zvkned(hash_state13, xkey11, zero);
//output hash
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
}
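A note on the two wrappers at the top of this file: vaesem.vv performs one full AES encryption round using the round key in its second operand, so aesenc_zvkned maps onto it directly. vaesdm.vv, however, folds its round-key addition in before InvMixColumns, which does not match the x86-style AESDEC behaviour RandomX specifies; the decrypt wrapper therefore passes an all-zero round key to the instruction and XORs the real key in afterwards. With vl = 8 and 32-bit elements, each vector register holds two 16-byte AES blocks (which appears to assume VLEN >= 256 at LMUL = 1), so state columns 0/2 and 1/3 are gathered into separate registers and every instruction advances two lanes at once. The AES_HASH_STRIDE_X2 table is simply the byte offsets of that gather (the state13 loads reuse it with the base pointer advanced by 16 bytes). A small sketch that reproduces the table:

#include <cstdint>
#include <cstdio>

// Recomputes AES_HASH_1R's stride table from the layout it encodes: the state
// is four consecutive 16-byte columns; elements 0..3 of a gathered register
// come from one column and elements 4..7 from the column 32 bytes further on,
// so one vaesem/vaesdm call covers two "lanes" that use the same operation.
int main()
{
    uint32_t stride_x2[8];
    for (int half = 0; half < 2; ++half) {          // half 0 -> column N, half 1 -> column N+2
        for (int word = 0; word < 4; ++word) {      // four 32-bit words per 16-byte column
            stride_x2[half * 4 + word] = static_cast<uint32_t>(half * 32 + word * 4);
        }
    }
    for (uint32_t off : stride_x2) {
        std::printf("%u ", off);                    // prints: 0 4 8 12 32 36 40 44
    }
    std::printf("\n");
    return 0;
}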

View File

@@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

-void hashAes1Rx4_zkn(const void *input, size_t inputSize, void *hash);
-void fillAes1Rx4_zkn(void *state, size_t outputSize, void *buffer);
-void fillAes4Rx4_zkn(void *state, size_t outputSize, void *buffer);
-void hashAndFillAes1Rx4_zkn(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
+void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash);
+void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer);
+void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer);
+void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View File

@@ -94,6 +94,10 @@ void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs,
            case SuperscalarInstructionType::IROR_C:
            {
+#ifdef __riscv_zvkb
+                // 57 30 00 52    vror.vi v0, v0, 0
+                EMIT(0x52003057 | (dst << 7) | (dst << 20) | ((imm32 & 31) << 15) | ((imm32 & 32) << 21));
+#else // __riscv_zvkb
                const uint32_t shift_right = imm32 & 63;
                const uint32_t shift_left = 64 - shift_right;
@@ -121,6 +125,7 @@ void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs,
                // 57 00 20 2B    vor.vv v0, v18, v0
                EMIT(0x2B200057 | (dst << 7) | (dst << 15));
+#endif // __riscv_zvkb
            }
            break;
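When the JIT itself is built with Zvkb available (__riscv_zvkb), a 64-bit rotate compiles to a single vror.vi instead of the shift/shift/vor sequence. vror.vi only carries a 5-bit immediate field, so the sixth bit of the rotate amount is placed in instruction bit 26, which is what the (imm32 & 32) << 21 term does. A standalone sketch of that packing, mirroring the EMIT() expression above (the register number and rotate amount are chosen purely for illustration):

#include <cstdint>
#include <cstdio>

// Packs "vror.vi vd, vd, imm" into a 32-bit instruction word.  The rotate
// amount is 6 bits, but the immediate field holds only uimm[4:0]; uimm[5]
// goes into bit 26 of the encoding.
static uint32_t encode_vror_vi(uint32_t vd, uint32_t imm)
{
    const uint32_t base = 0x52003057;          // vror.vi v0, v0, 0 (unmasked)
    return base
        | (vd << 7)                            // destination register vd
        | (vd << 20)                           // source register vs2 (rotate in place)
        | ((imm & 31) << 15)                   // uimm[4:0]
        | ((imm & 32) << 21);                  // uimm[5] -> instruction bit 26
}

int main()
{
    std::printf("vror.vi v3, v3, 45 -> 0x%08x\n", encode_vror_vi(3, 45));
    return 0;
}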

View File

@@ -0,0 +1,13 @@
/* RISC-V - test if the vector bit manipulation extension is present */
.text
.option arch, rv64gcv_zvkb
.global main
main:
vsetivli zero, 8, e32, m1, ta, ma
vror.vv v0, v0, v0
vror.vx v0, v0, x5
vror.vi v0, v0, 1
li x10, 0
ret

View File

@@ -0,0 +1,12 @@
/* RISC-V - test if the vector AES (zvkned) extension is present */
.text
.option arch, rv64gcv_zvkned
.global main
main:
vsetivli zero, 8, e32, m1, ta, ma
vaesem.vv v0, v0
vaesdm.vv v0, v0
li x10, 0
ret