mirror of
https://github.com/xmrig/xmrig.git
synced 2026-01-23 14:52:52 -05:00
Compare commits
18 Commits
30ffb9cb27
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a80c65d31 | ||
|
|
67cc6cfd1c | ||
|
|
db24bf5154 | ||
|
|
0d9a372e49 | ||
|
|
c1e3d386fe | ||
|
|
5ca4828255 | ||
|
|
1a04bf2904 | ||
|
|
5feb764b27 | ||
|
|
cb7511507f | ||
|
|
6e6eab1763 | ||
|
|
f35f9d7241 | ||
|
|
45d0a15c98 | ||
|
|
f4845cbd68 | ||
|
|
ed80a8a828 | ||
|
|
9e5492eecc | ||
|
|
e41b28ef78 | ||
|
|
1bd59129c4 | ||
|
|
8ccf7de304 |
3
.codespellrc
Normal file
3
.codespellrc
Normal file
@@ -0,0 +1,3 @@
|
||||
[codespell]
|
||||
skip = ./src/3rdparty,./src/crypto/ghostrider,./src/crypto/randomx/blake2,./src/crypto/cn/sse2neon.h,./src/backend/opencl/cl/cn/groestl256.cl,./src/backend/opencl/cl/cn/jh.cl
|
||||
ignore-words-list = Carmel,vor
|
||||
12
CHANGELOG.md
12
CHANGELOG.md
@@ -160,7 +160,7 @@
|
||||
# v6.16.2
|
||||
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
|
||||
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
|
||||
- Fixed several performance bottlenecks introduced in v6.16.1.
|
||||
- Fixed overall GCC-compiled build performance, it's the same speed as MSVC build now.
|
||||
@@ -468,7 +468,7 @@
|
||||
- Compiler for Windows gcc builds updated to v10.1.
|
||||
|
||||
# v5.11.1
|
||||
- [#1652](https://github.com/xmrig/xmrig/pull/1652) Up to 1% RandomX perfomance improvement on recent AMD CPUs.
|
||||
- [#1652](https://github.com/xmrig/xmrig/pull/1652) Up to 1% RandomX performance improvement on recent AMD CPUs.
|
||||
- [#1306](https://github.com/xmrig/xmrig/issues/1306) Fixed possible double connection to a pool.
|
||||
- [#1654](https://github.com/xmrig/xmrig/issues/1654) Fixed build with LibreSSL.
|
||||
|
||||
@@ -574,9 +574,9 @@
|
||||
- Added automatic huge pages configuration on Linux if use the miner with root privileges.
|
||||
- **Added [automatic Intel prefetchers configuration](https://xmrig.com/docs/miner/randomx-optimization-guide#intel-specific-optimizations) on Linux.**
|
||||
- Added new option `wrmsr` in `randomx` object with command line equivalent `--randomx-wrmsr=6`.
|
||||
- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs.
|
||||
- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads.
|
||||
- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.
|
||||
- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs.
|
||||
- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads.
|
||||
- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.
|
||||
- For official builds all dependencies (libuv, hwloc, openssl) updated to recent versions.
|
||||
- Windows `msvc` builds now use Visual Studio 2019 instead of 2017.
|
||||
|
||||
@@ -622,7 +622,7 @@ This release based on 4.x.x series and include all features from v4.6.2-beta, ch
|
||||
- Removed command line option `--http-enabled`, HTTP API enabled automatically if any other `--http-*` option provided.
|
||||
- [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.**
|
||||
- [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh).
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
|
||||
# Previous versions
|
||||
[doc/CHANGELOG_OLD.md](doc/CHANGELOG_OLD.md)
|
||||
|
||||
@@ -105,6 +105,32 @@ if (XMRIG_RISCV)
|
||||
set(RVARCH_ZBB OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZVKB_RUN_FAIL
|
||||
RANDOMX_ZVKB_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkb.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gcv_zvkb")
|
||||
|
||||
if (RANDOMX_ZVKB_COMPILE_OK AND NOT RANDOMX_ZVKB_RUN_FAIL)
|
||||
set(RVARCH_ZVKB ON)
|
||||
message(STATUS "RISC-V zvkb extension detected")
|
||||
else()
|
||||
set(RVARCH_ZVKB OFF)
|
||||
endif()
|
||||
|
||||
try_run(RANDOMX_ZVKNED_RUN_FAIL
|
||||
RANDOMX_ZVKNED_COMPILE_OK
|
||||
${CMAKE_CURRENT_BINARY_DIR}/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/randomx/tests/riscv64_zvkned.s
|
||||
COMPILE_DEFINITIONS "-march=rv64gcv_zvkned")
|
||||
|
||||
if (RANDOMX_ZVKNED_COMPILE_OK AND NOT RANDOMX_ZVKNED_RUN_FAIL)
|
||||
set(RVARCH_ZVKNED ON)
|
||||
message(STATUS "RISC-V zvkned extension detected")
|
||||
else()
|
||||
set(RVARCH_ZVKNED OFF)
|
||||
endif()
|
||||
|
||||
# for native builds, enable Zba and Zbb if supported by the CPU
|
||||
if (ARCH STREQUAL "native")
|
||||
if (RVARCH_V)
|
||||
@@ -119,6 +145,12 @@ if (XMRIG_RISCV)
|
||||
if (RVARCH_ZBB)
|
||||
set(RVARCH "${RVARCH}_zbb")
|
||||
endif()
|
||||
if (RVARCH_ZVKB)
|
||||
set(RVARCH "${RVARCH}_zvkb")
|
||||
endif()
|
||||
if (RVARCH_ZVKNED)
|
||||
set(RVARCH "${RVARCH}_zvkned")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message(STATUS "Using -march=${RVARCH}")
|
||||
|
||||
@@ -87,33 +87,32 @@ if (WITH_RANDOMX)
|
||||
src/crypto/randomx/jit_compiler_rv64.cpp
|
||||
src/crypto/randomx/jit_compiler_rv64_vector.cpp
|
||||
src/crypto/randomx/aes_hash_rv64_vector.cpp
|
||||
src/crypto/randomx/aes_hash_rv64_zkn.cpp
|
||||
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
|
||||
)
|
||||
# cheat because cmake and ccache hate each other
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTY LANGUAGE C)
|
||||
|
||||
set(RV64_VECTOR_FILE_ARCH "rv64gcv")
|
||||
set(RV64_AES_FILE_ARCH "rv64gc_zknd_zkne")
|
||||
|
||||
if (ARCH STREQUAL "native")
|
||||
if (RVARCH_ZICBOP)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zicbop")
|
||||
set(RV64_AES_FILE_ARCH "${RV64_AES_FILE_ARCH}_zicbop")
|
||||
endif()
|
||||
if (RVARCH_ZBA)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zba")
|
||||
set(RV64_AES_FILE_ARCH "${RV64_AES_FILE_ARCH}_zba")
|
||||
endif()
|
||||
if (RVARCH_ZBB)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zbb")
|
||||
set(RV64_AES_FILE_ARCH "${RV64_AES_FILE_ARCH}_zbb")
|
||||
endif()
|
||||
if (RVARCH_ZVKB)
|
||||
set(RV64_VECTOR_FILE_ARCH "${RV64_VECTOR_FILE_ARCH}_zvkb")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_source_files_properties(src/crypto/randomx/jit_compiler_rv64_vector_static.S PROPERTIES COMPILE_FLAGS "-march=${RV64_VECTOR_FILE_ARCH}")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_vector.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zkn.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_AES_FILE_ARCH}")
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_rv64_zvkned.cpp PROPERTIES COMPILE_FLAGS "-O3 -march=${RV64_VECTOR_FILE_ARCH}_zvkned")
|
||||
else()
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_fallback.cpp
|
||||
@@ -191,6 +190,15 @@ if (WITH_RANDOMX)
|
||||
list(APPEND HEADERS_CRYPTO src/crypto/rx/Profiler.h)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/rx/Profiler.cpp)
|
||||
endif()
|
||||
|
||||
if (WITH_VAES)
|
||||
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/randomx/aes_hash_vaes512.cpp)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "/O2 /Ob2 /Oi /Ot /arch:AVX512")
|
||||
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/aes_hash_vaes512.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx512f -mvaes")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
remove_definitions(/DXMRIG_ALGO_RANDOMX)
|
||||
endif()
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# HTTP API
|
||||
|
||||
If you want use HTTP API you need enable it (`"enabled": true,`) then choice `port` and optionaly `host`. API not available if miner built without HTTP support (`-DWITH_HTTP=OFF`).
|
||||
If you want use HTTP API you need enable it (`"enabled": true,`) then choice `port` and optionally `host`. API not available if miner built without HTTP support (`-DWITH_HTTP=OFF`).
|
||||
|
||||
Offical HTTP client for API: http://workers.xmrig.info/
|
||||
Official HTTP client for API: http://workers.xmrig.info/
|
||||
|
||||
Example configuration:
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ Double check that you see `Huge pages 100%` both for dataset and for all threads
|
||||
|
||||
### Benchmark with custom config
|
||||
|
||||
You can run benchmark with any configuration you want. Just start without command line parameteres, use regular config.json and add `"benchmark":"1M",` on the next line after pool url.
|
||||
You can run benchmark with any configuration you want. Just start without command line parameters, use regular config.json and add `"benchmark":"1M",` on the next line after pool url.
|
||||
|
||||
# Stress test
|
||||
|
||||
@@ -26,4 +26,4 @@ You can also run continuous stress-test that is as close to the real RandomX min
|
||||
xmrig --stress
|
||||
xmrig --stress -a rx/wow
|
||||
```
|
||||
This will require Internet connection and will run indefinitely.
|
||||
This will require Internet connection and will run indefinitely.
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
# v4.0.0-beta
|
||||
- [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.**
|
||||
- [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh).
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
- Algorithm `cn/wow` removed, as no longer alive.
|
||||
|
||||
# v3.2.0
|
||||
- Added per pool option `coin` with single possible value `monero` for pools without algorithm negotiation, for upcoming Monero fork.
|
||||
@@ -103,7 +103,7 @@
|
||||
- [#1105](https://github.com/xmrig/xmrig/issues/1105) Improved auto configuration for `cn-pico` algorithm.
|
||||
- Added commands `pause` and `resume` via JSON RPC 2.0 API (`POST /json_rpc`).
|
||||
- Added command line option `--export-topology` for export hwloc topology to a XML file.
|
||||
- Breaked backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Broken backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Options `av`, `safe` and `max-cpu-usage` removed.
|
||||
- Algorithm `cn/msr` renamed to `cn/fast`.
|
||||
- Algorithm `cn/xtl` removed.
|
||||
@@ -122,7 +122,7 @@
|
||||
- [#1092](https://github.com/xmrig/xmrig/issues/1092) Fixed crash if wrong CPU affinity used.
|
||||
- [#1103](https://github.com/xmrig/xmrig/issues/1103) Improved auto configuration for RandomX for CPUs where L2 cache is limiting factor.
|
||||
- [#1105](https://github.com/xmrig/xmrig/issues/1105) Improved auto configuration for `cn-pico` algorithm.
|
||||
- [#1106](https://github.com/xmrig/xmrig/issues/1106) Fixed `hugepages` field in summary API.
|
||||
- [#1106](https://github.com/xmrig/xmrig/issues/1106) Fixed `hugepages` field in summary API.
|
||||
- Added alternative short format for CPU threads.
|
||||
- Changed format for CPU threads with intensity above 1.
|
||||
- Name for reference RandomX configuration changed to `rx/test` to avoid potential conflicts in future.
|
||||
@@ -150,7 +150,7 @@
|
||||
- [#1050](https://github.com/xmrig/xmrig/pull/1050) Added RandomXL algorithm for [Loki](https://loki.network/), algorithm name used by miner is `randomx/loki` or `rx/loki`.
|
||||
- Added [flexible](https://github.com/xmrig/xmrig/blob/evo/doc/CPU.md) multi algorithm configuration.
|
||||
- Added unlimited switching between incompatible algorithms, all mining options can be changed in runtime.
|
||||
- Breaked backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Broken backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object.
|
||||
- Options `av`, `safe` and `max-cpu-usage` removed.
|
||||
- Algorithm `cn/msr` renamed to `cn/fast`.
|
||||
- Algorithm `cn/xtl` removed.
|
||||
@@ -183,7 +183,7 @@
|
||||
- [#314](https://github.com/xmrig/xmrig-proxy/issues/314) Added donate over proxy feature.
|
||||
- Added new option `donate-over-proxy`.
|
||||
- Added real graceful exit.
|
||||
|
||||
|
||||
# v2.14.4
|
||||
- [#992](https://github.com/xmrig/xmrig/pull/992) Fixed compilation with Clang 3.5.
|
||||
- [#1012](https://github.com/xmrig/xmrig/pull/1012) Fixed compilation with Clang 9.0.
|
||||
@@ -250,7 +250,7 @@
|
||||
# v2.8.1
|
||||
- [#768](https://github.com/xmrig/xmrig/issues/768) Fixed build with Visual Studio 2015.
|
||||
- [#769](https://github.com/xmrig/xmrig/issues/769) Fixed regression, some ANSI escape sequences was in log with disabled colors.
|
||||
- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues.
|
||||
- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues.
|
||||
- Simplified checks for ASM auto detection, only AES support necessary.
|
||||
- Added missing options to `--help` output.
|
||||
|
||||
@@ -259,7 +259,7 @@
|
||||
- Added global and per thread option `"asm"` and command line equivalent.
|
||||
- **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.**
|
||||
- Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents.
|
||||
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.
|
||||
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.
|
||||
- [#245](https://github.com/xmrig/xmrig-proxy/issues/245) Fixed API ID collision when run multiple miners on same machine.
|
||||
- [#757](https://github.com/xmrig/xmrig/issues/757) Fixed send buffer overflow.
|
||||
|
||||
@@ -346,7 +346,7 @@
|
||||
|
||||
# v2.4.4
|
||||
- Added libmicrohttpd version to --version output.
|
||||
- Fixed bug in singal handler, in some cases miner wasn't shutdown properly.
|
||||
- Fixed bug in signal handler, in some cases miner wasn't shutdown properly.
|
||||
- Fixed recent MSVC 2017 version detection.
|
||||
- [#279](https://github.com/xmrig/xmrig/pull/279) Fixed build on some macOS versions.
|
||||
|
||||
@@ -359,7 +359,7 @@
|
||||
# v2.4.2
|
||||
- [#60](https://github.com/xmrig/xmrig/issues/60) Added FreeBSD support, thanks [vcambur](https://github.com/vcambur).
|
||||
- [#153](https://github.com/xmrig/xmrig/issues/153) Fixed issues with dwarfpool.com.
|
||||
|
||||
|
||||
# v2.4.1
|
||||
- [#147](https://github.com/xmrig/xmrig/issues/147) Fixed comparability with monero-stratum.
|
||||
|
||||
@@ -371,7 +371,7 @@
|
||||
- [#101](https://github.com/xmrig/xmrig/issues/101) Fixed MSVC 2017 (15.3) compile time version detection.
|
||||
- [#108](https://github.com/xmrig/xmrig/issues/108) Silently ignore invalid values for `donate-level` option.
|
||||
- [#111](https://github.com/xmrig/xmrig/issues/111) Fixed build without AEON support.
|
||||
|
||||
|
||||
# v2.3.1
|
||||
- [#68](https://github.com/xmrig/xmrig/issues/68) Fixed compatibility with Docker containers, was nothing print on console.
|
||||
|
||||
@@ -398,7 +398,7 @@
|
||||
# v2.1.0
|
||||
- [#40](https://github.com/xmrig/xmrig/issues/40)
|
||||
Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed, login request was contain malformed JSON if username or password has some special characters for example `\`.
|
||||
- Fixed, login request was contain malformed JSON if username or password has some special characters for example `\`.
|
||||
- [#220](https://github.com/fireice-uk/xmr-stak-cpu/pull/220) Better support for Round Robin DNS, IP address now always chosen randomly instead of stuck on first one.
|
||||
- Changed donation address, new [xmrig-proxy](https://github.com/xmrig/xmrig-proxy) is coming soon.
|
||||
|
||||
@@ -418,16 +418,16 @@ Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed Windows XP support.
|
||||
- Fixed regression, option `--no-color` was not fully disable colored output.
|
||||
- Show resolved pool IP address in miner output.
|
||||
|
||||
|
||||
# v1.0.1
|
||||
- Fix broken software AES implementation, app has crashed if CPU not support AES-NI, only version 1.0.0 affected.
|
||||
|
||||
# v1.0.0
|
||||
- Miner complete rewritten in C++ with libuv.
|
||||
- This version should be fully compatible (except config file) with previos versions, many new nice features will come in next versions.
|
||||
- This is still beta. If you found regression, stability or perfomance issues or have an idea for new feature please fell free to open new [issue](https://github.com/xmrig/xmrig/issues/new).
|
||||
- This version should be fully compatible (except config file) with previous versions, many new nice features will come in next versions.
|
||||
- This is still beta. If you found regression, stability or performance issues or have an idea for new feature please fell free to open new [issue](https://github.com/xmrig/xmrig/issues/new).
|
||||
- Added new option `--print-time=N`, print hashrate report every N seconds.
|
||||
- New hashrate reports, by default every 60 secons.
|
||||
- New hashrate reports, by default every 60 seconds.
|
||||
- Added Microsoft Visual C++ 2015 and 2017 support.
|
||||
- Removed dependency on libcurl.
|
||||
- To compile this version from source please switch to [dev](https://github.com/xmrig/xmrig/tree/dev) branch.
|
||||
@@ -440,7 +440,7 @@ Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed gcc 7.1 support.
|
||||
|
||||
# v0.8.1
|
||||
- Added nicehash support, detects automaticaly by pool URL, for example `cryptonight.eu.nicehash.com:3355` or manually via option `--nicehash`.
|
||||
- Added nicehash support, detects automatically by pool URL, for example `cryptonight.eu.nicehash.com:3355` or manually via option `--nicehash`.
|
||||
|
||||
# v0.8.0
|
||||
- Added double hash mode, also known as lower power mode. `--av=2` and `--av=4`.
|
||||
|
||||
@@ -124,7 +124,7 @@ Force enable (`true`) or disable (`false`) hardware AES support. Default value `
|
||||
Mining threads priority, value from `1` (lowest priority) to `5` (highest possible priority). Default value `null` means miner don't change threads priority at all. Setting priority higher than 2 can make your PC unresponsive.
|
||||
|
||||
#### `memory-pool` (since v4.3.0)
|
||||
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm switching. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages. It helps to avoid loosing huge pages for scratchpads when RandomX dataset is updated and mining threads restart after a 2-3 days of mining.
|
||||
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm switching. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages. It helps to avoid losing huge pages for scratchpads when RandomX dataset is updated and mining threads restart after a 2-3 days of mining.
|
||||
|
||||
#### `yield` (since v5.1.1)
|
||||
Prefer system better system response/stability `true` (default value) or maximum hashrate `false`.
|
||||
@@ -133,7 +133,7 @@ Prefer system better system response/stability `true` (default value) or maximum
|
||||
Enable/configure or disable ASM optimizations. Possible values: `true`, `false`, `"intel"`, `"ryzen"`, `"bulldozer"`.
|
||||
|
||||
#### `argon2-impl` (since v3.1.0)
|
||||
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. This is used in RandomX dataset initialization and also in some other mining algorithms. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards - if your CPU doesn't support required instuctions, miner will crash.
|
||||
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. This is used in RandomX dataset initialization and also in some other mining algorithms. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards - if your CPU doesn't support required instructions, miner will crash.
|
||||
|
||||
#### `astrobwt-max-size`
|
||||
AstroBWT algorithm: skip hashes with large stage 2 size, default: `550`, min: `400`, max: `1200`. Optimal value depends on your CPU/GPU
|
||||
|
||||
@@ -81,14 +81,26 @@ static bool read_riscv_cpuinfo(riscv_cpu_desc *desc)
|
||||
lookup_riscv(buf, "model name", desc->model);
|
||||
|
||||
if (lookup_riscv(buf, "isa", desc->isa)) {
|
||||
// Check for vector extensions
|
||||
if (strstr(buf, "zve64d") || strstr(buf, "v_") || strstr(buf, "vh_")) {
|
||||
desc->has_vector = true;
|
||||
}
|
||||
desc->isa.toLower();
|
||||
|
||||
// AES support requires both zknd and zkne extensions (they can be shown as a part of "zk" or "zkn")
|
||||
if (strstr(buf, "zk_") || strstr(buf, "zkn_") || (strstr(buf, "zknd_") && strstr(buf, "zkne_"))) {
|
||||
desc->has_aes = true;
|
||||
for (const String& s : desc->isa.split('_')) {
|
||||
const char* p = s.data();
|
||||
const size_t n = s.size();
|
||||
|
||||
if ((s.size() > 4) && (memcmp(p, "rv64", 4) == 0)) {
|
||||
for (size_t i = 4; i < n; ++i) {
|
||||
if (p[i] == 'v') {
|
||||
desc->has_vector = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (s == "zve64d") {
|
||||
desc->has_vector = true;
|
||||
}
|
||||
else if ((s == "zvkn") || (s == "zvknc") || (s == "zvkned") || (s == "zvkng")){
|
||||
desc->has_aes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -706,7 +706,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
|
||||
}
|
||||
|
||||
# if (ALGO_FAMILY == FAMILY_CN_HEAVY)
|
||||
/* Also left over threads performe this loop.
|
||||
/* Also left over threads perform this loop.
|
||||
* The left over thread results will be ignored
|
||||
*/
|
||||
#pragma unroll 16
|
||||
@@ -1005,7 +1005,7 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
|
||||
ulong State[8] = { 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0x0001000000000000UL };
|
||||
ulong H[8], M[8];
|
||||
|
||||
// BUG: AMD driver 19.7.X crashs if this is written as loop
|
||||
// BUG: AMD driver 19.7.X crashes if this is written as loop
|
||||
// Thx AMD for so bad software
|
||||
{
|
||||
((ulong8 *)M)[0] = vload8(0, states);
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#else
|
||||
# define STATIC
|
||||
/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt
|
||||
* Build-in Function
|
||||
* Built-in Function
|
||||
* uintn amd_bitalign (uintn src0, uintn src1, uintn src2)
|
||||
* Description
|
||||
* dst.s0 = (uint) (((((long)src0.s0) << 32) | (long)src1.s0) >> (src2.s0 & 31))
|
||||
|
||||
@@ -77,7 +77,7 @@ void keccak_f800_round(uint32_t st[25], const int r)
|
||||
void keccak_f800(uint32_t* st)
|
||||
{
|
||||
// Complete all 22 rounds as a separate impl to
|
||||
// evaluate only first 8 words is wasteful of regsters
|
||||
// evaluate only first 8 words is wasteful of registers
|
||||
for (int r = 0; r < 22; r++) {
|
||||
keccak_f800_round(st, r);
|
||||
}
|
||||
@@ -181,7 +181,7 @@ __kernel void progpow_search(__global dag_t const* g_dag, __global uint* job_blo
|
||||
for (int i = 10; i < 25; i++)
|
||||
state[i] = ravencoin_rndc[i-10];
|
||||
|
||||
// Run intial keccak round
|
||||
// Run initial keccak round
|
||||
keccak_f800(state);
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
#define KECCAK_ROUNDS 24
|
||||
|
||||
|
||||
/* *************************** Public Inteface ************************ */
|
||||
/* *************************** Public Interface ************************ */
|
||||
|
||||
/* For Init or Reset call these: */
|
||||
sha3_return_t
|
||||
|
||||
@@ -554,6 +554,7 @@ int64_t xmrig::Client::send(size_t size)
|
||||
}
|
||||
|
||||
m_expire = Chrono::steadyMSecs() + kResponseTimeout;
|
||||
startTimeout();
|
||||
return m_sequence++;
|
||||
}
|
||||
|
||||
@@ -661,8 +662,6 @@ void xmrig::Client::onClose()
|
||||
|
||||
void xmrig::Client::parse(char *line, size_t len)
|
||||
{
|
||||
startTimeout();
|
||||
|
||||
LOG_DEBUG("[%s] received (%d bytes): \"%.*s\"", url(), len, static_cast<int>(len), line);
|
||||
|
||||
if (len < 22 || line[0] != '{') {
|
||||
@@ -857,8 +856,6 @@ void xmrig::Client::parseResponse(int64_t id, const rapidjson::Value &result, co
|
||||
void xmrig::Client::ping()
|
||||
{
|
||||
send(snprintf(m_sendBuf.data(), m_sendBuf.size(), "{\"id\":%" PRId64 ",\"jsonrpc\":\"2.0\",\"method\":\"keepalived\",\"params\":{\"id\":\"%s\"}}\n", m_sequence, m_rpcId.data()));
|
||||
|
||||
m_keepAlive = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -235,7 +235,7 @@ static HashReturn Init(hashState *state, int hashbitlen)
|
||||
/*initialize the initial hash value of JH*/
|
||||
state->hashbitlen = hashbitlen;
|
||||
|
||||
/*load the intital hash value into state*/
|
||||
/*load the initial hash value into state*/
|
||||
switch (hashbitlen)
|
||||
{
|
||||
case 224: memcpy(state->x,JH224_H0,128); break;
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
multiple of size / 8)
|
||||
|
||||
ptr_cast(x,size) casts a pointer to a pointer to a
|
||||
varaiable of length 'size' bits
|
||||
variable of length 'size' bits
|
||||
*/
|
||||
|
||||
#define ui_type(size) uint##size##_t
|
||||
|
||||
@@ -38,10 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#ifdef XMRIG_RISCV
|
||||
#include "backend/cpu/Cpu.h"
|
||||
|
||||
#ifdef XMRIG_RISCV
|
||||
#include "crypto/randomx/aes_hash_rv64_vector.hpp"
|
||||
#include "crypto/randomx/aes_hash_rv64_zkn.hpp"
|
||||
#include "crypto/randomx/aes_hash_rv64_zvkned.hpp"
|
||||
#endif
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
@@ -69,7 +70,7 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
|
||||
{
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
hashAes1Rx4_zkn(input, inputSize, hash);
|
||||
hashAes1Rx4_zvkned(input, inputSize, hash);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -85,7 +86,7 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash)
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 in0, in1, in2, in3;
|
||||
|
||||
//intial state
|
||||
//initial state
|
||||
state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||
state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||
state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||
@@ -150,7 +151,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
fillAes1Rx4_zkn(state, outputSize, buffer);
|
||||
fillAes1Rx4_zvkned(state, outputSize, buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -207,7 +208,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
fillAes4Rx4_zkn(state, outputSize, buffer);
|
||||
fillAes4Rx4_zvkned(state, outputSize, buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -280,6 +281,10 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer)
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
void hashAndFillAes1Rx4_VAES512(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
#endif
|
||||
|
||||
template<int softAes, int unroll>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
@@ -287,7 +292,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||
|
||||
#ifdef XMRIG_RISCV
|
||||
if (xmrig::Cpu::info()->hasAES()) {
|
||||
hashAndFillAes1Rx4_zkn(scratchpad, scratchpadSize, hash, fill_state);
|
||||
hashAndFillAes1Rx4_zvkned(scratchpad, scratchpadSize, hash, fill_state);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -297,6 +302,13 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
if (xmrig::Cpu::info()->arch() == xmrig::ICpuInfo::ARCH_ZEN5) {
|
||||
hashAndFillAes1Rx4_VAES512(scratchpad, scratchpadSize, hash, fill_state);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
|
||||
@@ -1,209 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2025 XMRig <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
static FORCE_INLINE uint64_t aes64esm(uint64_t a, uint64_t b) { uint64_t t; asm("aes64esm %0,%1,%2" : "=r"(t) : "r"(a), "r"(b)); return t; }
|
||||
static FORCE_INLINE uint64_t aes64dsm(uint64_t a, uint64_t b) { uint64_t t; asm("aes64dsm %0,%1,%2" : "=r"(t) : "r"(a), "r"(b)); return t; }
|
||||
|
||||
static FORCE_INLINE void aesenc_zkn(uint64_t& in0, uint64_t& in1, uint64_t key0, uint64_t key1)
|
||||
{
|
||||
const uint64_t t0 = aes64esm(in0, in1);
|
||||
const uint64_t t1 = aes64esm(in1, in0);
|
||||
|
||||
in0 = t0 ^ key0;
|
||||
in1 = t1 ^ key1;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void aesdec_zkn(uint64_t& in0, uint64_t& in1, uint64_t key0, uint64_t key1)
|
||||
{
|
||||
const uint64_t t0 = aes64dsm(in0, in1);
|
||||
const uint64_t t1 = aes64dsm(in1, in0);
|
||||
|
||||
in0 = t0 ^ key0;
|
||||
in1 = t1 ^ key1;
|
||||
}
|
||||
|
||||
static const uint64_t AES_HASH_1R_STATE[4][2] = {
|
||||
{ 0x9fa856de92b52c0dull, 0xd7983aadcc82db47ull },
|
||||
{ 0x15c7b798338d996eull, 0xace78057f59e125aull },
|
||||
{ 0xae62c7d06a770017ull, 0xe8a07ce45079506bull },
|
||||
{ 0x07ad828d630a240cull, 0x7e99494879a10005ull },
|
||||
};
|
||||
|
||||
static const uint64_t AES_HASH_1R_XKEY[2][2] = {
|
||||
{ 0x8b24949ff6fa8389ull, 0x0689020190dc56bfull },
|
||||
{ 0x51f4e03c61b263d1ull, 0xed18f99bee1043c6ull },
|
||||
};
|
||||
|
||||
void hashAes1Rx4_zkn(const void *input, size_t inputSize, void *hash)
|
||||
{
|
||||
const uint64_t* inptr = (uint64_t*)input;
|
||||
const uint64_t* inputEnd = inptr + inputSize / sizeof(uint64_t);
|
||||
|
||||
uint64_t state[4][2];
|
||||
memcpy(state, AES_HASH_1R_STATE, sizeof(state));
|
||||
|
||||
while (inptr < inputEnd) {
|
||||
aesenc_zkn(state[0][0], state[0][1], inptr[0], inptr[1]);
|
||||
aesdec_zkn(state[1][0], state[1][1], inptr[2], inptr[3]);
|
||||
aesenc_zkn(state[2][0], state[2][1], inptr[4], inptr[5]);
|
||||
aesdec_zkn(state[3][0], state[3][1], inptr[6], inptr[7]);
|
||||
|
||||
inptr += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
const uint64_t xkey0 = AES_HASH_1R_XKEY[i][0];
|
||||
const uint64_t xkey1 = AES_HASH_1R_XKEY[i][1];
|
||||
|
||||
aesenc_zkn(state[0][0], state[0][1], xkey0, xkey1);
|
||||
aesdec_zkn(state[1][0], state[1][1], xkey0, xkey1);
|
||||
aesenc_zkn(state[2][0], state[2][1], xkey0, xkey1);
|
||||
aesdec_zkn(state[3][0], state[3][1], xkey0, xkey1);
|
||||
}
|
||||
|
||||
memcpy(hash, state, sizeof(state));
|
||||
}
|
||||
|
||||
static const uint64_t AES_GEN_1R_KEY[4][2] = {
|
||||
{ 0x627166096daca553ull, 0xb4f44917dbb5552bull },
|
||||
{ 0x846a710d6d7caf07ull, 0x0da1dc4e1725d378ull },
|
||||
{ 0x9f947ec63f1262f1ull, 0x3e20e345f4c0794full },
|
||||
{ 0xb1ba317c6aef8135ull, 0x4916915416314c88ull },
|
||||
};
|
||||
|
||||
void fillAes1Rx4_zkn(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
uint64_t key[4][2];
|
||||
memcpy(key, AES_GEN_1R_KEY, sizeof(key));
|
||||
|
||||
uint64_t cur_state[4][2];
|
||||
memcpy(cur_state, state, sizeof(cur_state));
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[0][0], key[0][1]);
|
||||
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[1][0], key[1][1]);
|
||||
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[2][0], key[2][1]);
|
||||
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[3][0], key[3][1]);
|
||||
|
||||
memcpy(outptr, cur_state, sizeof(cur_state));
|
||||
outptr += 64;
|
||||
}
|
||||
|
||||
memcpy(state, cur_state, sizeof(cur_state));
|
||||
}
|
||||
|
||||
void fillAes4Rx4_zkn(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
uint64_t key[8][2];
|
||||
memcpy(key, RandomX_CurrentConfig.fillAes4Rx4_Key, sizeof(key));
|
||||
|
||||
uint64_t cur_state[4][2];
|
||||
memcpy(cur_state, state, sizeof(cur_state));
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[0][0], key[0][1]);
|
||||
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[0][0], key[0][1]);
|
||||
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[4][0], key[4][1]);
|
||||
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[4][0], key[4][1]);
|
||||
|
||||
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[1][0], key[1][1]);
|
||||
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[1][0], key[1][1]);
|
||||
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[5][0], key[5][1]);
|
||||
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[5][0], key[5][1]);
|
||||
|
||||
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[2][0], key[2][1]);
|
||||
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[2][0], key[2][1]);
|
||||
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[6][0], key[6][1]);
|
||||
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[6][0], key[6][1]);
|
||||
|
||||
aesdec_zkn(cur_state[0][0], cur_state[0][1], key[3][0], key[3][1]);
|
||||
aesenc_zkn(cur_state[1][0], cur_state[1][1], key[3][0], key[3][1]);
|
||||
aesdec_zkn(cur_state[2][0], cur_state[2][1], key[7][0], key[7][1]);
|
||||
aesenc_zkn(cur_state[3][0], cur_state[3][1], key[7][0], key[7][1]);
|
||||
|
||||
memcpy(outptr, cur_state, sizeof(cur_state));
|
||||
outptr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void hashAndFillAes1Rx4_zkn(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
PROFILE_SCOPE(RandomX_AES);
|
||||
|
||||
uint64_t* scratchpadPtr = (uint64_t*)scratchpad;
|
||||
const uint64_t* scratchpadEnd = scratchpadPtr + scratchpadSize / sizeof(uint64_t);
|
||||
|
||||
uint64_t cur_hash_state[4][2];
|
||||
memcpy(cur_hash_state, AES_HASH_1R_STATE, sizeof(cur_hash_state));
|
||||
|
||||
uint64_t key[4][2];
|
||||
memcpy(key, AES_GEN_1R_KEY, sizeof(key));
|
||||
|
||||
uint64_t cur_fill_state[4][2];
|
||||
memcpy(cur_fill_state, fill_state, sizeof(cur_fill_state));
|
||||
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
aesenc_zkn(cur_hash_state[0][0], cur_hash_state[0][1], scratchpadPtr[0], scratchpadPtr[1]);
|
||||
aesdec_zkn(cur_hash_state[1][0], cur_hash_state[1][1], scratchpadPtr[2], scratchpadPtr[3]);
|
||||
aesenc_zkn(cur_hash_state[2][0], cur_hash_state[2][1], scratchpadPtr[4], scratchpadPtr[5]);
|
||||
aesdec_zkn(cur_hash_state[3][0], cur_hash_state[3][1], scratchpadPtr[6], scratchpadPtr[7]);
|
||||
|
||||
aesdec_zkn(cur_fill_state[0][0], cur_fill_state[0][1], key[0][0], key[0][1]);
|
||||
aesenc_zkn(cur_fill_state[1][0], cur_fill_state[1][1], key[1][0], key[1][1]);
|
||||
aesdec_zkn(cur_fill_state[2][0], cur_fill_state[2][1], key[2][0], key[2][1]);
|
||||
aesenc_zkn(cur_fill_state[3][0], cur_fill_state[3][1], key[3][0], key[3][1]);
|
||||
|
||||
memcpy(scratchpadPtr, cur_fill_state, sizeof(cur_fill_state));
|
||||
scratchpadPtr += 8;
|
||||
}
|
||||
|
||||
memcpy(fill_state, cur_fill_state, sizeof(cur_fill_state));
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
const uint64_t xkey0 = AES_HASH_1R_XKEY[i][0];
|
||||
const uint64_t xkey1 = AES_HASH_1R_XKEY[i][1];
|
||||
|
||||
aesenc_zkn(cur_hash_state[0][0], cur_hash_state[0][1], xkey0, xkey1);
|
||||
aesdec_zkn(cur_hash_state[1][0], cur_hash_state[1][1], xkey0, xkey1);
|
||||
aesenc_zkn(cur_hash_state[2][0], cur_hash_state[2][1], xkey0, xkey1);
|
||||
aesdec_zkn(cur_hash_state[3][0], cur_hash_state[3][1], xkey0, xkey1);
|
||||
}
|
||||
|
||||
memcpy(hash, cur_hash_state, sizeof(cur_hash_state));
|
||||
}
|
||||
199
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
Normal file
199
src/crypto/randomx/aes_hash_rv64_zvkned.cpp
Normal file
@@ -0,0 +1,199 @@
|
||||
/*
|
||||
Copyright (c) 2025 SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2025 XMRig <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
static FORCE_INLINE vuint32m1_t aesenc_zvkned(vuint32m1_t a, vuint32m1_t b) { return __riscv_vaesem_vv_u32m1(a, b, 8); }
|
||||
static FORCE_INLINE vuint32m1_t aesdec_zvkned(vuint32m1_t a, vuint32m1_t b, vuint32m1_t zero) { return __riscv_vxor_vv_u32m1(__riscv_vaesdm_vv_u32m1(a, zero, 8), b, 8); }
|
||||
|
||||
static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
|
||||
static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
|
||||
|
||||
static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
|
||||
static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
|
||||
|
||||
static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
|
||||
static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
|
||||
|
||||
static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
|
||||
static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 0, 4, 8, 12, 64, 68, 72, 76 };
|
||||
|
||||
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash)
|
||||
{
|
||||
const uint8_t* inptr = (const uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
||||
//intial state
|
||||
vuint32m1_t state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||
vuint32m1_t state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (inptr < inputEnd) {
|
||||
state02 = aesenc_zvkned(state02, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 0, stride, 8));
|
||||
state13 = aesdec_zvkned(state13, __riscv_vluxei32_v_u32m1((uint32_t*)inptr + 4, stride, 8), zero);
|
||||
|
||||
inptr += 64;
|
||||
}
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||
|
||||
state02 = aesenc_zvkned(state02, xkey00);
|
||||
state13 = aesdec_zvkned(state13, xkey00, zero);
|
||||
|
||||
state02 = aesenc_zvkned(state02, xkey11);
|
||||
state13 = aesdec_zvkned(state13, xkey11, zero);
|
||||
|
||||
//output hash
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, state13, 8);
|
||||
}
|
||||
|
||||
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
|
||||
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state02 = aesdec_zvkned(state02, key02, zero);
|
||||
state13 = aesenc_zvkned(state13, key13);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)state + 4, stride, state13, 8);
|
||||
}
|
||||
|
||||
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer)
|
||||
{
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
const vuint32m1_t stride4 = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
|
||||
|
||||
const vuint32m1_t key04 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 0), stride4, 8);
|
||||
const vuint32m1_t key15 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 1), stride4, 8);
|
||||
const vuint32m1_t key26 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 2), stride4, 8);
|
||||
const vuint32m1_t key37 = __riscv_vluxei32_v_u32m1((uint32_t*)(RandomX_CurrentConfig.fillAes4Rx4_Key + 3), stride4, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
vuint32m1_t state02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, stride, 8);
|
||||
vuint32m1_t state13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, stride, 8);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state02 = aesdec_zvkned(state02, key04, zero);
|
||||
state13 = aesenc_zvkned(state13, key04);
|
||||
|
||||
state02 = aesdec_zvkned(state02, key15, zero);
|
||||
state13 = aesenc_zvkned(state13, key15);
|
||||
|
||||
state02 = aesdec_zvkned(state02, key26, zero);
|
||||
state13 = aesenc_zvkned(state13, key26);
|
||||
|
||||
state02 = aesdec_zvkned(state02, key37, zero);
|
||||
state13 = aesenc_zvkned(state13, key37);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 0, stride, state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)outptr + 4, stride, state13, 8);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
|
||||
vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
|
||||
|
||||
const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
|
||||
const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
|
||||
|
||||
const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
|
||||
const vuint32m1_t zero = {};
|
||||
|
||||
vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
|
||||
vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
hash_state02 = aesenc_zvkned(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, 8));
|
||||
hash_state13 = aesdec_zvkned(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, 8), zero);
|
||||
|
||||
fill_state02 = aesdec_zvkned(fill_state02, key02, zero);
|
||||
fill_state13 = aesenc_zvkned(fill_state13, key13);
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 0, stride, fill_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + 4, stride, fill_state13, 8);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
}
|
||||
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
|
||||
const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
|
||||
|
||||
hash_state02 = aesenc_zvkned(hash_state02, xkey00);
|
||||
hash_state13 = aesdec_zvkned(hash_state13, xkey00, zero);
|
||||
|
||||
hash_state02 = aesenc_zvkned(hash_state02, xkey11);
|
||||
hash_state13 = aesdec_zvkned(hash_state13, xkey11, zero);
|
||||
|
||||
//output hash
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
|
||||
__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
|
||||
}
|
||||
@@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#pragma once
|
||||
|
||||
void hashAes1Rx4_zkn(const void *input, size_t inputSize, void *hash);
|
||||
void fillAes1Rx4_zkn(void *state, size_t outputSize, void *buffer);
|
||||
void fillAes4Rx4_zkn(void *state, size_t outputSize, void *buffer);
|
||||
void hashAndFillAes1Rx4_zkn(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash);
|
||||
void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer);
|
||||
void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer);
|
||||
void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
148
src/crypto/randomx/aes_hash_vaes512.cpp
Normal file
148
src/crypto/randomx/aes_hash_vaes512.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2026 XMRig <support@xmrig.com>
|
||||
Copyright (c) 2026 SChernykh <https://github.com/SChernykh>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <immintrin.h>
|
||||
|
||||
#define REVERSE_4(A, B, C, D) D, C, B, A
|
||||
|
||||
alignas(64) static const uint32_t AES_HASH_1R_STATE[] = {
|
||||
REVERSE_4(0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d),
|
||||
REVERSE_4(0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e),
|
||||
REVERSE_4(0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017),
|
||||
REVERSE_4(0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c)
|
||||
};
|
||||
|
||||
alignas(64) static const uint32_t AES_GEN_1R_KEY[] = {
|
||||
REVERSE_4(0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553),
|
||||
REVERSE_4(0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07),
|
||||
REVERSE_4(0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1),
|
||||
REVERSE_4(0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135)
|
||||
};
|
||||
|
||||
alignas(64) static const uint32_t AES_HASH_1R_XKEY0[] = {
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389),
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389),
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389),
|
||||
REVERSE_4(0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389)
|
||||
};
|
||||
|
||||
alignas(64) static const uint32_t AES_HASH_1R_XKEY1[] = {
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1),
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1),
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1),
|
||||
REVERSE_4(0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1)
|
||||
};
|
||||
|
||||
void hashAndFillAes1Rx4_VAES512(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
|
||||
{
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
const __m512i fill_key = _mm512_load_si512(AES_GEN_1R_KEY);
|
||||
|
||||
const __m512i initial_hash_state = _mm512_load_si512(AES_HASH_1R_STATE);
|
||||
const __m512i initial_fill_state = _mm512_load_si512(fill_state);
|
||||
|
||||
constexpr uint8_t mask = 0b11001100;
|
||||
|
||||
// enc_data[0] = hash_state[0]
|
||||
// enc_data[1] = fill_state[1]
|
||||
// enc_data[2] = hash_state[2]
|
||||
// enc_data[3] = fill_state[3]
|
||||
__m512i enc_data = _mm512_mask_blend_epi64(mask, initial_hash_state, initial_fill_state);
|
||||
|
||||
// dec_data[0] = fill_state[0]
|
||||
// dec_data[1] = hash_state[1]
|
||||
// dec_data[2] = fill_state[2]
|
||||
// dec_data[3] = hash_state[3]
|
||||
__m512i dec_data = _mm512_mask_blend_epi64(mask, initial_fill_state, initial_hash_state);
|
||||
|
||||
constexpr int PREFETCH_DISTANCE = 7168;
|
||||
|
||||
const uint8_t* prefetchPtr = scratchpadPtr + PREFETCH_DISTANCE;
|
||||
scratchpadEnd -= PREFETCH_DISTANCE;
|
||||
|
||||
for (const uint8_t* p = scratchpadPtr; p < prefetchPtr; p += 256) {
|
||||
_mm_prefetch((const char*)(p + 0), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(p + 64), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(p + 128), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(p + 192), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
const __m512i scratchpad_data = _mm512_load_si512(scratchpadPtr);
|
||||
|
||||
// enc_key[0] = scratchpad_data[0]
|
||||
// enc_key[1] = fill_key[1]
|
||||
// enc_key[2] = scratchpad_data[2]
|
||||
// enc_key[3] = fill_key[3]
|
||||
enc_data = _mm512_aesenc_epi128(enc_data, _mm512_mask_blend_epi64(mask, scratchpad_data, fill_key));
|
||||
|
||||
// dec_key[0] = fill_key[0]
|
||||
// dec_key[1] = scratchpad_data[1]
|
||||
// dec_key[2] = fill_key[2]
|
||||
// dec_key[3] = scratchpad_data[3]
|
||||
dec_data = _mm512_aesdec_epi128(dec_data, _mm512_mask_blend_epi64(mask, fill_key, scratchpad_data));
|
||||
|
||||
// fill_state[0] = dec_data[0]
|
||||
// fill_state[1] = enc_data[1]
|
||||
// fill_state[2] = dec_data[2]
|
||||
// fill_state[3] = enc_data[3]
|
||||
_mm512_store_si512(scratchpadPtr, _mm512_mask_blend_epi64(mask, dec_data, enc_data));
|
||||
|
||||
_mm_prefetch((const char*)prefetchPtr, _MM_HINT_T0);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
prefetchPtr += 64;
|
||||
}
|
||||
prefetchPtr = (const uint8_t*) scratchpad;
|
||||
scratchpadEnd += PREFETCH_DISTANCE;
|
||||
}
|
||||
|
||||
_mm512_store_si512(fill_state, _mm512_mask_blend_epi64(mask, dec_data, enc_data));
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
const __m512i xkey0 = _mm512_load_si512(AES_HASH_1R_XKEY0);
|
||||
const __m512i xkey1 = _mm512_load_si512(AES_HASH_1R_XKEY1);
|
||||
|
||||
enc_data = _mm512_aesenc_epi128(enc_data, xkey0);
|
||||
dec_data = _mm512_aesdec_epi128(dec_data, xkey0);
|
||||
enc_data = _mm512_aesenc_epi128(enc_data, xkey1);
|
||||
dec_data = _mm512_aesdec_epi128(dec_data, xkey1);
|
||||
|
||||
//output hash
|
||||
_mm512_store_si512(hash, _mm512_mask_blend_epi64(mask, enc_data, dec_data));
|
||||
|
||||
// Just in case
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
@@ -174,7 +174,7 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
|
||||
_mm_setcsr(rx_mxcsr_default | (mode << 13));
|
||||
}
|
||||
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors can't use doubles or 64 bit integers with SIMD
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -94,6 +94,10 @@ void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs,
|
||||
|
||||
case SuperscalarInstructionType::IROR_C:
|
||||
{
|
||||
#ifdef __riscv_zvkb
|
||||
// 57 30 00 52 vror.vi v0, v0, 0
|
||||
EMIT(0x52003057 | (dst << 7) | (dst << 20) | ((imm32 & 31) << 15) | ((imm32 & 32) << 21));
|
||||
#else // __riscv_zvkb
|
||||
const uint32_t shift_right = imm32 & 63;
|
||||
const uint32_t shift_left = 64 - shift_right;
|
||||
|
||||
@@ -121,6 +125,7 @@ void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs,
|
||||
|
||||
// 57 00 20 2B vor.vv v0, v18, v0
|
||||
EMIT(0x2B200057 | (dst << 7) | (dst << 15));
|
||||
#endif // __riscv_zvkb
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -355,44 +360,6 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
|
||||
|
||||
uint8_t* last_modified[RegistersCount] = { p, p, p, p, p, p, p, p };
|
||||
|
||||
uint8_t readReg01[RegistersCount] = {};
|
||||
|
||||
readReg01[pcfg.readReg0] = 1;
|
||||
readReg01[pcfg.readReg1] = 1;
|
||||
|
||||
uint32_t scratchpad_prefetch_pos = 0;
|
||||
|
||||
for (int32_t i = static_cast<int32_t>(prog.getSize()) - 1; i >= 0; --i) {
|
||||
Instruction instr = prog(i);
|
||||
|
||||
const InstructionType inst_type = static_cast<InstructionType>(inst_map[instr.opcode]);
|
||||
|
||||
if (inst_type == InstructionType::CBRANCH) {
|
||||
scratchpad_prefetch_pos = i;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst_type < InstructionType::FSWAP_R) {
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if ((inst_type == InstructionType::ISWAP_R) && (src != dst) && (readReg01[src] || readReg01[dst])) {
|
||||
scratchpad_prefetch_pos = i;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((inst_type == InstructionType::IMUL_RCP) && readReg01[dst] && !isZeroOrPowerOf2(instr.getImm32())) {
|
||||
scratchpad_prefetch_pos = i;
|
||||
break;
|
||||
}
|
||||
|
||||
if (readReg01[dst]) {
|
||||
scratchpad_prefetch_pos = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
|
||||
Instruction instr = prog(i);
|
||||
|
||||
@@ -849,16 +816,6 @@ void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguratio
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
|
||||
// Prefetch scratchpad lines for the next main loop iteration
|
||||
// scratchpad_prefetch_pos is a conservative estimate of the earliest place in the code where we can do it
|
||||
if (i == scratchpad_prefetch_pos) {
|
||||
uint8_t* e = (uint8_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_scratchpad_prefetch_end));
|
||||
const size_t n = e - ((uint8_t*)spaddr_xor2);
|
||||
|
||||
memcpy(p, spaddr_xor2, n);
|
||||
p += n;
|
||||
}
|
||||
}
|
||||
|
||||
const uint8_t* e;
|
||||
|
||||
@@ -75,7 +75,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode)
|
||||
.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode)
|
||||
.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch)
|
||||
.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch_end)
|
||||
|
||||
.global DECL(randomx_riscv64_vector_program_end)
|
||||
|
||||
@@ -614,8 +613,7 @@ DECL(randomx_riscv64_vector_program_main_loop_mx_xor):
|
||||
and x5, x5, x19 // x5 = (readReg2 ^ readReg3) & dataset mask
|
||||
xor x14, x14, x5 // mx ^= (readReg2 ^ readReg3) & dataset mask
|
||||
|
||||
and x5, x14, x19 // x5 = mx & dataset mask
|
||||
add x5, x5, x11 // x5 = &dataset[mx & dataset mask]
|
||||
add x5, x14, x11 // x5 = &dataset[mx & dataset mask]
|
||||
|
||||
#ifdef __riscv_zicbop
|
||||
prefetch.r (x5)
|
||||
@@ -643,7 +641,24 @@ DECL(randomx_riscv64_vector_program_main_loop_mx_xor):
|
||||
ld x6, 56(x5)
|
||||
xor x27, x27, x6
|
||||
|
||||
randomx_riscv64_vector_program_main_loop_swap_mx_ma:
|
||||
DECL(randomx_riscv64_vector_program_scratchpad_prefetch):
|
||||
xor x5, x20, x22 // spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
|
||||
srli x6, x5, 32 // x6 = spAddr1
|
||||
|
||||
and x5, x5, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
|
||||
and x6, x6, x9 // x6 = spAddr1 & 64-byte aligned L3 mask
|
||||
|
||||
c.add x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
|
||||
c.add x6, x12 // x6 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
|
||||
|
||||
#ifdef __riscv_zicbop
|
||||
prefetch.r (x5)
|
||||
prefetch.r (x6)
|
||||
#else
|
||||
ld x5, (x5)
|
||||
ld x6, (x6)
|
||||
#endif
|
||||
|
||||
// swap mx <-> ma
|
||||
#ifdef __riscv_zbb
|
||||
rori x14, x14, 32
|
||||
@@ -847,27 +862,7 @@ DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode):
|
||||
|
||||
addi sp, sp, 192
|
||||
|
||||
j randomx_riscv64_vector_program_main_loop_swap_mx_ma
|
||||
|
||||
DECL(randomx_riscv64_vector_program_scratchpad_prefetch):
|
||||
xor x5, x20, x22 // spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
|
||||
srli x6, x5, 32 // x6 = spAddr1
|
||||
|
||||
and x5, x5, x9 // x5 = spAddr0 & 64-byte aligned L3 mask
|
||||
and x6, x6, x9 // x6 = spAddr1 & 64-byte aligned L3 mask
|
||||
|
||||
c.add x5, x12 // x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
|
||||
c.add x6, x12 // x6 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
|
||||
|
||||
#ifdef __riscv_zicbop
|
||||
prefetch.r (x5)
|
||||
prefetch.r (x6)
|
||||
#else
|
||||
ld x5, (x5)
|
||||
ld x6, (x6)
|
||||
#endif
|
||||
|
||||
DECL(randomx_riscv64_vector_program_scratchpad_prefetch_end):
|
||||
j randomx_riscv64_vector_program_scratchpad_prefetch
|
||||
|
||||
DECL(randomx_riscv64_vector_program_end):
|
||||
|
||||
|
||||
@@ -66,7 +66,6 @@ void randomx_riscv64_vector_program_main_loop_instructions_end_light_mode();
|
||||
void randomx_riscv64_vector_program_main_loop_mx_xor_light_mode();
|
||||
void randomx_riscv64_vector_program_end();
|
||||
void randomx_riscv64_vector_program_scratchpad_prefetch();
|
||||
void randomx_riscv64_vector_program_scratchpad_prefetch_end();
|
||||
|
||||
void randomx_riscv64_vector_code_end();
|
||||
|
||||
|
||||
@@ -231,7 +231,7 @@ RANDOMX_EXPORT unsigned long randomx_dataset_item_count(void);
|
||||
*
|
||||
* @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL.
|
||||
* @param cache is a pointer to a previously allocated and initialized randomx_cache structure. Must not be NULL.
|
||||
* @param startItem is the item number where intialization should start.
|
||||
* @param startItem is the item number where initialization should start.
|
||||
* @param itemCount is the number of items that should be initialized.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount);
|
||||
|
||||
13
src/crypto/randomx/tests/riscv64_zvkb.s
Normal file
13
src/crypto/randomx/tests/riscv64_zvkb.s
Normal file
@@ -0,0 +1,13 @@
|
||||
/* RISC-V - test if the vector bit manipulation extension is present */
|
||||
|
||||
.text
|
||||
.option arch, rv64gcv_zvkb
|
||||
.global main
|
||||
|
||||
main:
|
||||
vsetivli zero, 8, e32, m1, ta, ma
|
||||
vror.vv v0, v0, v0
|
||||
vror.vx v0, v0, x5
|
||||
vror.vi v0, v0, 1
|
||||
li x10, 0
|
||||
ret
|
||||
12
src/crypto/randomx/tests/riscv64_zvkned.s
Normal file
12
src/crypto/randomx/tests/riscv64_zvkned.s
Normal file
@@ -0,0 +1,12 @@
|
||||
/* RISC-V - test if the vector bit manipulation extension is present */
|
||||
|
||||
.text
|
||||
.option arch, rv64gcv_zvkned
|
||||
.global main
|
||||
|
||||
main:
|
||||
vsetivli zero, 8, e32, m1, ta, ma
|
||||
vaesem.vv v0, v0
|
||||
vaesdm.vv v0, v0
|
||||
li x10, 0
|
||||
ret
|
||||
Reference in New Issue
Block a user