mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-07 07:55:04 -05:00
Compare commits
114 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ec13337228 | ||
|
|
cfe2a098ce | ||
|
|
a89c2c8dd1 | ||
|
|
1b4a124bc5 | ||
|
|
4bb8be8a29 | ||
|
|
d45bb24a32 | ||
|
|
5a7bcb2d03 | ||
|
|
f1ec8a18f6 | ||
|
|
7b4f768114 | ||
|
|
dfab81e9fa | ||
|
|
3025c265e8 | ||
|
|
ee603ab9e2 | ||
|
|
84f8a0dc54 | ||
|
|
481deff163 | ||
|
|
0e9ed351a1 | ||
|
|
8952f6892d | ||
|
|
d51fe01273 | ||
|
|
f7d6348948 | ||
|
|
3a01ebe277 | ||
|
|
189cc78d44 | ||
|
|
9be3b69109 | ||
|
|
7b38af703e | ||
|
|
bef9031b03 | ||
|
|
e4929d7c06 | ||
|
|
1e26e58660 | ||
|
|
8fe0577d60 | ||
|
|
64f42feba9 | ||
|
|
36ed0b4309 | ||
|
|
cb0bba7e10 | ||
|
|
51a72afb0e | ||
|
|
b1b0a3ba95 | ||
|
|
9768bf65d1 | ||
|
|
1584cca6d1 | ||
|
|
891a46382e | ||
|
|
db920e8006 | ||
|
|
768a4581e0 | ||
|
|
866245b525 | ||
|
|
c7476e076b | ||
|
|
d11a313d88 | ||
|
|
8d1168385a | ||
|
|
852fe14604 | ||
|
|
30be1cd102 | ||
|
|
fa0bb0e1bf | ||
|
|
a05393727c | ||
|
|
adf833b60a | ||
|
|
23daceb4dc | ||
|
|
4a9db89527 | ||
|
|
060c1af4c4 | ||
|
|
b826985d05 | ||
|
|
0f09883429 | ||
|
|
a84b45b1bb | ||
|
|
a5b6383f7b | ||
|
|
24f8f76714 | ||
|
|
ba336122c0 | ||
|
|
591744174c | ||
|
|
fc85017948 | ||
|
|
24f541a0dd | ||
|
|
f552577e71 | ||
|
|
a06ec06e8b | ||
|
|
96833d4790 | ||
|
|
5611ae9a30 | ||
|
|
72c8404d18 | ||
|
|
bc128d11d9 | ||
|
|
ff13675d31 | ||
|
|
4b682b6633 | ||
|
|
879e160ba3 | ||
|
|
9a6b8594f3 | ||
|
|
a354e9d217 | ||
|
|
950b5fa75e | ||
|
|
9f66d59c0a | ||
|
|
9d99fef52e | ||
|
|
3b22f1704f | ||
|
|
c89ad6b36d | ||
|
|
45300f1ff5 | ||
|
|
847d08cdbc | ||
|
|
81af1e964d | ||
|
|
3662e45435 | ||
|
|
f06e30e343 | ||
|
|
34d4aa4012 | ||
|
|
3e4bf8cd6c | ||
|
|
206b675892 | ||
|
|
00b4ae9c36 | ||
|
|
8d5ea745bb | ||
|
|
cac48cdd27 | ||
|
|
c20010ed54 | ||
|
|
5926dee354 | ||
|
|
b78b0b5c6b | ||
|
|
43afa437e4 | ||
|
|
050568a4ab | ||
|
|
8bf40cea36 | ||
|
|
ae3ff0f570 | ||
|
|
0addf91a70 | ||
|
|
abb78302b8 | ||
|
|
e5579d8635 | ||
|
|
3986c43fa5 | ||
|
|
838cc08680 | ||
|
|
a0fe49f946 | ||
|
|
70dbe8562c | ||
|
|
41fcd1e49a | ||
|
|
90195caa1d | ||
|
|
cdb6287d89 | ||
|
|
32e9b7e34a | ||
|
|
6484bbb716 | ||
|
|
e59806d6ae | ||
|
|
299b180b28 | ||
|
|
1acd88ed39 | ||
|
|
109c088e8a | ||
|
|
bb18239642 | ||
|
|
ccded7cc0a | ||
|
|
5bc89fdc8b | ||
|
|
70c7f33a20 | ||
|
|
1ec185a3a0 | ||
|
|
6aa4eeefbb | ||
|
|
89e6998054 |
41
CHANGELOG.md
41
CHANGELOG.md
@@ -1,3 +1,44 @@
|
||||
# v6.3.5
|
||||
- [#1845](https://github.com/xmrig/xmrig/pull/1845) [#1861](https://github.com/xmrig/xmrig/pull/1861) Fixed ARM build and added CMake option `WITH_SSE4_1`.
|
||||
- [#1846](https://github.com/xmrig/xmrig/pull/1846) KawPow: fixed OpenCL memory leak.
|
||||
- [#1849](https://github.com/xmrig/xmrig/pull/1849) [#1859](https://github.com/xmrig/xmrig/pull/1859) RandomX: optimized soft AES code.
|
||||
- [#1850](https://github.com/xmrig/xmrig/pull/1850) [#1852](https://github.com/xmrig/xmrig/pull/1852) General code improvements.
|
||||
- [#1853](https://github.com/xmrig/xmrig/issues/1853) [#1856](https://github.com/xmrig/xmrig/pull/1856) [#1857](https://github.com/xmrig/xmrig/pull/1857) Fixed crash on old CPUs.
|
||||
|
||||
# v6.3.4
|
||||
- [#1823](https://github.com/xmrig/xmrig/pull/1823) RandomX: added new option `scratchpad_prefetch_mode`.
|
||||
- [#1827](https://github.com/xmrig/xmrig/pull/1827) [#1831](https://github.com/xmrig/xmrig/pull/1831) Improved nonce iteration performance.
|
||||
- [#1828](https://github.com/xmrig/xmrig/pull/1828) RandomX: added SSE4.1-optimized Blake2b.
|
||||
- [#1830](https://github.com/xmrig/xmrig/pull/1830) RandomX: added performance profiler (for developers).
|
||||
- [#1835](https://github.com/xmrig/xmrig/pull/1835) RandomX: returned old soft AES implementation and added auto-select between the two.
|
||||
- [#1840](https://github.com/xmrig/xmrig/pull/1840) RandomX: moved more stuff to compile time, small x86 JIT compiler speedup.
|
||||
- [#1841](https://github.com/xmrig/xmrig/pull/1841) Fixed Cryptonight OpenCL for AMD 20.7.2 drivers.
|
||||
- [#1842](https://github.com/xmrig/xmrig/pull/1842) RandomX: AES improvements, a bit faster hardware AES code when compiled with MSVC.
|
||||
- [#1843](https://github.com/xmrig/xmrig/pull/1843) RandomX: improved performance of GCC compiled binaries.
|
||||
|
||||
# v6.3.3
|
||||
- [#1817](https://github.com/xmrig/xmrig/pull/1817) Fixed self-select login sequence.
|
||||
- Added brand new [build from source](https://xmrig.com/docs/miner/build) documentation.
|
||||
- New binary downloads for macOS (`macos-x64`), FreeBSD (`freebsd-static-x64`), Linux (`linux-static-x64`), Ubuntu 18.04 (`bionic-x64`), Ubuntu 20.04 (`focal-x64`).
|
||||
- Generic Linux download `xenial-x64` renamed to `linux-x64`.
|
||||
- Builds without SSL/TLS support are no longer provided.
|
||||
- Improved CUDA loader error reporting and fixed plugin load on Linux.
|
||||
- Fixed build warnings with Clang compiler.
|
||||
- Fixed colors on macOS.
|
||||
|
||||
# v6.3.2
|
||||
- [#1794](https://github.com/xmrig/xmrig/pull/1794) More robust 1 GB pages handling.
|
||||
- Don't allocate 1 GB per thread if 1 GB is the default huge page size.
|
||||
- Try to allocate scratchpad from dataset's 1 GB huge pages, if normal huge pages are not available.
|
||||
- Correctly initialize RandomX cache if 1 GB pages fail to allocate on a first NUMA node.
|
||||
- [#1806](https://github.com/xmrig/xmrig/pull/1806) Fixed macOS battery detection.
|
||||
- [#1809](https://github.com/xmrig/xmrig/issues/1809) Improved auto configuration on ARM CPUs.
|
||||
- Added retrieving ARM CPU names, based on lscpu code and database.
|
||||
|
||||
# v6.3.1
|
||||
- [#1786](https://github.com/xmrig/xmrig/pull/1786) Added `pause-on-battery` option, supported on Windows and Linux.
|
||||
- Added command line options `--randomx-cache-qos` and `--argon2-impl`.
|
||||
|
||||
# v6.3.0
|
||||
- [#1771](https://github.com/xmrig/xmrig/pull/1771) Adopted new SSE2NEON and reduced ARM-specific changes.
|
||||
- [#1774](https://github.com/xmrig/xmrig/pull/1774) RandomX: Added new option `cache_qos` in `randomx` object for cache QoS support.
|
||||
|
||||
@@ -23,6 +23,8 @@ option(WITH_NVML "Enable NVML (NVIDIA Management Library) support (on
|
||||
option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
|
||||
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||
|
||||
option(BUILD_STATIC "Build static binary" OFF)
|
||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||
@@ -143,6 +145,8 @@ elseif (XMRIG_OS_APPLE)
|
||||
src/App_unix.cpp
|
||||
src/crypto/common/VirtualMemory_unix.cpp
|
||||
)
|
||||
find_library(IOKIT_LIBRARY IOKit)
|
||||
set(EXTRA_LIBS ${IOKIT_LIBRARY})
|
||||
else()
|
||||
list(APPEND SOURCES_OS
|
||||
src/App_unix.cpp
|
||||
@@ -196,10 +200,6 @@ include_directories(src)
|
||||
include_directories(src/3rdparty)
|
||||
include_directories(${UV_INCLUDE_DIR})
|
||||
|
||||
if (BUILD_STATIC)
|
||||
set(CMAKE_EXE_LINKER_FLAGS " -static")
|
||||
endif()
|
||||
|
||||
if (WITH_DEBUG_LOG)
|
||||
add_definitions(/DAPP_DEBUG)
|
||||
endif()
|
||||
@@ -211,3 +211,7 @@ if (WIN32)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release)
|
||||
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} ${CMAKE_PROJECT_NAME})
|
||||
endif()
|
||||
|
||||
121
README.md
121
README.md
@@ -2,130 +2,35 @@
|
||||
|
||||
[](https://github.com/xmrig/xmrig/releases)
|
||||
[](https://github.com/xmrig/xmrig/releases)
|
||||
[](https://github.com/xmrig/xmrig/releases)
|
||||
[](https://github.com/xmrig/xmrig/releases)
|
||||
[](https://github.com/xmrig/xmrig/blob/master/LICENSE)
|
||||
[](https://github.com/xmrig/xmrig/stargazers)
|
||||
[](https://github.com/xmrig/xmrig/network)
|
||||
|
||||
XMRig High performance, open source, cross platform RandomX, KawPow, CryptoNight, AstroBWT and Argon2 CPU/GPU miner, with official support for Windows.
|
||||
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and AstroBWT unified CPU/GPU miner. Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
||||
|
||||
## Mining backends
|
||||
- **CPU** (x64/x86/ARM)
|
||||
- **CPU** (x64/ARMv8)
|
||||
- **OpenCL** for AMD GPUs.
|
||||
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
|
||||
|
||||
<img src="doc/screenshot_v5_2_0.png" width="833" >
|
||||
|
||||
## Download
|
||||
* Binary releases: https://github.com/xmrig/xmrig/releases
|
||||
* Git tree: https://github.com/xmrig/xmrig.git
|
||||
* Clone with `git clone https://github.com/xmrig/xmrig.git` :hammer: [Build instructions](https://github.com/xmrig/xmrig/wiki/Build).
|
||||
* **[Binary releases](https://github.com/xmrig/xmrig/releases)**
|
||||
* **[Build from source](https://xmrig.com/docs/miner/build)**
|
||||
|
||||
## Usage
|
||||
The preferred way to configure the miner is the [JSON config file](src/config.json) as it is more flexible and human friendly. The command line interface does not cover all features, such as mining profiles for different algorithms. Important options can be changed during runtime without miner restart by editing the config file or executing API calls.
|
||||
The preferred way to configure the miner is the [JSON config file](src/config.json) as it is more flexible and human friendly. The [command line interface](https://xmrig.com/docs/miner/command-line-options) does not cover all features, such as mining profiles for different algorithms. Important options can be changed during runtime without miner restart by editing the config file or executing API calls.
|
||||
|
||||
* **[xmrig.com/wizard](https://xmrig.com/wizard)** helps you create initial configuration for the miner.
|
||||
* **[workers.xmrig.info](http://workers.xmrig.info)** helps manage your miners via HTTP API.
|
||||
|
||||
### Command line options
|
||||
```
|
||||
Network:
|
||||
-o, --url=URL URL of mining server
|
||||
-a, --algo=ALGO mining algorithm https://xmrig.com/docs/algorithms
|
||||
--coin=COIN specify coin instead of algorithm
|
||||
-u, --user=USERNAME username for mining server
|
||||
-p, --pass=PASSWORD password for mining server
|
||||
-O, --userpass=U:P username:password pair for mining server
|
||||
-x, --proxy=HOST:PORT connect through a SOCKS5 proxy
|
||||
-k, --keepalive send keepalive packet for prevent timeout (needs pool support)
|
||||
--nicehash enable nicehash.com support
|
||||
--rig-id=ID rig identifier for pool-side statistics (needs pool support)
|
||||
--tls enable SSL/TLS support (needs pool support)
|
||||
--tls-fingerprint=HEX pool TLS certificate fingerprint for strict certificate pinning
|
||||
--daemon use daemon RPC instead of pool for solo mining
|
||||
--daemon-poll-interval=N daemon poll interval in milliseconds (default: 1000)
|
||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)
|
||||
-R, --retry-pause=N time to pause between retries (default: 5)
|
||||
--user-agent set custom user-agent string for pool
|
||||
--donate-level=N donate level, default 5%% (5 minutes in 100 minutes)
|
||||
--donate-over-proxy=N control donate over xmrig-proxy feature
|
||||
|
||||
CPU backend:
|
||||
--no-cpu disable CPU mining backend
|
||||
-t, --threads=N number of CPU threads
|
||||
-v, --av=N algorithm variation, 0 auto select
|
||||
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1
|
||||
--cpu-priority set process priority (0 idle, 2 normal to 5 highest)
|
||||
--cpu-max-threads-hint=N maximum CPU threads count (in percentage) hint for autoconfig
|
||||
--cpu-memory-pool=N number of 2 MB pages for persistent memory pool, -1 (auto), 0 (disable)
|
||||
--cpu-no-yield prefer maximum hashrate rather than system response/stability
|
||||
--no-huge-pages disable huge pages support
|
||||
--asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer
|
||||
--randomx-init=N thread count to initialize RandomX dataset
|
||||
--randomx-no-numa disable NUMA support for RandomX
|
||||
--randomx-mode=MODE RandomX mode: auto, fast, light
|
||||
--randomx-1gb-pages use 1GB hugepages for dataset (Linux only)
|
||||
--randomx-wrmsr=N write custom value (0-15) to Intel MSR register 0x1a4 or disable MSR mod (-1)
|
||||
--randomx-no-rdmsr disable reverting initial MSR values on exit
|
||||
--astrobwt-max-size=N skip hashes with large stage 2 size, default: 550, min: 400, max: 1200
|
||||
--astrobwt-avx2 enable AVX2 optimizations for AstroBWT algorithm
|
||||
|
||||
API:
|
||||
--api-worker-id=ID custom worker-id for API
|
||||
--api-id=ID custom instance ID for API
|
||||
--http-host=HOST bind host for HTTP API (default: 127.0.0.1)
|
||||
--http-port=N bind port for HTTP API
|
||||
--http-access-token=T access token for HTTP API
|
||||
--http-no-restricted enable full remote access to HTTP API (only if access token set)
|
||||
|
||||
OpenCL backend:
|
||||
--opencl enable OpenCL mining backend
|
||||
--opencl-devices=N comma separated list of OpenCL devices to use
|
||||
--opencl-platform=N OpenCL platform index or name
|
||||
--opencl-loader=PATH path to OpenCL-ICD-Loader (OpenCL.dll or libOpenCL.so)
|
||||
--opencl-no-cache disable OpenCL cache
|
||||
--print-platforms print available OpenCL platforms and exit
|
||||
|
||||
CUDA backend:
|
||||
--cuda enable CUDA mining backend
|
||||
--cuda-loader=PATH path to CUDA plugin (xmrig-cuda.dll or libxmrig-cuda.so)
|
||||
--cuda-devices=N comma separated list of CUDA devices to use
|
||||
--cuda-bfactor-hint=N bfactor hint for autoconfig (0-12)
|
||||
--cuda-bsleep-hint=N bsleep hint for autoconfig
|
||||
--no-nvml disable NVML (NVIDIA Management Library) support
|
||||
|
||||
TLS:
|
||||
--tls-gen=HOSTNAME generate TLS certificate for specific hostname
|
||||
--tls-cert=FILE load TLS certificate chain from a file in the PEM format
|
||||
--tls-cert-key=FILE load TLS certificate private key from a file in the PEM format
|
||||
--tls-dhparam=FILE load DH parameters for DHE ciphers from a file in the PEM format
|
||||
--tls-protocols=N enable specified TLS protocols, example: "TLSv1 TLSv1.1 TLSv1.2 TLSv1.3"
|
||||
--tls-ciphers=S set list of available ciphers (TLSv1.2 and below)
|
||||
--tls-ciphersuites=S set list of available TLSv1.3 ciphersuites
|
||||
|
||||
Logging:
|
||||
-S, --syslog use system log for output messages
|
||||
-l, --log-file=FILE log all output to a file
|
||||
--print-time=N print hashrate report every N seconds
|
||||
--health-print-time=N print health report every N seconds
|
||||
--no-color disable colored output
|
||||
--verbose verbose output
|
||||
|
||||
Misc:
|
||||
-c, --config=FILE load a JSON-format configuration file
|
||||
-B, --background run the miner in the background
|
||||
-V, --version output version information and exit
|
||||
-h, --help display this help and exit
|
||||
--dry-run test configuration and exit
|
||||
--export-topology export hwloc topology to a XML file and exit
|
||||
--title set custom console window title
|
||||
--no-title disable setting console window title
|
||||
```
|
||||
* **[Wizard](https://xmrig.com/wizard)** helps you create initial configuration for the miner.
|
||||
* **[Workers](http://workers.xmrig.info)** helps manage your miners via HTTP API.
|
||||
|
||||
## Donations
|
||||
* Default donation 5% (5 minutes in 100 minutes) can be reduced to 1% via option `donate-level` or disabled in source code.
|
||||
* Default donation 1% (1 minute in 100 minutes) can be increased via option `donate-level` or disabled in source code.
|
||||
* XMR: `48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD`
|
||||
* BTC: `1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT`
|
||||
|
||||
## Developers
|
||||
* **[xmrig](https://github.com/xmrig)**
|
||||
* **[sech1](https://github.com/SChernykh)**
|
||||
|
||||
## Contacts
|
||||
* support@xmrig.com
|
||||
|
||||
@@ -10,6 +10,11 @@ if (WITH_TLS)
|
||||
set(OPENSSL_USE_STATIC_LIBS TRUE)
|
||||
endif()
|
||||
|
||||
if (BUILD_STATIC)
|
||||
set(OPENSSL_USE_STATIC_LIBS TRUE)
|
||||
endif()
|
||||
|
||||
|
||||
find_package(OpenSSL)
|
||||
|
||||
if (OPENSSL_FOUND)
|
||||
|
||||
@@ -2,9 +2,10 @@ if (NOT CMAKE_SYSTEM_PROCESSOR)
|
||||
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
|
||||
endif()
|
||||
|
||||
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_definitions(/DRAPIDJSON_SSE2)
|
||||
else()
|
||||
set(WITH_SSE4_1 OFF)
|
||||
endif()
|
||||
|
||||
if (NOT ARM_TARGET)
|
||||
@@ -41,3 +42,7 @@ if (ARM_TARGET AND ARM_TARGET GREATER 6)
|
||||
add_definitions(/DXMRIG_ARMv7)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_SSE4_1)
|
||||
add_definitions(/DXMRIG_FEATURE_SSE4_1)
|
||||
endif()
|
||||
|
||||
@@ -45,6 +45,10 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
||||
endif()
|
||||
|
||||
if (BUILD_STATIC)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
|
||||
endif()
|
||||
|
||||
add_definitions(/D_GNU_SOURCE)
|
||||
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.1.0")
|
||||
@@ -60,6 +64,9 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
|
||||
set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Ob1 /GL")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Ob1 /GL")
|
||||
|
||||
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
|
||||
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
|
||||
add_definitions(/DNOMINMAX)
|
||||
@@ -89,6 +96,10 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_STATIC)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
if (NOT WIN32)
|
||||
|
||||
@@ -64,6 +64,14 @@ if (WITH_RANDOMX)
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
if (WITH_SSE4_1)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/jit_compiler_x86.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-const-variable)
|
||||
endif()
|
||||
|
||||
1
doc/build/CMAKE_OPTIONS.md
vendored
1
doc/build/CMAKE_OPTIONS.md
vendored
@@ -22,6 +22,7 @@ This feature add external dependency to libhwloc (1.10.0+) (except MSVC builds).
|
||||
* **`-DWITH_EMBEDDED_CONFIG=ON`** Enable [embedded](https://github.com/xmrig/xmrig/issues/957) config support.
|
||||
* **`-DWITH_OPENCL=OFF`** Disable OpenCL backend.
|
||||
* **`-DWITH_CUDA=OFF`** Disable CUDA backend.
|
||||
* **`-DWITH_SSE4_1=OFF`** Disable SSE 4.1 for Blake2 (useful for arm builds).
|
||||
|
||||
## Debug options
|
||||
|
||||
|
||||
@@ -8,12 +8,12 @@ mkdir -p deps/lib
|
||||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-${HWLOC_VERSION}.tar.bz2 -O hwloc-${HWLOC_VERSION}.tar.bz2
|
||||
tar -xjf hwloc-${HWLOC_VERSION}.tar.bz2
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
|
||||
tar -xzf hwloc-${HWLOC_VERSION}.tar.gz
|
||||
|
||||
cd hwloc-${HWLOC_VERSION}
|
||||
./configure --disable-shared --enable-static --disable-io --disable-libudev --disable-libxml2
|
||||
make -j$(nproc)
|
||||
cp -fr include/ ../../deps
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp hwloc/.libs/libhwloc.a ../../deps/lib
|
||||
cd ..
|
||||
19
scripts/build.hwloc1.sh
Executable file
19
scripts/build.hwloc1.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
HWLOC_VERSION="1.11.13"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
mkdir -p deps/lib
|
||||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://download.open-mpi.org/release/hwloc/v1.11/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
|
||||
tar -xzf hwloc-${HWLOC_VERSION}.tar.gz
|
||||
|
||||
cd hwloc-${HWLOC_VERSION}
|
||||
./configure --disable-shared --enable-static --disable-io --disable-libudev --disable-libxml2
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp src/.libs/libhwloc.a ../../deps/lib
|
||||
cd ..
|
||||
@@ -13,8 +13,8 @@ tar -xzf libressl-${LIBRESSL_VERSION}.tar.gz
|
||||
|
||||
cd libressl-${LIBRESSL_VERSION}
|
||||
./configure --disable-shared
|
||||
make -j$(nproc)
|
||||
cp -fr include/ ../../deps
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp crypto/.libs/libcrypto.a ../../deps/lib
|
||||
cp ssl/.libs/libssl.a ../../deps/lib
|
||||
cd ..
|
||||
@@ -13,8 +13,8 @@ tar -xzf openssl-${OPENSSL_VERSION}.tar.gz
|
||||
|
||||
cd openssl-${OPENSSL_VERSION}
|
||||
./config -no-shared -no-asm -no-zlib -no-comp -no-dgram -no-filenames -no-cms
|
||||
make -j$(nproc)
|
||||
cp -fr include/ ../../deps
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp libcrypto.a ../../deps/lib
|
||||
cp libssl.a ../../deps/lib
|
||||
cd ..
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
UV_VERSION="1.38.0"
|
||||
UV_VERSION="1.38.1"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
@@ -14,7 +14,7 @@ tar -xzf v${UV_VERSION}.tar.gz
|
||||
cd libuv-${UV_VERSION}
|
||||
sh autogen.sh
|
||||
./configure --disable-shared
|
||||
make -j$(nproc)
|
||||
cp -fr include/ ../../deps
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp .libs/libuv.a ../../deps/lib
|
||||
cd ..
|
||||
@@ -47,11 +47,6 @@ const char *cuda_tag();
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
const char *rx_tag();
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
|
||||
@@ -66,14 +66,12 @@ public:
|
||||
|
||||
inline bool nextRound(uint32_t rounds, uint32_t roundSize)
|
||||
{
|
||||
bool ok = true;
|
||||
m_rounds[index()]++;
|
||||
|
||||
if ((m_rounds[index()] % rounds) == 0) {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
*nonce(i) = Nonce::next(index(), *nonce(i), rounds * roundSize, currentJob().isNicehash(), &ok);
|
||||
if (!ok) {
|
||||
break;
|
||||
if (!Nonce::next(index(), nonce(i), rounds * roundSize, nonceMask())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -83,13 +81,14 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
return ok;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
inline int32_t nonceOffset() const { return currentJob().nonceOffset(); }
|
||||
inline size_t nonceSize() const { return currentJob().nonceSize(); }
|
||||
inline uint64_t nonceMask() const { return m_nonce_mask[index()]; }
|
||||
|
||||
inline void save(const Job &job, uint32_t reserveCount, Nonce::Backend backend)
|
||||
{
|
||||
@@ -97,12 +96,13 @@ private:
|
||||
const size_t size = job.size();
|
||||
m_jobs[index()] = job;
|
||||
m_rounds[index()] = 0;
|
||||
m_nonce_mask[index()] = job.nonceMask();
|
||||
|
||||
m_jobs[index()].setBackend(backend);
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
memcpy(m_blobs[index()] + (i * size), job.blob(), size);
|
||||
*nonce(i) = Nonce::next(index(), *nonce(i), reserveCount, job.isNicehash());
|
||||
Nonce::next(index(), nonce(i), reserveCount, nonceMask());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,6 +110,7 @@ private:
|
||||
alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{};
|
||||
Job m_jobs[2];
|
||||
uint32_t m_rounds[2] = { 0, 0 };
|
||||
uint64_t m_nonce_mask[2];
|
||||
uint64_t m_sequence = 0;
|
||||
uint8_t m_index = 0;
|
||||
};
|
||||
@@ -125,41 +126,23 @@ inline uint32_t *xmrig::WorkerJob<1>::nonce(size_t)
|
||||
template<>
|
||||
inline bool xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize)
|
||||
{
|
||||
bool ok = true;
|
||||
m_rounds[index()]++;
|
||||
|
||||
uint32_t* n = nonce();
|
||||
const uint32_t prev_nonce = *n;
|
||||
|
||||
if ((m_rounds[index()] % rounds) == 0) {
|
||||
*n = Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok);
|
||||
if (!Nonce::next(index(), n, rounds * roundSize, nonceMask())) {
|
||||
return false;
|
||||
}
|
||||
if (nonceSize() == sizeof(uint64_t)) {
|
||||
m_jobs[index()].nonce()[1] = n[1];
|
||||
}
|
||||
}
|
||||
else {
|
||||
*n += roundSize;
|
||||
}
|
||||
|
||||
// Increment higher 32 bits of a 64-bit nonce when lower 32 bits overflow
|
||||
if (!currentJob().isNicehash() && (nonceSize() == sizeof(uint64_t))) {
|
||||
const bool wrapped = (*n < prev_nonce);
|
||||
const bool wraps_this_round = (static_cast<uint64_t>(*n) + roundSize > (1ULL << 32));
|
||||
|
||||
// Account for the case when starting nonce hasn't wrapped yet, but some nonces in the current round will wrap
|
||||
if (wrapped || wraps_this_round) {
|
||||
// Set lower 32 bits to 0 when higher 32 bits change
|
||||
Nonce::reset(index());
|
||||
|
||||
// Sets *n to 0 and Nonce::m_nonce[index] to the correct next value
|
||||
*n = 0;
|
||||
Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok);
|
||||
|
||||
++n[1];
|
||||
|
||||
Job& job = m_jobs[index()];
|
||||
memcpy(job.blob(), blob(), job.size());
|
||||
}
|
||||
}
|
||||
|
||||
return ok;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -169,11 +152,12 @@ inline void xmrig::WorkerJob<1>::save(const Job &job, uint32_t reserveCount, Non
|
||||
m_index = job.index();
|
||||
m_jobs[index()] = job;
|
||||
m_rounds[index()] = 0;
|
||||
m_nonce_mask[index()] = job.nonceMask();
|
||||
|
||||
m_jobs[index()].setBackend(backend);
|
||||
|
||||
memcpy(blob(), job.blob(), job.size());
|
||||
*nonce() = Nonce::next(index(), *nonce(), reserveCount, currentJob().isNicehash());
|
||||
Nonce::next(index(), nonce(), reserveCount, nonceMask());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "crypto/common/Nonce.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
#include "crypto/rx/Rx.h"
|
||||
#include "crypto/rx/RxDataset.h"
|
||||
#include "crypto/rx/RxVm.h"
|
||||
#include "net/JobResults.h"
|
||||
|
||||
@@ -118,7 +119,9 @@ void xmrig::CpuWorker<N>::allocateRandomX_VM()
|
||||
}
|
||||
|
||||
if (!m_vm) {
|
||||
m_vm = RxVm::create(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node);
|
||||
// Try to allocate scratchpad from dataset's 1 GB huge pages, if normal huge pages are not available
|
||||
uint8_t* scratchpad = m_memory->isHugePages() ? m_memory->scratchpad() : dataset->tryAllocateScrathpad();
|
||||
m_vm = RxVm::create(dataset, scratchpad ? scratchpad : m_memory->scratchpad(), !m_hwAES, m_assembly, m_node);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -72,6 +72,10 @@ endif()
|
||||
|
||||
if (XMRIG_ARM)
|
||||
list(APPEND SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo_arm.cpp)
|
||||
|
||||
if (XMRIG_OS_UNIX)
|
||||
list(APPEND SOURCES_CPUID src/backend/cpu/platform/lscpu_arm.cpp)
|
||||
endif()
|
||||
else()
|
||||
list(APPEND SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo.cpp)
|
||||
endif()
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#include "backend/cpu/CpuThreads.h"
|
||||
#include "base/crypto/Algorithm.h"
|
||||
#include "base/tools/Object.h"
|
||||
#include "crypto/common/Assembly.h"
|
||||
|
||||
|
||||
@@ -37,6 +38,8 @@ namespace xmrig {
|
||||
class ICpuInfo
|
||||
{
|
||||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE(ICpuInfo)
|
||||
|
||||
enum Vendor : uint32_t {
|
||||
VENDOR_UNKNOWN,
|
||||
VENDOR_INTEL,
|
||||
@@ -60,12 +63,14 @@ public:
|
||||
FLAG_PDPE1GB,
|
||||
FLAG_SSE2,
|
||||
FLAG_SSSE3,
|
||||
FLAG_SSE41,
|
||||
FLAG_XOP,
|
||||
FLAG_POPCNT,
|
||||
FLAG_CAT_L3,
|
||||
FLAG_MAX
|
||||
};
|
||||
|
||||
ICpuInfo() = default;
|
||||
virtual ~ICpuInfo() = default;
|
||||
|
||||
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
|
||||
@@ -93,6 +98,7 @@ public:
|
||||
virtual size_t packages() const = 0;
|
||||
virtual size_t threads() const = 0;
|
||||
virtual Vendor vendor() const = 0;
|
||||
virtual bool jccErratum() const = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" };
|
||||
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3" };
|
||||
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
|
||||
|
||||
|
||||
@@ -141,6 +141,7 @@ static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES,
|
||||
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
|
||||
static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, EDX_Reg, 1 << 26); }
|
||||
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
|
||||
static inline bool has_sse41() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 19); }
|
||||
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
|
||||
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
|
||||
static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }
|
||||
@@ -177,6 +178,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
m_flags.set(FLAG_PDPE1GB, has_pdpe1gb());
|
||||
m_flags.set(FLAG_SSE2, has_sse2());
|
||||
m_flags.set(FLAG_SSSE3, has_ssse3());
|
||||
m_flags.set(FLAG_SSE41, has_sse41());
|
||||
m_flags.set(FLAG_XOP, has_xop());
|
||||
m_flags.set(FLAG_POPCNT, has_popcnt());
|
||||
m_flags.set(FLAG_CAT_L3, has_cat_l3());
|
||||
@@ -210,6 +212,37 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
m_vendor = VENDOR_INTEL;
|
||||
m_assembly = Assembly::INTEL;
|
||||
m_msrMod = MSR_MOD_INTEL;
|
||||
|
||||
struct
|
||||
{
|
||||
unsigned int stepping : 4;
|
||||
unsigned int model : 4;
|
||||
unsigned int family : 4;
|
||||
unsigned int processor_type : 2;
|
||||
unsigned int reserved1 : 2;
|
||||
unsigned int ext_model : 4;
|
||||
unsigned int ext_family : 8;
|
||||
unsigned int reserved2 : 4;
|
||||
} processor_info;
|
||||
|
||||
cpuid(1, data);
|
||||
memcpy(&processor_info, data, sizeof(processor_info));
|
||||
|
||||
// Intel JCC erratum mitigation
|
||||
if (processor_info.family == 6) {
|
||||
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
|
||||
const uint32_t stepping = processor_info.stepping;
|
||||
|
||||
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||
m_jccErratum =
|
||||
((model == 0x4E) && (stepping == 0x3)) ||
|
||||
((model == 0x55) && (stepping == 0x4)) ||
|
||||
((model == 0x5E) && (stepping == 0x3)) ||
|
||||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||
((model == 0xA6) && (stepping == 0x0)) ||
|
||||
((model == 0xAE) && (stepping == 0xA));
|
||||
}
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -61,11 +61,13 @@ protected:
|
||||
inline size_t packages() const override { return 1; }
|
||||
inline size_t threads() const override { return m_threads; }
|
||||
inline Vendor vendor() const override { return m_vendor; }
|
||||
inline bool jccErratum() const override { return m_jccErratum; }
|
||||
|
||||
protected:
|
||||
char m_brand[64 + 6]{};
|
||||
size_t m_threads;
|
||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||
bool m_jccErratum = false;
|
||||
|
||||
private:
|
||||
Assembly m_assembly = Assembly::NONE;
|
||||
|
||||
@@ -1,10 +1,4 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <support@xmrig.com>
|
||||
*
|
||||
@@ -22,6 +16,10 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/tools/String.h"
|
||||
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <thread>
|
||||
@@ -37,6 +35,15 @@
|
||||
#include "3rdparty/rapidjson/document.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_OS_UNIX
|
||||
namespace xmrig {
|
||||
|
||||
extern String cpu_name_arm();
|
||||
|
||||
} // namespace xmrig
|
||||
#endif
|
||||
|
||||
|
||||
xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
m_threads(std::thread::hardware_concurrency())
|
||||
{
|
||||
@@ -53,6 +60,13 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
m_flags.set(FLAG_AES, true);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_OS_UNIX
|
||||
auto name = cpu_name_arm();
|
||||
if (!name.isNull()) {
|
||||
strncpy(m_brand, name, sizeof(m_brand) - 1);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -218,10 +218,11 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3
|
||||
{
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
if (algorithm == Algorithm::ASTROBWT_DERO) {
|
||||
return BasicCpuInfo::threads(algorithm, limit);
|
||||
return allThreads(algorithm, limit);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifndef XMRIG_ARM
|
||||
if (L2() == 0 && L3() == 0) {
|
||||
return BasicCpuInfo::threads(algorithm, limit);
|
||||
}
|
||||
@@ -263,11 +264,35 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3
|
||||
}
|
||||
|
||||
return threads;
|
||||
# else
|
||||
return allThreads(algorithm, limit);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, uint32_t limit) const
|
||||
{
|
||||
CpuThreads threads;
|
||||
threads.reserve(m_threads);
|
||||
|
||||
hwloc_obj_t pu = nullptr;
|
||||
|
||||
while ((pu = hwloc_get_next_obj_by_type(m_topology, HWLOC_OBJ_PU, pu)) != nullptr) {
|
||||
threads.add(pu->os_index, 0);
|
||||
}
|
||||
|
||||
if (threads.isEmpty()) {
|
||||
return BasicCpuInfo::threads(algorithm, limit);
|
||||
}
|
||||
|
||||
return threads;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
|
||||
{
|
||||
# ifndef XMRIG_ARM
|
||||
constexpr size_t oneMiB = 1024U * 1024U;
|
||||
|
||||
size_t PUs = countByType(cache, HWLOC_OBJ_PU);
|
||||
@@ -366,4 +391,5 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||
|
||||
pu_id++;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
@@ -70,6 +70,7 @@ protected:
|
||||
inline size_t packages() const override { return m_packages; }
|
||||
|
||||
private:
|
||||
CpuThreads allThreads(const Algorithm &algorithm, uint32_t limit) const;
|
||||
void processTopLevelCache(hwloc_obj_t obj, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const;
|
||||
|
||||
|
||||
|
||||
314
src/backend/cpu/platform/lscpu_arm.cpp
Normal file
314
src/backend/cpu/platform/lscpu_arm.cpp
Normal file
@@ -0,0 +1,314 @@
|
||||
/* XMRig
|
||||
* Copyright 2018 Riku Voipio <riku.voipio@iki.fi>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/tools/String.h"
|
||||
|
||||
|
||||
#include <cstdio>
|
||||
#include <cctype>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
struct lscpu_desc
|
||||
{
|
||||
String vendor;
|
||||
String model;
|
||||
|
||||
inline bool isReady() const { return !vendor.isNull() && !model.isNull(); }
|
||||
};
|
||||
|
||||
|
||||
struct id_part {
|
||||
const int id;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
|
||||
struct hw_impl {
|
||||
const int id;
|
||||
const id_part *parts;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
|
||||
static const id_part arm_part[] = {
|
||||
{ 0x810, "ARM810" },
|
||||
{ 0x920, "ARM920" },
|
||||
{ 0x922, "ARM922" },
|
||||
{ 0x926, "ARM926" },
|
||||
{ 0x940, "ARM940" },
|
||||
{ 0x946, "ARM946" },
|
||||
{ 0x966, "ARM966" },
|
||||
{ 0xa20, "ARM1020" },
|
||||
{ 0xa22, "ARM1022" },
|
||||
{ 0xa26, "ARM1026" },
|
||||
{ 0xb02, "ARM11 MPCore" },
|
||||
{ 0xb36, "ARM1136" },
|
||||
{ 0xb56, "ARM1156" },
|
||||
{ 0xb76, "ARM1176" },
|
||||
{ 0xc05, "Cortex-A5" },
|
||||
{ 0xc07, "Cortex-A7" },
|
||||
{ 0xc08, "Cortex-A8" },
|
||||
{ 0xc09, "Cortex-A9" },
|
||||
{ 0xc0d, "Cortex-A17" }, /* Originally A12 */
|
||||
{ 0xc0f, "Cortex-A15" },
|
||||
{ 0xc0e, "Cortex-A17" },
|
||||
{ 0xc14, "Cortex-R4" },
|
||||
{ 0xc15, "Cortex-R5" },
|
||||
{ 0xc17, "Cortex-R7" },
|
||||
{ 0xc18, "Cortex-R8" },
|
||||
{ 0xc20, "Cortex-M0" },
|
||||
{ 0xc21, "Cortex-M1" },
|
||||
{ 0xc23, "Cortex-M3" },
|
||||
{ 0xc24, "Cortex-M4" },
|
||||
{ 0xc27, "Cortex-M7" },
|
||||
{ 0xc60, "Cortex-M0+" },
|
||||
{ 0xd01, "Cortex-A32" },
|
||||
{ 0xd03, "Cortex-A53" },
|
||||
{ 0xd04, "Cortex-A35" },
|
||||
{ 0xd05, "Cortex-A55" },
|
||||
{ 0xd07, "Cortex-A57" },
|
||||
{ 0xd08, "Cortex-A72" },
|
||||
{ 0xd09, "Cortex-A73" },
|
||||
{ 0xd0a, "Cortex-A75" },
|
||||
{ 0xd0b, "Cortex-A76" },
|
||||
{ 0xd0c, "Neoverse-N1" },
|
||||
{ 0xd13, "Cortex-R52" },
|
||||
{ 0xd20, "Cortex-M23" },
|
||||
{ 0xd21, "Cortex-M33" },
|
||||
{ 0xd4a, "Neoverse-E1" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part brcm_part[] = {
|
||||
{ 0x0f, "Brahma B15" },
|
||||
{ 0x100, "Brahma B53" },
|
||||
{ 0x516, "ThunderX2" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part dec_part[] = {
|
||||
{ 0xa10, "SA110" },
|
||||
{ 0xa11, "SA1100" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part cavium_part[] = {
|
||||
{ 0x0a0, "ThunderX" },
|
||||
{ 0x0a1, "ThunderX 88XX" },
|
||||
{ 0x0a2, "ThunderX 81XX" },
|
||||
{ 0x0a3, "ThunderX 83XX" },
|
||||
{ 0x0af, "ThunderX2 99xx" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part apm_part[] = {
|
||||
{ 0x000, "X-Gene" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part qcom_part[] = {
|
||||
{ 0x00f, "Scorpion" },
|
||||
{ 0x02d, "Scorpion" },
|
||||
{ 0x04d, "Krait" },
|
||||
{ 0x06f, "Krait" },
|
||||
{ 0x201, "Kryo" },
|
||||
{ 0x205, "Kryo" },
|
||||
{ 0x211, "Kryo" },
|
||||
{ 0x800, "Falkor V1/Kryo" },
|
||||
{ 0x801, "Kryo V2" },
|
||||
{ 0xc00, "Falkor" },
|
||||
{ 0xc01, "Saphira" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part samsung_part[] = {
|
||||
{ 0x001, "exynos-m1" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part nvidia_part[] = {
|
||||
{ 0x000, "Denver" },
|
||||
{ 0x003, "Denver 2" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part marvell_part[] = {
|
||||
{ 0x131, "Feroceon 88FR131" },
|
||||
{ 0x581, "PJ4/PJ4b" },
|
||||
{ 0x584, "PJ4B-MP" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part faraday_part[] = {
|
||||
{ 0x526, "FA526" },
|
||||
{ 0x626, "FA626" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part intel_part[] = {
|
||||
{ 0x200, "i80200" },
|
||||
{ 0x210, "PXA250A" },
|
||||
{ 0x212, "PXA210A" },
|
||||
{ 0x242, "i80321-400" },
|
||||
{ 0x243, "i80321-600" },
|
||||
{ 0x290, "PXA250B/PXA26x" },
|
||||
{ 0x292, "PXA210B" },
|
||||
{ 0x2c2, "i80321-400-B0" },
|
||||
{ 0x2c3, "i80321-600-B0" },
|
||||
{ 0x2d0, "PXA250C/PXA255/PXA26x" },
|
||||
{ 0x2d2, "PXA210C" },
|
||||
{ 0x411, "PXA27x" },
|
||||
{ 0x41c, "IPX425-533" },
|
||||
{ 0x41d, "IPX425-400" },
|
||||
{ 0x41f, "IPX425-266" },
|
||||
{ 0x682, "PXA32x" },
|
||||
{ 0x683, "PXA930/PXA935" },
|
||||
{ 0x688, "PXA30x" },
|
||||
{ 0x689, "PXA31x" },
|
||||
{ 0xb11, "SA1110" },
|
||||
{ 0xc12, "IPX1200" },
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
static const id_part hisi_part[] = {
|
||||
{ 0xd01, "Kunpeng-920" }, /* aka tsv110 */
|
||||
{ -1, nullptr },
|
||||
};
|
||||
|
||||
|
||||
static const hw_impl hw_implementer[] = {
|
||||
{ 0x41, arm_part, "ARM" },
|
||||
{ 0x42, brcm_part, "Broadcom" },
|
||||
{ 0x43, cavium_part, "Cavium" },
|
||||
{ 0x44, dec_part, "DEC" },
|
||||
{ 0x48, hisi_part, "HiSilicon" },
|
||||
{ 0x4e, nvidia_part, "Nvidia" },
|
||||
{ 0x50, apm_part, "APM" },
|
||||
{ 0x51, qcom_part, "Qualcomm" },
|
||||
{ 0x53, samsung_part, "Samsung" },
|
||||
{ 0x56, marvell_part, "Marvell" },
|
||||
{ 0x66, faraday_part, "Faraday" },
|
||||
{ 0x69, intel_part, "Intel" }
|
||||
};
|
||||
|
||||
|
||||
static bool lookup(char *line, const char *pattern, String &value)
|
||||
{
|
||||
if (!*line || !value.isNull()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
char *p;
|
||||
int len = strlen(pattern);
|
||||
|
||||
if (strncmp(line, pattern, len) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (p = line + len; isspace(*p); p++);
|
||||
|
||||
if (*p != ':') {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (++p; isspace(*p); p++);
|
||||
|
||||
if (!*p) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *v = p;
|
||||
|
||||
len = strlen(line) - 1;
|
||||
for (p = line + len; isspace(*(p-1)); p--);
|
||||
*p = '\0';
|
||||
|
||||
value = v;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool read_basicinfo(lscpu_desc *desc)
|
||||
{
|
||||
auto fp = fopen("/proc/cpuinfo", "r");
|
||||
if (!fp) {
|
||||
return false;
|
||||
}
|
||||
|
||||
char buf[BUFSIZ];
|
||||
while (fgets(buf, sizeof(buf), fp) != nullptr) {
|
||||
if (!lookup(buf, "CPU implementer", desc->vendor)) {
|
||||
lookup(buf, "CPU part", desc->model);
|
||||
}
|
||||
|
||||
if (desc->isReady()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return desc->isReady();
|
||||
}
|
||||
|
||||
|
||||
static bool arm_cpu_decode(lscpu_desc *desc)
|
||||
{
|
||||
if ((strncmp(desc->vendor, "0x", 2) != 0 || strncmp(desc->model, "0x", 2) != 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const int vendor = strtol(desc->vendor, nullptr, 0);
|
||||
const int model = strtol(desc->model, nullptr, 0);
|
||||
|
||||
for (const auto &impl : hw_implementer) {
|
||||
if (impl.id != vendor) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t i = 0; impl.parts[i].id != -1; ++i) {
|
||||
if (impl.parts[i].id == model) {
|
||||
desc->model = impl.parts[i].name;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
String cpu_name_arm()
|
||||
{
|
||||
lscpu_desc desc;
|
||||
if (read_basicinfo(&desc) && arm_cpu_decode(&desc)) {
|
||||
return desc.model;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
@@ -152,7 +152,9 @@ public:
|
||||
}
|
||||
|
||||
if (!CudaLib::init(cuda.loader())) {
|
||||
return printDisabled(kLabel, RED_S " (failed to load CUDA plugin)");
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") RED_BOLD("disabled ") RED("(%s)"), kLabel, CudaLib::lastError());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
runtimeVersion = CudaLib::runtimeVersion();
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
if (BUILD_STATIC AND XMRIG_OS_UNIX AND WITH_CUDA)
|
||||
message(WARNING "CUDA backend is not compatible with static build, use -DWITH_CUDA=OFF to suppress this warning")
|
||||
|
||||
set(WITH_CUDA OFF)
|
||||
endif()
|
||||
|
||||
if (WITH_CUDA)
|
||||
add_definitions(/DXMRIG_FEATURE_CUDA)
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "backend/cuda/wrappers/CudaLib.h"
|
||||
#include "base/io/Env.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/kernel/Process.h"
|
||||
#include "crypto/rx/RxAlgo.h"
|
||||
|
||||
|
||||
@@ -46,6 +47,14 @@ enum Version : uint32_t
|
||||
|
||||
static uv_lib_t cudaLib;
|
||||
|
||||
#if defined(__APPLE__)
|
||||
static String defaultLoader = "/System/Library/Frameworks/OpenCL.framework/OpenCL";
|
||||
#elif defined(_WIN32)
|
||||
static String defaultLoader = "xmrig-cuda.dll";
|
||||
#else
|
||||
static String defaultLoader = "libxmrig-cuda.so";
|
||||
#endif
|
||||
|
||||
|
||||
static const char *kAlloc = "alloc";
|
||||
static const char *kAstroBWTHash = "astroBWTHash";
|
||||
@@ -125,11 +134,12 @@ static setJob_v2_t pSetJob_v2 = nullptr;
|
||||
static version_t pVersion = nullptr;
|
||||
|
||||
|
||||
#define DLSYM(x) if (uv_dlsym(&cudaLib, k##x, reinterpret_cast<void**>(&p##x)) == -1) { throw std::runtime_error("symbol not found (" #x ")"); }
|
||||
#define DLSYM(x) if (uv_dlsym(&cudaLib, k##x, reinterpret_cast<void**>(&p##x)) == -1) { throw std::runtime_error(std::string("symbol not found: ") + k##x); }
|
||||
|
||||
|
||||
bool CudaLib::m_initialized = false;
|
||||
bool CudaLib::m_ready = false;
|
||||
String CudaLib::m_error;
|
||||
String CudaLib::m_loader;
|
||||
|
||||
|
||||
@@ -139,9 +149,22 @@ String CudaLib::m_loader;
|
||||
bool xmrig::CudaLib::init(const char *fileName)
|
||||
{
|
||||
if (!m_initialized) {
|
||||
m_loader = fileName == nullptr ? defaultLoader() : Env::expand(fileName);
|
||||
m_ready = uv_dlopen(m_loader, &cudaLib) == 0 && load();
|
||||
m_initialized = true;
|
||||
m_loader = fileName == nullptr ? defaultLoader : Env::expand(fileName);
|
||||
|
||||
if (!open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
load();
|
||||
} catch (std::exception &ex) {
|
||||
m_error = (std::string(m_loader) + ": " + ex.what()).c_str();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
m_ready = true;
|
||||
}
|
||||
|
||||
return m_ready;
|
||||
@@ -150,7 +173,7 @@ bool xmrig::CudaLib::init(const char *fileName)
|
||||
|
||||
const char *xmrig::CudaLib::lastError() noexcept
|
||||
{
|
||||
return uv_dlerror(&cudaLib);
|
||||
return m_error;
|
||||
}
|
||||
|
||||
|
||||
@@ -344,66 +367,70 @@ void xmrig::CudaLib::release(nvid_ctx *ctx) noexcept
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::CudaLib::load()
|
||||
bool xmrig::CudaLib::open()
|
||||
{
|
||||
if (uv_dlsym(&cudaLib, kVersion, reinterpret_cast<void**>(&pVersion)) == -1) {
|
||||
m_error = nullptr;
|
||||
|
||||
if (uv_dlopen(m_loader, &cudaLib) == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_OS_LINUX
|
||||
if (m_loader == defaultLoader) {
|
||||
m_loader = Process::location(Process::ExeLocation, m_loader);
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (uv_dlopen(m_loader, &cudaLib) == 0) {
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
m_error = uv_dlerror(&cudaLib);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::CudaLib::load()
|
||||
{
|
||||
DLSYM(Version);
|
||||
|
||||
if (pVersion(ApiVersion) != 3U) {
|
||||
return false;
|
||||
throw std::runtime_error("API version mismatch");
|
||||
}
|
||||
|
||||
uv_dlsym(&cudaLib, kDeviceInfo_v2, reinterpret_cast<void**>(&pDeviceInfo_v2));
|
||||
uv_dlsym(&cudaLib, kSetJob_v2, reinterpret_cast<void**>(&pSetJob_v2));
|
||||
DLSYM(Alloc);
|
||||
DLSYM(CnHash);
|
||||
DLSYM(DeviceCount);
|
||||
DLSYM(DeviceInit);
|
||||
DLSYM(DeviceInt);
|
||||
DLSYM(DeviceName);
|
||||
DLSYM(DeviceUint);
|
||||
DLSYM(DeviceUlong);
|
||||
DLSYM(Init);
|
||||
DLSYM(LastError);
|
||||
DLSYM(PluginVersion);
|
||||
DLSYM(Release);
|
||||
DLSYM(RxHash);
|
||||
DLSYM(RxPrepare);
|
||||
DLSYM(AstroBWTHash);
|
||||
DLSYM(AstroBWTPrepare);
|
||||
DLSYM(KawPowHash);
|
||||
DLSYM(KawPowPrepare_v2);
|
||||
DLSYM(KawPowStopHash);
|
||||
|
||||
try {
|
||||
DLSYM(Alloc);
|
||||
DLSYM(CnHash);
|
||||
DLSYM(DeviceCount);
|
||||
DLSYM(DeviceInit);
|
||||
DLSYM(DeviceInt);
|
||||
DLSYM(DeviceName);
|
||||
DLSYM(DeviceUint);
|
||||
DLSYM(DeviceUlong);
|
||||
DLSYM(Init);
|
||||
DLSYM(LastError);
|
||||
DLSYM(PluginVersion);
|
||||
DLSYM(Release);
|
||||
DLSYM(RxHash);
|
||||
DLSYM(RxPrepare);
|
||||
DLSYM(AstroBWTHash);
|
||||
DLSYM(AstroBWTPrepare);
|
||||
DLSYM(KawPowHash);
|
||||
DLSYM(KawPowPrepare_v2);
|
||||
DLSYM(KawPowStopHash);
|
||||
DLSYM(Version);
|
||||
uv_dlsym(&cudaLib, kDeviceInfo_v2, reinterpret_cast<void**>(&pDeviceInfo_v2));
|
||||
if (!pDeviceInfo_v2) {
|
||||
DLSYM(DeviceInfo);
|
||||
}
|
||||
|
||||
if (!pDeviceInfo_v2) {
|
||||
DLSYM(DeviceInfo);
|
||||
}
|
||||
|
||||
if (!pSetJob_v2) {
|
||||
DLSYM(SetJob);
|
||||
}
|
||||
} catch (std::exception &ex) {
|
||||
LOG_ERR("Error loading CUDA library: %s", ex.what());
|
||||
return false;
|
||||
uv_dlsym(&cudaLib, kSetJob_v2, reinterpret_cast<void**>(&pSetJob_v2));
|
||||
if (!pSetJob_v2) {
|
||||
DLSYM(SetJob);
|
||||
}
|
||||
|
||||
pInit();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
xmrig::String xmrig::CudaLib::defaultLoader()
|
||||
{
|
||||
# if defined(__APPLE__)
|
||||
return "/System/Library/Frameworks/OpenCL.framework/OpenCL"; // FIXME
|
||||
# elif defined(_WIN32)
|
||||
return "xmrig-cuda.dll";
|
||||
# else
|
||||
return "libxmrig-cuda.so";
|
||||
# endif
|
||||
}
|
||||
|
||||
@@ -99,11 +99,12 @@ public:
|
||||
static void release(nvid_ctx *ctx) noexcept;
|
||||
|
||||
private:
|
||||
static bool load();
|
||||
static String defaultLoader();
|
||||
static bool open();
|
||||
static void load();
|
||||
|
||||
static bool m_initialized;
|
||||
static bool m_ready;
|
||||
static String m_error;
|
||||
static String m_loader;
|
||||
};
|
||||
|
||||
|
||||
@@ -899,7 +899,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
|
||||
|
||||
((uint8 *)h)[0] = vload8(0U, c_IV256);
|
||||
|
||||
for (uint i = 0; i < 3; ++i) {
|
||||
for (volatile uint i = 0; i < 3; ++i) {
|
||||
((uint16 *)m)[0] = vload16(i, (__global uint *)states);
|
||||
for (uint x = 0; x < 16; ++x) {
|
||||
m[x] = SWAP4(m[x]);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
static const char cryptonight_cl[60954] = {
|
||||
static const char cryptonight_cl[60963] = {
|
||||
0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,
|
||||
0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
|
||||
0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
|
||||
@@ -1823,91 +1823,92 @@ static const char cryptonight_cl[60954] = {
|
||||
0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x6d,0x5b,
|
||||
0x31,0x36,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x76,0x5b,0x31,0x36,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x68,0x5b,
|
||||
0x38,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3d,0x30,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,
|
||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x49,0x56,0x32,0x35,0x36,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x33,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x6d,0x29,0x5b,
|
||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x31,0x36,0x28,0x69,0x2c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,
|
||||
0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x31,0x36,0x3b,0x20,0x2b,0x2b,0x78,0x29,
|
||||
0x20,0x7b,0x0a,0x6d,0x5b,0x78,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x6d,0x5b,0x78,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x35,
|
||||
0x31,0x32,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,
|
||||
0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,
|
||||
0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,
|
||||
0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,
|
||||
0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,
|
||||
0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,
|
||||
0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,
|
||||
0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,
|
||||
0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,
|
||||
0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,
|
||||
0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,
|
||||
0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x6d,0x5b,0x30,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,
|
||||
0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x38,0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x31,0x5d,0x3d,0x53,0x57,
|
||||
0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x39,
|
||||
0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x32,0x5d,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x33,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,
|
||||
0x3b,0x0a,0x6d,0x5b,0x34,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x35,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x36,0x5d,0x3d,0x30,
|
||||
0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x38,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,
|
||||
0x39,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x30,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x31,0x5d,0x3d,0x30,0x78,0x30,
|
||||
0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x32,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x33,0x5d,0x3d,0x31,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x34,0x5d,
|
||||
0x3d,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x35,0x5d,0x3d,0x30,0x78,0x36,0x34,0x30,0x3b,0x0a,0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x36,0x34,0x3b,0x0a,0x28,0x28,
|
||||
0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,
|
||||
0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,
|
||||
0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,
|
||||
0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,
|
||||
0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,
|
||||
0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,
|
||||
0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,
|
||||
0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,
|
||||
0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,
|
||||
0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,
|
||||
0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,
|
||||
0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,
|
||||
0x0a,0x68,0x5b,0x69,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x68,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x74,0x3d,0x28,0x75,0x69,
|
||||
0x6e,0x74,0x32,0x29,0x28,0x68,0x5b,0x36,0x5d,0x2c,0x68,0x5b,0x37,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x74,0x29,0x3c,
|
||||
0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,
|
||||
0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,
|
||||
0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,
|
||||
0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,
|
||||
0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x57,0x41,0x50,0x34,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,
|
||||
0x20,0x47,0x72,0x6f,0x65,0x73,0x74,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x2a,0x73,0x74,0x61,0x74,0x65,0x73,0x2c,
|
||||
0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
|
||||
0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x54,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,
|
||||
0x20,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x3d,0x67,0x65,0x74,0x5f,0x67,
|
||||
0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,
|
||||
0x3b,0x0a,0x69,0x66,0x28,0x69,0x64,0x78,0x3c,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x5d,0x29,0x20,0x7b,0x0a,0x73,
|
||||
0x74,0x61,0x74,0x65,0x73,0x2b,0x3d,0x32,0x35,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,
|
||||
0x53,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,
|
||||
0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x31,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x20,0x7d,0x3b,0x0a,0x75,
|
||||
0x6c,0x6f,0x6e,0x67,0x20,0x48,0x5b,0x38,0x5d,0x2c,0x4d,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,
|
||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,
|
||||
0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,
|
||||
0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,
|
||||
0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,
|
||||
0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,
|
||||
0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x31,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,
|
||||
0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,
|
||||
0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,
|
||||
0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,
|
||||
0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,
|
||||
0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,
|
||||
0x6c,0x6f,0x61,0x64,0x38,0x28,0x32,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,
|
||||
0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,
|
||||
0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,
|
||||
0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,
|
||||
0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x4d,0x5b,0x30,0x5d,0x3d,0x73,
|
||||
0x74,0x61,0x74,0x65,0x73,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x4d,0x5b,0x31,0x5d,0x3d,0x30,0x78,0x38,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x32,0x5d,0x3d,0x30,0x55,0x4c,
|
||||
0x3b,0x0a,0x4d,0x5b,0x33,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x34,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x35,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,
|
||||
0x4d,0x5b,0x36,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x34,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x55,0x4c,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,
|
||||
0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,
|
||||
0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x6d,
|
||||
0x70,0x5b,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,
|
||||
0x0a,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3d,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x69,0x5d,0x5e,0x4d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,
|
||||
0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x53,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,
|
||||
0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x6d,0x70,
|
||||
0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x53,0x74,0x61,0x74,0x65,0x5b,0x37,0x5d,0x3c,0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,
|
||||
0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,
|
||||
0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,
|
||||
0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,
|
||||
0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
|
||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x49,0x56,0x32,0x35,0x36,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x76,0x6f,0x6c,0x61,
|
||||
0x74,0x69,0x6c,0x65,0x20,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x33,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x28,0x28,0x75,0x69,0x6e,
|
||||
0x74,0x31,0x36,0x20,0x2a,0x29,0x6d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x31,0x36,0x28,0x69,0x2c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,
|
||||
0x3c,0x31,0x36,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x6d,0x5b,0x78,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x6d,0x5b,0x78,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,
|
||||
0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x35,0x31,0x32,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,
|
||||
0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,
|
||||
0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,
|
||||
0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,
|
||||
0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,
|
||||
0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,
|
||||
0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,
|
||||
0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,
|
||||
0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,
|
||||
0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,
|
||||
0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,
|
||||
0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x6d,0x5b,0x30,0x5d,0x3d,0x53,0x57,0x41,0x50,
|
||||
0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x38,0x5d,0x29,
|
||||
0x3b,0x0a,0x6d,0x5b,0x31,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,
|
||||
0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x39,0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x32,0x5d,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x3b,0x0a,0x6d,
|
||||
0x5b,0x33,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x34,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x35,0x5d,0x3d,0x30,0x78,0x30,0x30,
|
||||
0x55,0x3b,0x0a,0x6d,0x5b,0x36,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x38,0x5d,0x3d,
|
||||
0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x39,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x30,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,
|
||||
0x6d,0x5b,0x31,0x31,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x32,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x33,0x5d,0x3d,
|
||||
0x31,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x34,0x5d,0x3d,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x35,0x5d,0x3d,0x30,0x78,0x36,0x34,0x30,0x3b,0x0a,0x62,0x69,0x74,0x6c,0x65,
|
||||
0x6e,0x2b,0x3d,0x36,0x34,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,
|
||||
0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,
|
||||
0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,
|
||||
0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,
|
||||
0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,
|
||||
0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,
|
||||
0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,
|
||||
0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,
|
||||
0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,
|
||||
0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,
|
||||
0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,
|
||||
0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,
|
||||
0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x68,0x5b,0x69,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x68,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x75,0x69,
|
||||
0x6e,0x74,0x32,0x20,0x74,0x3d,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x5b,0x36,0x5d,0x2c,0x68,0x5b,0x37,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x61,0x73,0x5f,
|
||||
0x75,0x6c,0x6f,0x6e,0x67,0x28,0x74,0x29,0x3c,0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,
|
||||
0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,
|
||||
0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,
|
||||
0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,
|
||||
0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x57,0x41,0x50,0x34,0x0a,0x5f,0x5f,0x6b,0x65,
|
||||
0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x47,0x72,0x6f,0x65,0x73,0x74,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x6c,0x6f,0x6e,0x67,
|
||||
0x20,0x2a,0x73,0x74,0x61,0x74,0x65,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,
|
||||
0x66,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x54,0x61,
|
||||
0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,
|
||||
0x69,0x64,0x78,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,
|
||||
0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x69,0x64,0x78,0x3c,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x54,0x68,0x72,0x65,
|
||||
0x61,0x64,0x73,0x5d,0x29,0x20,0x7b,0x0a,0x73,0x74,0x61,0x74,0x65,0x73,0x2b,0x3d,0x32,0x35,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,
|
||||
0x5d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x53,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,
|
||||
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x31,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x55,0x4c,0x20,0x7d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x48,0x5b,0x38,0x5d,0x2c,0x4d,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,
|
||||
0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,
|
||||
0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,
|
||||
0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,
|
||||
0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,
|
||||
0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,
|
||||
0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,
|
||||
0x28,0x31,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,
|
||||
0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,
|
||||
0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,
|
||||
0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,
|
||||
0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,
|
||||
0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x32,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,
|
||||
0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,
|
||||
0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,
|
||||
0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,
|
||||
0x0a,0x7d,0x0a,0x4d,0x5b,0x30,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x73,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x4d,0x5b,0x31,0x5d,0x3d,0x30,0x78,0x38,0x30,0x55,0x4c,0x3b,
|
||||
0x0a,0x4d,0x5b,0x32,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x33,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x34,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,
|
||||
0x5b,0x35,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x36,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x34,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,
|
||||
0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,
|
||||
0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,
|
||||
0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x6d,0x70,0x5b,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,
|
||||
0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3d,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x69,
|
||||
0x5d,0x5e,0x4d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x53,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,
|
||||
0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,
|
||||
0x69,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x53,0x74,0x61,0x74,0x65,0x5b,0x37,0x5d,0x3c,0x3d,0x54,0x61,0x72,
|
||||
0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,
|
||||
0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,
|
||||
0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,
|
||||
0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,
|
||||
0x7d,0x0a,0x00
|
||||
};
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
if (BUILD_STATIC AND XMRIG_OS_UNIX AND WITH_OPENCL)
|
||||
message(WARNING "OpenCL backend is not compatible with static build, use -DWITH_OPENCL=OFF to suppress this warning")
|
||||
|
||||
set(WITH_OPENCL OFF)
|
||||
endif()
|
||||
|
||||
if (WITH_OPENCL)
|
||||
add_definitions(/DCL_TARGET_OPENCL_VERSION=200)
|
||||
add_definitions(/DCL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||
|
||||
@@ -69,8 +69,6 @@ OclKawPowRunner::~OclKawPowRunner()
|
||||
|
||||
delete m_calculateDagKernel;
|
||||
|
||||
OclLib::release(m_searchKernel);
|
||||
|
||||
OclLib::release(m_controlQueue);
|
||||
OclLib::release(m_stop);
|
||||
|
||||
@@ -120,8 +118,7 @@ void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||
void OclKawPowRunner::set(const Job &job, uint8_t *blob)
|
||||
{
|
||||
m_blockHeight = static_cast<uint32_t>(job.height());
|
||||
m_searchProgram = OclKawPow::get(*this, m_blockHeight, m_workGroupSize);
|
||||
m_searchKernel = OclLib::createKernel(m_searchProgram, "progpow_search");
|
||||
m_searchKernel = OclKawPow::get(*this, m_blockHeight, m_workGroupSize);
|
||||
|
||||
const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH;
|
||||
|
||||
|
||||
@@ -69,7 +69,6 @@ private:
|
||||
|
||||
KawPow_CalculateDAGKernel* m_calculateDagKernel = nullptr;
|
||||
|
||||
cl_program m_searchProgram = nullptr;
|
||||
cl_kernel m_searchKernel = nullptr;
|
||||
|
||||
size_t m_workGroupSize = 256;
|
||||
|
||||
@@ -54,8 +54,9 @@ namespace xmrig {
|
||||
class KawPowCacheEntry
|
||||
{
|
||||
public:
|
||||
inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program) :
|
||||
inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program, cl_kernel kernel) :
|
||||
program(program),
|
||||
kernel(kernel),
|
||||
m_algo(algo),
|
||||
m_index(index),
|
||||
m_period(period),
|
||||
@@ -65,9 +66,10 @@ public:
|
||||
inline bool isExpired(uint64_t period) const { return m_period + 1 < period; }
|
||||
inline bool match(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) const { return m_algo == algo && m_period == period && m_worksize == worksize && m_index == index; }
|
||||
inline bool match(const IOclRunner &runner, uint64_t period, uint32_t worksize) const { return match(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
||||
inline void release() { OclLib::release(program); }
|
||||
inline void release() { OclLib::release(kernel); OclLib::release(program); }
|
||||
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
private:
|
||||
Algorithm m_algo;
|
||||
@@ -82,16 +84,16 @@ class KawPowCache
|
||||
public:
|
||||
KawPowCache() = default;
|
||||
|
||||
inline cl_program search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
||||
inline cl_kernel search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
||||
|
||||
|
||||
inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index)
|
||||
inline cl_kernel search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
for (const auto &entry : m_data) {
|
||||
if (entry.match(algo, period, worksize, index)) {
|
||||
return entry.program;
|
||||
return entry.kernel;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,9 +101,10 @@ public:
|
||||
}
|
||||
|
||||
|
||||
void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program)
|
||||
void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program, cl_kernel kernel)
|
||||
{
|
||||
if (search(algo, period, worksize, index)) {
|
||||
OclLib::release(kernel);
|
||||
OclLib::release(program);
|
||||
return;
|
||||
}
|
||||
@@ -109,7 +112,7 @@ public:
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
gc(period);
|
||||
m_data.emplace_back(algo, period, worksize, index, program);
|
||||
m_data.emplace_back(algo, period, worksize, index, program, kernel);
|
||||
}
|
||||
|
||||
|
||||
@@ -159,15 +162,15 @@ static KawPowCache cache;
|
||||
class KawPowBuilder
|
||||
{
|
||||
public:
|
||||
cl_program build(const IOclRunner &runner, uint64_t period, uint32_t worksize)
|
||||
cl_kernel build(const IOclRunner &runner, uint64_t period, uint32_t worksize)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
cl_program program = cache.search(runner, period, worksize);
|
||||
if (program) {
|
||||
return program;
|
||||
cl_kernel kernel = cache.search(runner, period, worksize);
|
||||
if (kernel) {
|
||||
return kernel;
|
||||
}
|
||||
|
||||
cl_int ret;
|
||||
@@ -175,7 +178,7 @@ public:
|
||||
cl_device_id device = runner.data().device.id();
|
||||
const char *s = source.c_str();
|
||||
|
||||
program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
|
||||
cl_program program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -199,11 +202,17 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
kernel = OclLib::createKernel(program, "progpow_search", &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
OclLib::release(program);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LOG_INFO("%s " YELLOW("KawPow") " program for period " WHITE_BOLD("%" PRIu64) " compiled " BLACK_BOLD("(%" PRIu64 "ms)"), Tags::opencl(), period, Chrono::steadyMSecs() - ts);
|
||||
|
||||
cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program);
|
||||
cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program, kernel);
|
||||
|
||||
return program;
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
||||
@@ -382,7 +391,7 @@ public:
|
||||
static KawPowBuilder builder;
|
||||
|
||||
|
||||
cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize)
|
||||
cl_kernel OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize)
|
||||
{
|
||||
const uint64_t period = height / KPHash::PERIOD_LENGTH;
|
||||
|
||||
@@ -396,9 +405,9 @@ cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t wo
|
||||
[](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); }
|
||||
);
|
||||
|
||||
cl_program program = cache.search(runner, period, worksize);
|
||||
if (program) {
|
||||
return program;
|
||||
cl_kernel kernel = cache.search(runner, period, worksize);
|
||||
if (kernel) {
|
||||
return kernel;
|
||||
}
|
||||
|
||||
return builder.build(runner, period, worksize);
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
using cl_program = struct _cl_program *;
|
||||
using cl_kernel = struct _cl_kernel *;
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
@@ -42,7 +42,7 @@ class IOclRunner;
|
||||
class OclKawPow
|
||||
{
|
||||
public:
|
||||
static cl_program get(const IOclRunner &runner, uint64_t height, uint32_t worksize);
|
||||
static cl_kernel get(const IOclRunner &runner, uint64_t height, uint32_t worksize);
|
||||
static void clear();
|
||||
};
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
static uv_lib_t oclLib;
|
||||
|
||||
static const char *kErrorTemplate = MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl ") RED(" error ") RED_BOLD("%s") RED(" when calling ") RED_BOLD("%s");
|
||||
static const char *kErrorTemplate = MAGENTA_BG_BOLD(WHITE_BOLD_S " opencl ") RED(" error ") RED_BOLD("%s") RED(" when calling ") RED_BOLD("%s");
|
||||
|
||||
static const char *kBuildProgram = "clBuildProgram";
|
||||
static const char *kCreateBuffer = "clCreateBuffer";
|
||||
|
||||
@@ -222,3 +222,15 @@ if (WITH_KAWPOW)
|
||||
src/base/net/stratum/EthStratumClient.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if (WITH_PROFILING)
|
||||
add_definitions(/DXMRIG_FEATURE_PROFILING)
|
||||
|
||||
list(APPEND HEADERS_BASE
|
||||
src/base/tools/Profiler.h
|
||||
)
|
||||
|
||||
list(APPEND SOURCES_BASE
|
||||
src/base/tools/Profiler.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
@@ -135,6 +135,12 @@ static AlgoName const algorithm_names[] = {
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
xmrig::Algorithm::Algorithm(const rapidjson::Value &value) :
|
||||
m_id(parse(value.GetString()))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
rapidjson::Value xmrig::Algorithm::toJSON() const
|
||||
{
|
||||
using namespace rapidjson;
|
||||
@@ -143,6 +149,12 @@ rapidjson::Value xmrig::Algorithm::toJSON() const
|
||||
}
|
||||
|
||||
|
||||
rapidjson::Value xmrig::Algorithm::toJSON(rapidjson::Document &) const
|
||||
{
|
||||
return toJSON();
|
||||
}
|
||||
|
||||
|
||||
size_t xmrig::Algorithm::l2() const
|
||||
{
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
|
||||
@@ -92,10 +92,11 @@ public:
|
||||
inline Algorithm() = default;
|
||||
inline Algorithm(const char *algo) : m_id(parse(algo)) {}
|
||||
inline Algorithm(Id id) : m_id(id) {}
|
||||
Algorithm(const rapidjson::Value &value);
|
||||
|
||||
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO; }
|
||||
inline bool isEqual(const Algorithm &other) const { return m_id == other.m_id; }
|
||||
inline bool isValid() const { return m_id != INVALID; }
|
||||
inline bool isValid() const { return m_id != INVALID && family() != UNKNOWN; }
|
||||
inline const char *name() const { return name(false); }
|
||||
inline const char *shortName() const { return name(true); }
|
||||
inline Family family() const { return family(m_id); }
|
||||
@@ -108,6 +109,7 @@ public:
|
||||
inline operator Algorithm::Id() const { return m_id; }
|
||||
|
||||
rapidjson::Value toJSON() const;
|
||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||
size_t l2() const;
|
||||
size_t l3() const;
|
||||
uint32_t maxIntensity() const;
|
||||
|
||||
@@ -81,7 +81,13 @@ private:
|
||||
#define CLEAR CSI "0m" // all attributes off
|
||||
#define BRIGHT_BLACK_S CSI "0;90m" // somewhat MD.GRAY
|
||||
#define BLACK_S CSI "0;30m"
|
||||
#define BLACK_BOLD_S CSI "1;30m" // another name for GRAY
|
||||
|
||||
#ifdef XMRIG_OS_APPLE
|
||||
# define BLACK_BOLD_S CSI "0;37m"
|
||||
#else
|
||||
# define BLACK_BOLD_S CSI "1;30m" // another name for GRAY
|
||||
#endif
|
||||
|
||||
#define RED_S CSI "0;31m"
|
||||
#define RED_BOLD_S CSI "1;31m"
|
||||
#define GREEN_S CSI "0;32m"
|
||||
|
||||
@@ -101,3 +101,13 @@ const char *xmrig::Tags::opencl()
|
||||
return tag;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_PROFILING
|
||||
const char* xmrig::Tags::profiler()
|
||||
{
|
||||
static const char* tag = CYAN_BG_BOLD(WHITE_BOLD_S " profile ");
|
||||
|
||||
return tag;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -53,6 +53,10 @@ public:
|
||||
# ifdef XMRIG_FEATURE_OPENCL
|
||||
static const char *opencl();
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_PROFILING
|
||||
static const char* profiler();
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ static int showVersion()
|
||||
# if defined(LIBRESSL_VERSION_TEXT)
|
||||
printf("LibreSSL/%s\n", LIBRESSL_VERSION_TEXT + 9);
|
||||
# elif defined(OPENSSL_VERSION_TEXT)
|
||||
constexpr const char *v = OPENSSL_VERSION_TEXT + 8;
|
||||
constexpr const char *v = &OPENSSL_VERSION_TEXT[8];
|
||||
printf("OpenSSL/%.*s\n", static_cast<int>(strchr(v, ' ') - v), v);
|
||||
# endif
|
||||
}
|
||||
|
||||
@@ -54,6 +54,8 @@ public:
|
||||
|
||||
static inline const char *userAgent() { return m_userAgent; }
|
||||
|
||||
static bool isOnBatteryPower();
|
||||
|
||||
private:
|
||||
static char *createUserAgent();
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
*/
|
||||
|
||||
|
||||
#include <IOKit/IOKitLib.h>
|
||||
#include <IOKit/ps/IOPowerSources.h>
|
||||
#include <mach/thread_act.h>
|
||||
#include <mach/thread_policy.h>
|
||||
#include <stdio.h>
|
||||
@@ -29,6 +31,7 @@
|
||||
#include <sys/resource.h>
|
||||
#include <uv.h>
|
||||
#include <thread>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
#include "base/kernel/Platform.h"
|
||||
@@ -107,3 +110,8 @@ void xmrig::Platform::setThreadPriority(int priority)
|
||||
setpriority(PRIO_PROCESS, 0, prio);
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::Platform::isOnBatteryPower()
|
||||
{
|
||||
return IOPSGetTimeRemainingEstimate() != kIOPSTimeRemainingUnlimited;
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include <unistd.h>
|
||||
#include <uv.h>
|
||||
#include <thread>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
#include "base/kernel/Platform.h"
|
||||
@@ -146,3 +147,19 @@ void xmrig::Platform::setThreadPriority(int priority)
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::Platform::isOnBatteryPower()
|
||||
{
|
||||
for (int i = 0; i <= 1; ++i) {
|
||||
char buf[64];
|
||||
snprintf(buf, 64, "/sys/class/power_supply/BAT%d/status", i);
|
||||
std::ifstream f(buf);
|
||||
if (f.is_open()) {
|
||||
std::string status;
|
||||
f >> status;
|
||||
return (status == "Discharging");
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -157,3 +157,12 @@ void xmrig::Platform::setThreadPriority(int priority)
|
||||
SetThreadPriority(GetCurrentThread(), prio);
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::Platform::isOnBatteryPower()
|
||||
{
|
||||
SYSTEM_POWER_STATUS st;
|
||||
if (GetSystemPowerStatus(&st)) {
|
||||
return (st.ACLineStatus == 0);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -61,6 +61,7 @@ const char *BaseConfig::kColors = "colors";
|
||||
const char *BaseConfig::kDryRun = "dry-run";
|
||||
const char *BaseConfig::kHttp = "http";
|
||||
const char *BaseConfig::kLogFile = "log-file";
|
||||
const char *BaseConfig::kPauseOnBattery = "pause-on-battery";
|
||||
const char *BaseConfig::kPrintTime = "print-time";
|
||||
const char *BaseConfig::kSyslog = "syslog";
|
||||
const char *BaseConfig::kTitle = "title";
|
||||
@@ -85,15 +86,16 @@ bool xmrig::BaseConfig::read(const IJsonReader &reader, const char *fileName)
|
||||
return false;
|
||||
}
|
||||
|
||||
m_autoSave = reader.getBool(kAutosave, m_autoSave);
|
||||
m_background = reader.getBool(kBackground, m_background);
|
||||
m_dryRun = reader.getBool(kDryRun, m_dryRun);
|
||||
m_syslog = reader.getBool(kSyslog, m_syslog);
|
||||
m_watch = reader.getBool(kWatch, m_watch);
|
||||
m_logFile = reader.getString(kLogFile);
|
||||
m_userAgent = reader.getString(kUserAgent);
|
||||
m_printTime = std::min(reader.getUint(kPrintTime, m_printTime), 3600U);
|
||||
m_title = reader.getValue(kTitle);
|
||||
m_autoSave = reader.getBool(kAutosave, m_autoSave);
|
||||
m_background = reader.getBool(kBackground, m_background);
|
||||
m_dryRun = reader.getBool(kDryRun, m_dryRun);
|
||||
m_syslog = reader.getBool(kSyslog, m_syslog);
|
||||
m_watch = reader.getBool(kWatch, m_watch);
|
||||
m_pauseOnBattery = reader.getBool(kPauseOnBattery, m_pauseOnBattery);
|
||||
m_logFile = reader.getString(kLogFile);
|
||||
m_userAgent = reader.getString(kUserAgent);
|
||||
m_printTime = std::min(reader.getUint(kPrintTime, m_printTime), 3600U);
|
||||
m_title = reader.getValue(kTitle);
|
||||
|
||||
# ifdef XMRIG_FEATURE_TLS
|
||||
m_tls = reader.getValue(kTls);
|
||||
@@ -155,7 +157,7 @@ void xmrig::BaseConfig::printVersions()
|
||||
snprintf(buf, sizeof buf, "LibreSSL/%s ", LIBRESSL_VERSION_TEXT + 9);
|
||||
libs += buf;
|
||||
# elif defined(OPENSSL_VERSION_TEXT)
|
||||
constexpr const char *v = OPENSSL_VERSION_TEXT + 8;
|
||||
constexpr const char *v = &OPENSSL_VERSION_TEXT[8];
|
||||
snprintf(buf, sizeof buf, "OpenSSL/%.*s ", static_cast<int>(strchr(v, ' ') - v), v);
|
||||
libs += buf;
|
||||
# endif
|
||||
|
||||
@@ -55,6 +55,7 @@ public:
|
||||
static const char *kDryRun;
|
||||
static const char *kHttp;
|
||||
static const char *kLogFile;
|
||||
static const char *kPauseOnBattery;
|
||||
static const char *kPrintTime;
|
||||
static const char *kSyslog;
|
||||
static const char *kTitle;
|
||||
@@ -71,6 +72,7 @@ public:
|
||||
inline bool isAutoSave() const { return m_autoSave; }
|
||||
inline bool isBackground() const { return m_background; }
|
||||
inline bool isDryRun() const { return m_dryRun; }
|
||||
inline bool isPauseOnBattery() const { return m_pauseOnBattery; }
|
||||
inline bool isSyslog() const { return m_syslog; }
|
||||
inline const char *logFile() const { return m_logFile.data(); }
|
||||
inline const char *userAgent() const { return m_userAgent.data(); }
|
||||
@@ -95,12 +97,13 @@ public:
|
||||
void printVersions();
|
||||
|
||||
protected:
|
||||
bool m_autoSave = true;
|
||||
bool m_background = false;
|
||||
bool m_dryRun = false;
|
||||
bool m_syslog = false;
|
||||
bool m_upgrade = false;
|
||||
bool m_watch = true;
|
||||
bool m_autoSave = true;
|
||||
bool m_background = false;
|
||||
bool m_dryRun = false;
|
||||
bool m_pauseOnBattery = false;
|
||||
bool m_syslog = false;
|
||||
bool m_upgrade = false;
|
||||
bool m_watch = true;
|
||||
Http m_http;
|
||||
Pools m_pools;
|
||||
String m_apiId;
|
||||
|
||||
@@ -247,6 +247,7 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch
|
||||
case IConfig::HttpEnabledKey: /* --http-enabled */
|
||||
case IConfig::DaemonKey: /* --daemon */
|
||||
case IConfig::VerboseKey: /* --verbose */
|
||||
case IConfig::PauseOnBatteryKey: /* --pause-on-battery */
|
||||
return transformBoolean(doc, key, true);
|
||||
|
||||
case IConfig::ColorKey: /* --no-color */
|
||||
@@ -305,6 +306,9 @@ void xmrig::BaseTransform::transformBoolean(rapidjson::Document &doc, int key, b
|
||||
case IConfig::NoTitleKey: /* --no-title */
|
||||
return set(doc, BaseConfig::kTitle, enable);
|
||||
|
||||
case IConfig::PauseOnBatteryKey: /* --pause-on-battery */
|
||||
return set(doc, BaseConfig::kPauseOnBattery, enable);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -76,6 +76,7 @@ public:
|
||||
DataDirKey = 1035,
|
||||
TitleKey = 1037,
|
||||
NoTitleKey = 1038,
|
||||
PauseOnBatteryKey = 1041,
|
||||
|
||||
// xmrig common
|
||||
CPUPriorityKey = 1021,
|
||||
@@ -101,6 +102,8 @@ public:
|
||||
YieldKey = 1030,
|
||||
AstroBWTMaxSizeKey = 1034,
|
||||
AstroBWTAVX2Key = 1036,
|
||||
Argon2ImplKey = 1039,
|
||||
RandomXCacheQoSKey = 1040,
|
||||
|
||||
// xmrig amd
|
||||
OclPlatformKey = 1400,
|
||||
|
||||
@@ -126,6 +126,10 @@ bool xmrig::HttpContext::isRequest() const
|
||||
|
||||
size_t xmrig::HttpContext::parse(const char *data, size_t size)
|
||||
{
|
||||
if (size == 0) {
|
||||
return size;
|
||||
}
|
||||
|
||||
return http_parser_execute(m_parser, &http_settings, data, size);
|
||||
}
|
||||
|
||||
|
||||
@@ -29,13 +29,20 @@ namespace xmrig {
|
||||
class HttpListener : public IHttpListener
|
||||
{
|
||||
public:
|
||||
inline HttpListener(IHttpListener *listener, const char *tag = nullptr) : m_tag(tag), m_listener(listener) {}
|
||||
inline HttpListener(IHttpListener *listener, const char *tag = nullptr) :
|
||||
# ifdef APP_DEBUG
|
||||
m_tag(tag),
|
||||
# endif
|
||||
m_listener(listener)
|
||||
{}
|
||||
|
||||
protected:
|
||||
void onHttpData(const HttpData &data) override;
|
||||
|
||||
private:
|
||||
# ifdef APP_DEBUG
|
||||
const char *m_tag;
|
||||
# endif
|
||||
IHttpListener *m_listener;
|
||||
};
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ public:
|
||||
void setDiff(uint64_t diff);
|
||||
|
||||
inline bool isNicehash() const { return m_nicehash; }
|
||||
inline bool isValid() const { return m_size > 0 && m_diff > 0; }
|
||||
inline bool isValid() const { return (m_size > 0 && m_diff > 0) || !m_poolWallet.isEmpty(); }
|
||||
inline bool setId(const char *id) { return m_id = id; }
|
||||
inline const Algorithm &algorithm() const { return m_algorithm; }
|
||||
inline const Buffer &seed() const { return m_seed; }
|
||||
@@ -82,6 +82,7 @@ public:
|
||||
inline uint32_t backend() const { return m_backend; }
|
||||
inline uint64_t diff() const { return m_diff; }
|
||||
inline uint64_t height() const { return m_height; }
|
||||
inline uint64_t nonceMask() const { return isNicehash() ? 0xFFFFFFULL : (nonceSize() == sizeof(uint64_t) ? (-1ull >> (extraNonce().size() * 4)): 0xFFFFFFFFULL); }
|
||||
inline uint64_t target() const { return m_target; }
|
||||
inline uint8_t *blob() { return m_blob; }
|
||||
inline uint8_t fixedByte() const { return *(m_blob + 42); }
|
||||
|
||||
101
src/base/tools/Profiler.cpp
Normal file
101
src/base/tools/Profiler.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
/* XMRig
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_PROFILING
|
||||
|
||||
|
||||
ProfileScopeData* ProfileScopeData::s_data[MAX_DATA_COUNT] = {};
|
||||
volatile long ProfileScopeData::s_dataCount = 0;
|
||||
double ProfileScopeData::s_tscSpeed = 0.0;
|
||||
|
||||
|
||||
#ifndef NOINLINE
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__ ((noinline))
|
||||
#elif _MSC_VER
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
static std::string get_thread_id()
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << std::this_thread::get_id();
|
||||
|
||||
std::string s = ss.str();
|
||||
if (s.length() > ProfileScopeData::MAX_THREAD_ID_LENGTH) {
|
||||
s.resize(ProfileScopeData::MAX_THREAD_ID_LENGTH);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
NOINLINE void ProfileScopeData::Register(ProfileScopeData* data)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
const long id = _InterlockedIncrement(&s_dataCount) - 1;
|
||||
#else
|
||||
const long id = __sync_fetch_and_add(&s_dataCount, 1);
|
||||
#endif
|
||||
|
||||
if (static_cast<unsigned long>(id) < MAX_DATA_COUNT) {
|
||||
s_data[id] = data;
|
||||
|
||||
const std::string s = get_thread_id();
|
||||
memcpy(data->m_threadId, s.c_str(), s.length() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
NOINLINE void ProfileScopeData::Init()
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
const uint64_t t1 = static_cast<uint64_t>(time_point_cast<nanoseconds>(high_resolution_clock::now()).time_since_epoch().count());
|
||||
const uint64_t count1 = ReadTSC();
|
||||
|
||||
for (;;)
|
||||
{
|
||||
const uint64_t t2 = static_cast<uint64_t>(time_point_cast<nanoseconds>(high_resolution_clock::now()).time_since_epoch().count());
|
||||
const uint64_t count2 = ReadTSC();
|
||||
|
||||
if (t2 - t1 > 1000000000) {
|
||||
s_tscSpeed = (count2 - count1) * 1e9 / (t2 - t1);
|
||||
LOG_INFO("%s TSC speed = %.3f GHz", xmrig::Tags::profiler(), s_tscSpeed / 1e9);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif /* XMRIG_FEATURE_PROFILING */
|
||||
133
src/base/tools/Profiler.h
Normal file
133
src/base/tools/Profiler.h
Normal file
@@ -0,0 +1,133 @@
|
||||
/* XMRig
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_PROFILER_H
|
||||
#define XMRIG_PROFILER_H
|
||||
|
||||
|
||||
#ifndef FORCE_INLINE
|
||||
#if defined(_MSC_VER)
|
||||
#define FORCE_INLINE __forceinline
|
||||
#elif defined(__GNUC__)
|
||||
#define FORCE_INLINE __attribute__((always_inline)) inline
|
||||
#elif defined(__clang__)
|
||||
#define FORCE_INLINE __inline__
|
||||
#else
|
||||
#define FORCE_INLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_PROFILING
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
static FORCE_INLINE uint64_t ReadTSC()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
uint32_t hi, lo;
|
||||
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
|
||||
return (((uint64_t)hi) << 32) | lo;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
struct ProfileScopeData
|
||||
{
|
||||
const char* m_name;
|
||||
uint64_t m_totalCycles;
|
||||
uint32_t m_totalSamples;
|
||||
|
||||
enum
|
||||
{
|
||||
MAX_THREAD_ID_LENGTH = 11,
|
||||
MAX_SAMPLE_COUNT = 128,
|
||||
MAX_DATA_COUNT = 1024
|
||||
};
|
||||
|
||||
char m_threadId[MAX_THREAD_ID_LENGTH + 1];
|
||||
|
||||
static ProfileScopeData* s_data[MAX_DATA_COUNT];
|
||||
static volatile long s_dataCount;
|
||||
static double s_tscSpeed;
|
||||
|
||||
static void Register(ProfileScopeData* data);
|
||||
static void Init();
|
||||
};
|
||||
|
||||
static_assert(std::is_trivial<ProfileScopeData>::value, "ProfileScopeData must be a trivial struct");
|
||||
static_assert(sizeof(ProfileScopeData) <= 32, "ProfileScopeData struct is too big");
|
||||
|
||||
|
||||
class ProfileScope
|
||||
{
|
||||
public:
|
||||
FORCE_INLINE ProfileScope(ProfileScopeData& data)
|
||||
: m_data(data)
|
||||
{
|
||||
if (m_data.m_totalCycles == 0) {
|
||||
ProfileScopeData::Register(&data);
|
||||
}
|
||||
|
||||
m_startCounter = ReadTSC();
|
||||
}
|
||||
|
||||
FORCE_INLINE ~ProfileScope()
|
||||
{
|
||||
m_data.m_totalCycles += ReadTSC() - m_startCounter;
|
||||
++m_data.m_totalSamples;
|
||||
}
|
||||
|
||||
private:
|
||||
ProfileScopeData& m_data;
|
||||
uint64_t m_startCounter;
|
||||
};
|
||||
|
||||
|
||||
#define PROFILE_SCOPE(x) static thread_local ProfileScopeData x##_data{#x}; ProfileScope x(x##_data);
|
||||
|
||||
|
||||
#else /* XMRIG_FEATURE_PROFILING */
|
||||
#define PROFILE_SCOPE(x)
|
||||
#endif /* XMRIG_FEATURE_PROFILING */
|
||||
|
||||
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
|
||||
|
||||
struct rx_blake2b_wrapper
|
||||
{
|
||||
FORCE_INLINE static void run(void* out, size_t outlen, const void* in, size_t inlen)
|
||||
{
|
||||
PROFILE_SCOPE(RandomX_Blake2b);
|
||||
rx_blake2b(out, outlen, in, inlen);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif /* XMRIG_PROFILER_H */
|
||||
@@ -33,7 +33,7 @@
|
||||
xmrig::String::String(const char *str) :
|
||||
m_size(str == nullptr ? 0 : strlen(str))
|
||||
{
|
||||
if (m_size == 0) {
|
||||
if (str == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@
|
||||
"rdmsr": true,
|
||||
"wrmsr": true,
|
||||
"cache_qos": false,
|
||||
"numa": true
|
||||
"numa": true,
|
||||
"scratchpad_prefetch_mode": 1
|
||||
},
|
||||
"cpu": {
|
||||
"enabled": true,
|
||||
@@ -91,5 +92,6 @@
|
||||
},
|
||||
"user-agent": null,
|
||||
"verbose": 0,
|
||||
"watch": true
|
||||
"watch": true,
|
||||
"pause-on-battery": false
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/net/stratum/Job.h"
|
||||
#include "base/tools/Object.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "base/tools/Timer.h"
|
||||
#include "core/config/Config.h"
|
||||
#include "core/Controller.h"
|
||||
@@ -120,7 +121,7 @@ public:
|
||||
for (int i = 0; i < Algorithm::MAX; ++i) {
|
||||
const Algorithm algo(static_cast<Algorithm::Id>(i));
|
||||
|
||||
if (isEnabled(algo)) {
|
||||
if (algo.isValid() && isEnabled(algo)) {
|
||||
algorithms.push_back(algo);
|
||||
}
|
||||
}
|
||||
@@ -267,6 +268,44 @@ public:
|
||||
h = "MH/s";
|
||||
}
|
||||
|
||||
# ifdef XMRIG_FEATURE_PROFILING
|
||||
ProfileScopeData* data[ProfileScopeData::MAX_DATA_COUNT];
|
||||
|
||||
const uint32_t n = std::min<uint32_t>(ProfileScopeData::s_dataCount, ProfileScopeData::MAX_DATA_COUNT);
|
||||
memcpy(data, ProfileScopeData::s_data, n * sizeof(ProfileScopeData*));
|
||||
|
||||
std::sort(data, data + n, [](ProfileScopeData* a, ProfileScopeData* b) {
|
||||
return strcmp(a->m_threadId, b->m_threadId) < 0;
|
||||
});
|
||||
|
||||
for (uint32_t i = 0; i < n;)
|
||||
{
|
||||
uint32_t n1 = i;
|
||||
while ((n1 < n) && (strcmp(data[i]->m_threadId, data[n1]->m_threadId) == 0)) {
|
||||
++n1;
|
||||
}
|
||||
|
||||
std::sort(data + i, data + n1, [](ProfileScopeData* a, ProfileScopeData* b) {
|
||||
return a->m_totalCycles > b->m_totalCycles;
|
||||
});
|
||||
|
||||
for (uint32_t j = i; j < n1; ++j) {
|
||||
ProfileScopeData* p = data[j];
|
||||
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
|
||||
Tags::profiler(),
|
||||
p->m_threadId,
|
||||
p->m_name,
|
||||
p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
|
||||
p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed
|
||||
);
|
||||
}
|
||||
|
||||
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
|
||||
|
||||
i = n1;
|
||||
}
|
||||
# endif
|
||||
|
||||
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
|
||||
Tags::miner(),
|
||||
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
|
||||
@@ -287,6 +326,7 @@ public:
|
||||
bool active = false;
|
||||
bool enabled = true;
|
||||
bool reset = true;
|
||||
bool battery_power = false;
|
||||
Controller *controller;
|
||||
Job job;
|
||||
mutable std::map<Algorithm::Id, double> maxHashrate;
|
||||
@@ -310,6 +350,10 @@ xmrig::Miner::Miner(Controller *controller)
|
||||
Platform::setThreadPriority(std::min(priority + 1, 5));
|
||||
}
|
||||
|
||||
# ifdef XMRIG_FEATURE_PROFILING
|
||||
ProfileScopeData::Init();
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
Rx::init(this);
|
||||
# endif
|
||||
@@ -429,13 +473,24 @@ void xmrig::Miner::setEnabled(bool enabled)
|
||||
return;
|
||||
}
|
||||
|
||||
if (d_ptr->battery_power && enabled) {
|
||||
LOG_INFO("%s " YELLOW_BOLD("can't resume while on battery power"), Tags::miner());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
d_ptr->enabled = enabled;
|
||||
|
||||
if (enabled) {
|
||||
LOG_INFO(GREEN_BOLD("resumed"));
|
||||
LOG_INFO("%s " GREEN_BOLD("resumed"), Tags::miner());
|
||||
}
|
||||
else {
|
||||
LOG_INFO(YELLOW_BOLD("paused") ", press " MAGENTA_BG_BOLD(" r ") " to resume");
|
||||
if (d_ptr->battery_power) {
|
||||
LOG_INFO("%s " YELLOW_BOLD("paused"), Tags::miner());
|
||||
}
|
||||
else {
|
||||
LOG_INFO("%s " YELLOW_BOLD("paused") ", press " MAGENTA_BG_BOLD(" r ") " to resume", Tags::miner());
|
||||
}
|
||||
}
|
||||
|
||||
if (!d_ptr->active) {
|
||||
@@ -538,6 +593,20 @@ void xmrig::Miner::onTimer(const Timer *)
|
||||
}
|
||||
|
||||
d_ptr->ticks++;
|
||||
|
||||
if (d_ptr->controller->config()->isPauseOnBattery()) {
|
||||
const bool battery_power = Platform::isOnBatteryPower();
|
||||
if (battery_power && d_ptr->enabled) {
|
||||
LOG_INFO("%s " YELLOW_BOLD("on battery power"), Tags::miner());
|
||||
d_ptr->battery_power = true;
|
||||
setEnabled(false);
|
||||
}
|
||||
else if (!battery_power && !d_ptr->enabled && d_ptr->battery_power) {
|
||||
LOG_INFO("%s " GREEN_BOLD("on AC power"), Tags::miner());
|
||||
d_ptr->battery_power = false;
|
||||
setEnabled(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -252,4 +252,5 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const
|
||||
doc.AddMember(StringRef(kUserAgent), m_userAgent.toJSON(), allocator);
|
||||
doc.AddMember(StringRef(kVerbose), Log::verbose(), allocator);
|
||||
doc.AddMember(StringRef(kWatch), m_watch, allocator);
|
||||
doc.AddMember(StringRef(kPauseOnBattery), isPauseOnBattery(), allocator);
|
||||
}
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -151,6 +151,9 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
|
||||
case IConfig::YieldKey: /* --cpu-no-yield */
|
||||
return set(doc, kCpu, "yield", false);
|
||||
|
||||
case IConfig::Argon2ImplKey: /* --argon2-impl */
|
||||
return set(doc, kCpu, "argon2-impl", arg);
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
case IConfig::AssemblyKey: /* --asm */
|
||||
return set(doc, kCpu, "asm", arg);
|
||||
@@ -186,6 +189,9 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
|
||||
|
||||
case IConfig::RandomXRdmsrKey: /* --randomx-no-rdmsr */
|
||||
return set(doc, kRandomX, "rdmsr", false);
|
||||
|
||||
case IConfig::RandomXCacheQoSKey: /* --cache-qos */
|
||||
return set(doc, kRandomX, "cache_qos", true);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_OPENCL
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
||||
@@ -51,7 +51,12 @@ R"===(
|
||||
"randomx": {
|
||||
"init": -1,
|
||||
"mode": "auto",
|
||||
"numa": true
|
||||
"1gb-pages": false,
|
||||
"rdmsr": true,
|
||||
"wrmsr": true,
|
||||
"cache_qos": false,
|
||||
"numa": true,
|
||||
"scratchpad_prefetch_mode": 1
|
||||
},
|
||||
"cpu": {
|
||||
"enabled": true,
|
||||
@@ -109,7 +114,8 @@ R"===(
|
||||
"retry-pause": 5,
|
||||
"syslog": false,
|
||||
"user-agent": null,
|
||||
"watch": true
|
||||
"watch": true,
|
||||
"pause-on-battery": false
|
||||
}
|
||||
)===";
|
||||
#endif
|
||||
|
||||
@@ -87,11 +87,15 @@ static const option options[] = {
|
||||
{ "cpu-max-threads-hint", 1, nullptr, IConfig::CPUMaxThreadsKey },
|
||||
{ "cpu-memory-pool", 1, nullptr, IConfig::MemoryPoolKey },
|
||||
{ "cpu-no-yield", 0, nullptr, IConfig::YieldKey },
|
||||
{ "no-yield", 0, nullptr, IConfig::YieldKey },
|
||||
{ "cpu-argon2-impl", 1, nullptr, IConfig::Argon2ImplKey },
|
||||
{ "argon2-impl", 1, nullptr, IConfig::Argon2ImplKey },
|
||||
{ "verbose", 0, nullptr, IConfig::VerboseKey },
|
||||
{ "proxy", 1, nullptr, IConfig::ProxyKey },
|
||||
{ "data-dir", 1, nullptr, IConfig::DataDirKey },
|
||||
{ "title", 1, nullptr, IConfig::TitleKey },
|
||||
{ "no-title", 0, nullptr, IConfig::NoTitleKey },
|
||||
{ "pause-on-battery", 0, nullptr, IConfig::PauseOnBatteryKey },
|
||||
# ifdef XMRIG_FEATURE_TLS
|
||||
{ "tls", 0, nullptr, IConfig::TlsKey },
|
||||
{ "tls-fingerprint", 1, nullptr, IConfig::FingerprintKey },
|
||||
@@ -116,6 +120,8 @@ static const option options[] = {
|
||||
{ "wrmsr", 2, nullptr, IConfig::RandomXWrmsrKey },
|
||||
{ "randomx-no-rdmsr", 0, nullptr, IConfig::RandomXRdmsrKey },
|
||||
{ "no-rdmsr", 0, nullptr, IConfig::RandomXRdmsrKey },
|
||||
{ "randomx-cache-qos", 0, nullptr, IConfig::RandomXCacheQoSKey },
|
||||
{ "cache-qos", 0, nullptr, IConfig::RandomXCacheQoSKey },
|
||||
# endif
|
||||
#ifdef XMRIG_ALGO_ASTROBWT
|
||||
{ "astrobwt-max-size", 1, nullptr, IConfig::AstroBWTMaxSizeKey },
|
||||
|
||||
@@ -69,7 +69,7 @@ static inline const std::string &usage()
|
||||
u += " -r, --retries=N number of times to retry before switch to backup server (default: 5)\n";
|
||||
u += " -R, --retry-pause=N time to pause between retries (default: 5)\n";
|
||||
u += " --user-agent set custom user-agent string for pool\n";
|
||||
u += " --donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n";
|
||||
u += " --donate-level=N donate level, default 1%% (1 minute in 100 minutes)\n";
|
||||
u += " --donate-over-proxy=N control donate over xmrig-proxy feature\n";
|
||||
|
||||
u += "\nCPU backend:\n";
|
||||
@@ -85,13 +85,18 @@ static inline const std::string &usage()
|
||||
u += " --no-huge-pages disable huge pages support\n";
|
||||
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
|
||||
|
||||
# if defined(__x86_64__) || defined(_M_AMD64)
|
||||
u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
u += " --randomx-init=N threads count to initialize RandomX dataset\n";
|
||||
u += " --randomx-no-numa disable NUMA support for RandomX\n";
|
||||
u += " --randomx-mode=MODE RandomX mode: auto, fast, light\n";
|
||||
u += " --randomx-1gb-pages use 1GB hugepages for dataset (Linux only)\n";
|
||||
u += " --randomx-wrmsr=N write custom value (0-15) to Intel MSR register 0x1a4 or disable MSR mod (-1)\n";
|
||||
u += " --randomx-1gb-pages use 1GB hugepages for RandomX dataset (Linux only)\n";
|
||||
u += " --randomx-wrmsr=N write custom value(s) to MSR registers or disable MSR mod (-1)\n";
|
||||
u += " --randomx-no-rdmsr disable reverting initial MSR values on exit\n";
|
||||
u += " --randomx-cache-qos enable Cache QoS\n";
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
@@ -99,16 +104,6 @@ static inline const std::string &usage()
|
||||
u += " --astrobwt-avx2 enable AVX2 optimizations for AstroBWT algorithm";
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
u += "\nAPI:\n";
|
||||
u += " --api-worker-id=ID custom worker-id for API\n";
|
||||
u += " --api-id=ID custom instance ID for API\n";
|
||||
u += " --http-host=HOST bind host for HTTP API (default: 127.0.0.1)\n";
|
||||
u += " --http-port=N bind port for HTTP API\n";
|
||||
u += " --http-access-token=T access token for HTTP API\n";
|
||||
u += " --http-no-restricted enable full remote access to HTTP API (only if access token set)\n";
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_OPENCL
|
||||
u += "\nOpenCL backend:\n";
|
||||
u += " --opencl enable OpenCL mining backend\n";
|
||||
@@ -131,6 +126,16 @@ static inline const std::string &usage()
|
||||
u += " --no-nvml disable NVML (NVIDIA Management Library) support\n";
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
u += "\nAPI:\n";
|
||||
u += " --api-worker-id=ID custom worker-id for API\n";
|
||||
u += " --api-id=ID custom instance ID for API\n";
|
||||
u += " --http-host=HOST bind host for HTTP API (default: 127.0.0.1)\n";
|
||||
u += " --http-port=N bind port for HTTP API\n";
|
||||
u += " --http-access-token=T access token for HTTP API\n";
|
||||
u += " --http-no-restricted enable full remote access to HTTP API (only if access token set)\n";
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_TLS
|
||||
u += "\nTLS:\n";
|
||||
u += " --tls-gen=HOSTNAME generate TLS certificate for specific hostname\n";
|
||||
@@ -172,6 +177,7 @@ static inline const std::string &usage()
|
||||
u += " --title set custom console window title\n";
|
||||
u += " --no-title disable setting console window title\n";
|
||||
# endif
|
||||
u += " --pause-on-battery pause mine on battery power\n";
|
||||
|
||||
return u;
|
||||
}
|
||||
|
||||
@@ -72,8 +72,8 @@ bool xmrig::argon2::Impl::select(const String &nameHint, bool benchmark)
|
||||
}
|
||||
}
|
||||
|
||||
if (hint.isEmpty() || argon2_select_impl_by_name(hint) == 0) {
|
||||
argon2_select_impl();
|
||||
if (!hint.isEmpty()) {
|
||||
argon2_select_impl_by_name(hint);
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
@@ -26,18 +26,14 @@
|
||||
#include "crypto/common/Nonce.h"
|
||||
|
||||
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
std::atomic<bool> Nonce::m_paused;
|
||||
std::atomic<uint64_t> Nonce::m_sequence[Nonce::MAX];
|
||||
uint32_t Nonce::m_nonces[2] = { 0, 0 };
|
||||
std::atomic<uint64_t> Nonce::m_nonces[2] = { {0}, {0} };
|
||||
|
||||
|
||||
static std::mutex mutex;
|
||||
static Nonce nonce;
|
||||
|
||||
|
||||
@@ -54,40 +50,34 @@ xmrig::Nonce::Nonce()
|
||||
}
|
||||
|
||||
|
||||
uint32_t xmrig::Nonce::next(uint8_t index, uint32_t nonce, uint32_t reserveCount, bool nicehash, bool *ok)
|
||||
bool xmrig::Nonce::next(uint8_t index, uint32_t *nonce, uint32_t reserveCount, uint64_t mask)
|
||||
{
|
||||
uint32_t next;
|
||||
mask &= 0x7FFFFFFFFFFFFFFFULL;
|
||||
if (reserveCount == 0 || mask < reserveCount - 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
if (nicehash) {
|
||||
if ((m_nonces[index] + reserveCount) > 0x1000000) {
|
||||
if (ok) {
|
||||
*ok = false;
|
||||
}
|
||||
|
||||
pause(true);
|
||||
|
||||
return 0;
|
||||
uint64_t counter = m_nonces[index].fetch_add(reserveCount, std::memory_order_relaxed);
|
||||
while (true) {
|
||||
if (mask < counter) {
|
||||
return false;
|
||||
}
|
||||
|
||||
next = (nonce & 0xFF000000) | m_nonces[index];
|
||||
else if (mask - counter <= reserveCount - 1) {
|
||||
pause(true);
|
||||
if (mask - counter < reserveCount - 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (0xFFFFFFFFUL - (uint32_t)counter < reserveCount - 1) {
|
||||
counter = m_nonces[index].fetch_add(reserveCount, std::memory_order_relaxed);
|
||||
continue;
|
||||
}
|
||||
*nonce = (nonce[0] & ~mask) | counter;
|
||||
if (mask > 0xFFFFFFFFULL) {
|
||||
nonce[1] = (nonce[1] & (~mask >> 32)) | (counter >> 32);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
next = m_nonces[index];
|
||||
}
|
||||
|
||||
m_nonces[index] += reserveCount;
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Nonce::reset(uint8_t index)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
m_nonces[index] = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -49,18 +49,18 @@ public:
|
||||
static inline bool isPaused() { return m_paused.load(std::memory_order_relaxed); }
|
||||
static inline uint64_t sequence(Backend backend) { return m_sequence[backend].load(std::memory_order_relaxed); }
|
||||
static inline void pause(bool paused) { m_paused = paused; }
|
||||
static inline void reset(uint8_t index) { m_nonces[index] = 0; }
|
||||
static inline void stop(Backend backend) { m_sequence[backend] = 0; }
|
||||
static inline void touch(Backend backend) { m_sequence[backend]++; }
|
||||
|
||||
static uint32_t next(uint8_t index, uint32_t nonce, uint32_t reserveCount, bool nicehash, bool *ok = nullptr);
|
||||
static void reset(uint8_t index);
|
||||
static bool next(uint8_t index, uint32_t *nonce, uint32_t reserveCount, uint64_t mask);
|
||||
static void stop();
|
||||
static void touch();
|
||||
|
||||
private:
|
||||
static std::atomic<bool> m_paused;
|
||||
static std::atomic<uint64_t> m_sequence[MAX];
|
||||
static uint32_t m_nonces[2];
|
||||
static std::atomic<uint64_t> m_nonces[2];
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ static std::mutex mutex;
|
||||
|
||||
xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node, size_t alignSize) :
|
||||
m_size(align(size)),
|
||||
m_capacity(m_size),
|
||||
m_node(node)
|
||||
{
|
||||
if (usePool) {
|
||||
@@ -69,6 +70,7 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages
|
||||
}
|
||||
|
||||
if (oneGbPages && allocateOneGbPagesMemory()) {
|
||||
m_capacity = align(size, 1ULL << 30);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -52,6 +52,7 @@ public:
|
||||
inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); }
|
||||
inline bool isOneGbPages() const { return m_flags.test(FLAG_1GB_PAGES); }
|
||||
inline size_t size() const { return m_size; }
|
||||
inline size_t capacity() const { return m_capacity; }
|
||||
inline uint8_t *raw() const { return m_scratchpad; }
|
||||
inline uint8_t *scratchpad() const { return m_scratchpad; }
|
||||
|
||||
@@ -88,6 +89,7 @@ private:
|
||||
void freeLargePagesMemory();
|
||||
|
||||
const size_t m_size;
|
||||
size_t m_capacity;
|
||||
const uint32_t m_node;
|
||||
std::bitset<FLAG_MAX> m_flags;
|
||||
uint8_t *m_scratchpad = nullptr;
|
||||
|
||||
@@ -82,7 +82,17 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
|
||||
# elif defined(__FreeBSD__)
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
||||
# else
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0);
|
||||
|
||||
# if defined(MAP_HUGE_2MB)
|
||||
constexpr int flag_2mb = MAP_HUGE_2MB;
|
||||
# elif defined(MAP_HUGE_SHIFT)
|
||||
constexpr int flag_2mb = (21 << MAP_HUGE_SHIFT);
|
||||
# else
|
||||
constexpr int flag_2mb = 0;
|
||||
# endif
|
||||
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_2mb, 0, 0);
|
||||
|
||||
# endif
|
||||
|
||||
return mem == MAP_FAILED ? nullptr : mem;
|
||||
|
||||
@@ -91,7 +91,7 @@ bool KPCache::init(uint32_t epoch)
|
||||
const uint32_t a = (cache_nodes * i) / n;
|
||||
const uint32_t b = (cache_nodes * (i + 1)) / n;
|
||||
|
||||
threads.emplace_back([this, a, b, cache_nodes, &cache]() {
|
||||
threads.emplace_back([this, a, b, &cache]() {
|
||||
uint32_t j = a;
|
||||
for (; j + 4 <= b; j += 4) ethash_calculate_dag_item4_opt(((node*)m_DAGCache.data()) + j, j, num_dataset_parents, &cache);
|
||||
for (; j < b; ++j) ethash_calculate_dag_item_opt(((node*)m_DAGCache.data()) + j, j, num_dataset_parents, &cache);
|
||||
|
||||
@@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||
@@ -49,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
*/
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||
const uint8_t* inptr = (uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
@@ -117,7 +118,7 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
|
||||
The modified state is written back to 'state' to allow multiple
|
||||
calls to this function.
|
||||
*/
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
@@ -158,7 +159,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
@@ -213,8 +214,10 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
PROFILE_SCOPE(RandomX_AES);
|
||||
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
@@ -241,42 +244,48 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
|
||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
|
||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
|
||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
|
||||
#define HASH_STATE(k) \
|
||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0)); \
|
||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1)); \
|
||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2)); \
|
||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3));
|
||||
|
||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
||||
#define FILL_STATE(k) \
|
||||
fill_state0 = aesdec<softAes>(fill_state0, key0); \
|
||||
fill_state1 = aesenc<softAes>(fill_state1, key1); \
|
||||
fill_state2 = aesdec<softAes>(fill_state2, key2); \
|
||||
fill_state3 = aesenc<softAes>(fill_state3, key3); \
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0, fill_state0); \
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1, fill_state1); \
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
|
||||
switch(softAes) {
|
||||
case 0:
|
||||
HASH_STATE(0);
|
||||
HASH_STATE(1);
|
||||
|
||||
rx_prefetch_t0(prefetchPtr);
|
||||
FILL_STATE(0);
|
||||
FILL_STATE(1);
|
||||
|
||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
|
||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
|
||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
|
||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
|
||||
rx_prefetch_t0(prefetchPtr);
|
||||
rx_prefetch_t0(prefetchPtr + 64);
|
||||
|
||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
||||
scratchpadPtr += 128;
|
||||
prefetchPtr += 128;
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
|
||||
break;
|
||||
|
||||
rx_prefetch_t0(prefetchPtr + 64);
|
||||
default:
|
||||
HASH_STATE(0);
|
||||
FILL_STATE(0);
|
||||
rx_prefetch_t0(prefetchPtr);
|
||||
|
||||
scratchpadPtr += 128;
|
||||
prefetchPtr += 128;
|
||||
scratchpadPtr += 64;
|
||||
prefetchPtr += 64;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
prefetchPtr = (const char*) scratchpad;
|
||||
scratchpadEnd += PREFETCH_DISTANCE;
|
||||
@@ -308,5 +317,6 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||
}
|
||||
|
||||
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
|
||||
@@ -30,14 +30,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
||||
@@ -92,7 +92,7 @@ extern "C" {
|
||||
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen);
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
|
||||
123
src/crypto/randomx/blake2/blake2b-round.h
Normal file
123
src/crypto/randomx/blake2/blake2b-round.h
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
BLAKE2 reference source code package - optimized C implementations
|
||||
|
||||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||
your option. The terms of these licenses can be found at:
|
||||
|
||||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
More information about the BLAKE2 hash function can be found at
|
||||
https://blake2.net.
|
||||
*/
|
||||
#ifndef BLAKE2B_ROUND_H
|
||||
#define BLAKE2B_ROUND_H
|
||||
|
||||
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
|
||||
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
|
||||
|
||||
#define TOF(reg) _mm_castsi128_ps((reg))
|
||||
#define TOI(reg) _mm_castps_si128((reg))
|
||||
|
||||
#define LIKELY(x) __builtin_expect((x),1)
|
||||
|
||||
|
||||
/* Microarchitecture-specific macros */
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
||||
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
|
||||
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
|
||||
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
|
||||
|
||||
|
||||
|
||||
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||
\
|
||||
row4l = _mm_xor_si128(row4l, row1l); \
|
||||
row4h = _mm_xor_si128(row4h, row1h); \
|
||||
\
|
||||
row4l = _mm_roti_epi64(row4l, -32); \
|
||||
row4h = _mm_roti_epi64(row4h, -32); \
|
||||
\
|
||||
row3l = _mm_add_epi64(row3l, row4l); \
|
||||
row3h = _mm_add_epi64(row3h, row4h); \
|
||||
\
|
||||
row2l = _mm_xor_si128(row2l, row3l); \
|
||||
row2h = _mm_xor_si128(row2h, row3h); \
|
||||
\
|
||||
row2l = _mm_roti_epi64(row2l, -24); \
|
||||
row2h = _mm_roti_epi64(row2h, -24); \
|
||||
|
||||
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||
\
|
||||
row4l = _mm_xor_si128(row4l, row1l); \
|
||||
row4h = _mm_xor_si128(row4h, row1h); \
|
||||
\
|
||||
row4l = _mm_roti_epi64(row4l, -16); \
|
||||
row4h = _mm_roti_epi64(row4h, -16); \
|
||||
\
|
||||
row3l = _mm_add_epi64(row3l, row4l); \
|
||||
row3h = _mm_add_epi64(row3h, row4h); \
|
||||
\
|
||||
row2l = _mm_xor_si128(row2l, row3l); \
|
||||
row2h = _mm_xor_si128(row2h, row3h); \
|
||||
\
|
||||
row2l = _mm_roti_epi64(row2l, -63); \
|
||||
row2h = _mm_roti_epi64(row2h, -63); \
|
||||
|
||||
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||
row2l = t0; \
|
||||
row2h = t1; \
|
||||
\
|
||||
t0 = row3l; \
|
||||
row3l = row3h; \
|
||||
row3h = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||
row4l = t1; \
|
||||
row4h = t0;
|
||||
|
||||
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||
row2l = t0; \
|
||||
row2h = t1; \
|
||||
\
|
||||
t0 = row3l; \
|
||||
row3l = row3h; \
|
||||
row3h = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||
row4l = t1; \
|
||||
row4h = t0;
|
||||
|
||||
#define LOAD_MSG(r, i, b0, b1) \
|
||||
do { \
|
||||
b0 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 1]], m[blake2b_sigma_sse41[r][i * 4 + 0]]); \
|
||||
b1 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 3]], m[blake2b_sigma_sse41[r][i * 4 + 2]]); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND(r) \
|
||||
LOAD_MSG(r, 0, b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG(r, 1, b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
||||
LOAD_MSG(r, 2, b0, b1); \
|
||||
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
LOAD_MSG(r, 3, b0, b1); \
|
||||
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
||||
|
||||
#endif
|
||||
@@ -39,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
#include "crypto/randomx/blake2/blake2-impl.h"
|
||||
|
||||
static const uint64_t blake2b_IV[8] = {
|
||||
const uint64_t blake2b_IV[8] = {
|
||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
@@ -179,7 +179,7 @@ int rx_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rx_blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||
static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
unsigned int i, r;
|
||||
@@ -237,6 +237,21 @@ static void rx_blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
#if defined(XMRIG_FEATURE_SSE4_1)
|
||||
|
||||
uint32_t rx_blake2b_use_sse41 = 0;
|
||||
void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t* block);
|
||||
|
||||
#define rx_blake2b_compress(S, block) \
|
||||
if (rx_blake2b_use_sse41) \
|
||||
rx_blake2b_compress_sse41(S, block); \
|
||||
else \
|
||||
rx_blake2b_compress_integer(S, block);
|
||||
|
||||
#else
|
||||
#define rx_blake2b_compress(S, block) rx_blake2b_compress_integer(S, block);
|
||||
#endif
|
||||
|
||||
int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
|
||||
@@ -260,14 +275,14 @@ int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
||||
memcpy(&S->buf[left], pin, fill);
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
rx_blake2b_compress(S, S->buf);
|
||||
rx_blake2b_compress(S, S->buf);
|
||||
S->buflen = 0;
|
||||
inlen -= fill;
|
||||
pin += fill;
|
||||
/* Avoid buffer copies when possible */
|
||||
while (inlen > BLAKE2B_BLOCKBYTES) {
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
rx_blake2b_compress(S, pin);
|
||||
rx_blake2b_compress(S, pin);
|
||||
inlen -= BLAKE2B_BLOCKBYTES;
|
||||
pin += BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
@@ -294,7 +309,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||
blake2b_increment_counter(S, S->buflen);
|
||||
blake2b_set_lastblock(S);
|
||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||
rx_blake2b_compress(S, S->buf);
|
||||
rx_blake2b_compress(S, S->buf);
|
||||
|
||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||
@@ -307,8 +322,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen) {
|
||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen) {
|
||||
blake2b_state S;
|
||||
int ret = -1;
|
||||
|
||||
@@ -321,25 +335,14 @@ int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
||||
if (rx_blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
if (rx_blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (rx_blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (rx_blake2b_update(&S, in, inlen) < 0) {
|
||||
if (rx_blake2b_update(&S, in, inlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
ret = rx_blake2b_final(&S, out, outlen);
|
||||
ret = rx_blake2b_final(&S, out, outlen);
|
||||
|
||||
fail:
|
||||
//clear_internal_memory(&S, sizeof(S));
|
||||
@@ -361,43 +364,42 @@ int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
||||
store32(outlen_bytes, (uint32_t)outlen);
|
||||
|
||||
#define TRY(statement) \
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||
TRY(rx_blake2b_init(&blake_state, outlen));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out, outlen));
|
||||
TRY(rx_blake2b_init(&blake_state, outlen));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out, outlen));
|
||||
}
|
||||
else {
|
||||
uint32_t toproduce;
|
||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||
|
||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES, NULL, 0));
|
||||
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
||||
0));
|
||||
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, toproduce);
|
||||
}
|
||||
fail:
|
||||
|
||||
108
src/crypto/randomx/blake2/blake2b_sse41.c
Normal file
108
src/crypto/randomx/blake2/blake2b_sse41.c
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include "blake2b-round.h"
|
||||
|
||||
|
||||
extern const uint64_t blake2b_IV[8];
|
||||
|
||||
|
||||
static const uint8_t blake2b_sigma_sse41[12][16] = {
|
||||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
|
||||
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
|
||||
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
|
||||
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
|
||||
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
|
||||
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
|
||||
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
|
||||
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
|
||||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||
};
|
||||
|
||||
|
||||
void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
|
||||
{
|
||||
__m128i row1l, row1h;
|
||||
__m128i row2l, row2h;
|
||||
__m128i row3l, row3h;
|
||||
__m128i row4l, row4h;
|
||||
__m128i b0, b1;
|
||||
__m128i t0, t1;
|
||||
|
||||
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
|
||||
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
|
||||
|
||||
row1l = LOADU(&S->h[0]);
|
||||
row1h = LOADU(&S->h[2]);
|
||||
row2l = LOADU(&S->h[4]);
|
||||
row2h = LOADU(&S->h[6]);
|
||||
row3l = LOADU(&blake2b_IV[0]);
|
||||
row3h = LOADU(&blake2b_IV[2]);
|
||||
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
|
||||
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
|
||||
|
||||
const uint64_t* m = (const uint64_t*)(block);
|
||||
|
||||
for (uint32_t r = 0; r < 12; ++r) {
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
row1l = _mm_xor_si128(row3l, row1l);
|
||||
row1h = _mm_xor_si128(row3h, row1h);
|
||||
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
|
||||
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
|
||||
row2l = _mm_xor_si128(row4l, row2l);
|
||||
row2h = _mm_xor_si128(row4h, row2h);
|
||||
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
|
||||
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
|
||||
}
|
||||
#endif
|
||||
@@ -55,7 +55,7 @@ namespace randomx {
|
||||
|
||||
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
||||
if (dataIndex + bytesNeeded > sizeof(data)) {
|
||||
rx_blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
rx_blake2b(data, sizeof(data), data, sizeof(data));
|
||||
dataIndex = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,9 +79,9 @@ namespace randomx {
|
||||
}
|
||||
|
||||
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
|
||||
int opcode = instr.opcode;
|
||||
uint32_t opcode = instr.opcode;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_RS;
|
||||
@@ -99,8 +99,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_M;
|
||||
@@ -108,7 +109,7 @@ namespace randomx {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
@@ -117,8 +118,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISUB_R;
|
||||
@@ -133,8 +135,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISUB_M;
|
||||
@@ -142,7 +145,7 @@ namespace randomx {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
@@ -151,8 +154,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_R;
|
||||
@@ -167,8 +171,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_M;
|
||||
@@ -176,7 +181,7 @@ namespace randomx {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
@@ -185,8 +190,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMULH_R;
|
||||
@@ -195,8 +201,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMULH_M;
|
||||
@@ -204,7 +211,7 @@ namespace randomx {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
@@ -213,8 +220,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISMULH_R;
|
||||
@@ -223,8 +231,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISMULH_M;
|
||||
@@ -232,7 +241,7 @@ namespace randomx {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
@@ -241,8 +250,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP) {
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isZeroOrPowerOf2(divisor)) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
@@ -257,16 +267,18 @@ namespace randomx {
|
||||
}
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::INEG_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IXOR_R;
|
||||
@@ -281,8 +293,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IXOR_M;
|
||||
@@ -290,7 +303,7 @@ namespace randomx {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
@@ -299,8 +312,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IROR_R;
|
||||
@@ -315,8 +329,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IROL_R;
|
||||
@@ -331,8 +346,9 @@ namespace randomx {
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
if (src != dst) {
|
||||
@@ -347,8 +363,9 @@ namespace randomx {
|
||||
}
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::FSWAP_R;
|
||||
if (dst < RegisterCountFlt)
|
||||
@@ -357,8 +374,9 @@ namespace randomx {
|
||||
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FADD_R;
|
||||
@@ -366,19 +384,21 @@ namespace randomx {
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FADD_M;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSUB_R;
|
||||
@@ -386,26 +406,29 @@ namespace randomx {
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FSUB_M;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.type = InstructionType::FSCAL_R;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FMUL_R;
|
||||
@@ -413,52 +436,56 @@ namespace randomx {
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FDIV_M;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSQRT_R;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH) {
|
||||
ibc.type = InstructionType::CBRANCH;
|
||||
//jump condition
|
||||
int creg = instr.dst % RegistersCount;
|
||||
ibc.idst = &nreg->r[creg];
|
||||
ibc.target = registerUsage[creg];
|
||||
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
|
||||
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
|
||||
ibc.imm &= ~(1ULL << (shift - 1));
|
||||
ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
|
||||
const int shift = instr.getModCond();
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32()) | ((1ULL << RandomX_ConfigurationBase::JumpOffset) << shift);
|
||||
ibc.imm &= ~((1ULL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift);
|
||||
ibc.memMask = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
|
||||
//mark all registers as used
|
||||
for (unsigned j = 0; j < RegistersCount; ++j) {
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND) {
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.type = InstructionType::CFROUND;
|
||||
ibc.imm = instr.getImm32() & 63;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISTORE;
|
||||
@@ -466,13 +493,14 @@ namespace randomx {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.getModCond() < StoreL3Condition)
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.memMask = AddressMask[instr.getModMem()];
|
||||
else
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
return;
|
||||
}
|
||||
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_NOP) {
|
||||
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_NOP) {
|
||||
ibc.type = InstructionType::NOP;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -225,7 +225,7 @@ namespace randomx {
|
||||
}
|
||||
|
||||
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
||||
rx_set_rounding_mode(rotr64(*ibc.isrc, ibc.imm) % 4);
|
||||
rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast<uint32_t>(ibc.imm)) % 4);
|
||||
}
|
||||
|
||||
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
||||
|
||||
@@ -74,8 +74,8 @@ namespace randomx {
|
||||
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
|
||||
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
|
||||
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
|
||||
#define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated
|
||||
#define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated
|
||||
#define CacheLineAlignMask RandomX_ConfigurationBase::CacheLineAlignMask_Calculated
|
||||
#define DatasetExtraItems RandomX_ConfigurationBase::DatasetExtraItems_Calculated
|
||||
constexpr int StoreL3Condition = 14;
|
||||
|
||||
//Prevent some unsafe configurations.
|
||||
@@ -126,10 +126,7 @@ namespace randomx {
|
||||
double hi;
|
||||
};
|
||||
|
||||
#define ScratchpadL1Mask RandomX_CurrentConfig.ScratchpadL1Mask_Calculated
|
||||
#define ScratchpadL1Mask16 RandomX_CurrentConfig.ScratchpadL1Mask16_Calculated
|
||||
#define ScratchpadL2Mask RandomX_CurrentConfig.ScratchpadL2Mask_Calculated
|
||||
#define ScratchpadL2Mask16 RandomX_CurrentConfig.ScratchpadL2Mask16_Calculated
|
||||
#define AddressMask RandomX_CurrentConfig.AddressMask_Calculated
|
||||
#define ScratchpadL3Mask RandomX_CurrentConfig.ScratchpadL3Mask_Calculated
|
||||
#define ScratchpadL3Mask64 RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated
|
||||
constexpr int RegistersCount = 8;
|
||||
|
||||
@@ -414,7 +414,7 @@ FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
float64x2_t temp;
|
||||
float64x2_t temp{};
|
||||
temp = vcopyq_laneq_f64(temp, 1, a, 1);
|
||||
a = vcopyq_laneq_f64(a, 1, a, 0);
|
||||
return vcopyq_laneq_f64(a, 0, temp, 1);
|
||||
@@ -505,7 +505,7 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||
double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||
rx_vec_f128 x;
|
||||
rx_vec_f128 x{};
|
||||
x = vsetq_lane_f64(lo, x, 0);
|
||||
x = vsetq_lane_f64(hi, x, 1);
|
||||
return x;
|
||||
|
||||
@@ -75,11 +75,11 @@ static size_t CalcDatasetItemSize()
|
||||
// Prologue
|
||||
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
|
||||
// Main loop
|
||||
RandomX_CurrentConfig.CacheAccesses * (
|
||||
RandomX_ConfigurationBase::CacheAccesses * (
|
||||
// Main loop prologue
|
||||
((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 +
|
||||
// Inner main loop (instructions)
|
||||
((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 +
|
||||
((RandomX_ConfigurationBase::SuperscalarLatency * 3) + 2) * 16 +
|
||||
// Main loop epilogue
|
||||
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
|
||||
) +
|
||||
@@ -235,7 +235,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
|
||||
num32bitLiterals = 64;
|
||||
constexpr uint32_t tmp_reg = 12;
|
||||
|
||||
for (size_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i)
|
||||
for (size_t i = 0; i < RandomX_ConfigurationBase::CacheAccesses; ++i)
|
||||
{
|
||||
// and x11, x10, CacheSize / CacheLineSize - 1
|
||||
emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos);
|
||||
@@ -946,7 +946,7 @@ void JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos)
|
||||
|
||||
const uint32_t dst = IntRegMap[instr.dst];
|
||||
const uint32_t modCond = instr.getModCond();
|
||||
const uint32_t shift = modCond + RandomX_CurrentConfig.JumpOffset;
|
||||
const uint32_t shift = modCond + RandomX_ConfigurationBase::JumpOffset;
|
||||
const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1));
|
||||
|
||||
emitAddImmediate(dst, dst, imm, code, k);
|
||||
|
||||
@@ -36,6 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/program.hpp"
|
||||
#include "crypto/randomx/reciprocal.h"
|
||||
#include "crypto/randomx/virtual_memory.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
|
||||
#ifdef XMRIG_FIX_RYZEN
|
||||
# include "crypto/rx/Rx.h"
|
||||
@@ -166,55 +168,16 @@ namespace randomx {
|
||||
# endif
|
||||
}
|
||||
|
||||
// CPU-specific tweaks
|
||||
void JitCompilerX86::applyTweaks() {
|
||||
int32_t info[4];
|
||||
cpuid(0, info);
|
||||
|
||||
int32_t manufacturer[4];
|
||||
manufacturer[0] = info[1];
|
||||
manufacturer[1] = info[3];
|
||||
manufacturer[2] = info[2];
|
||||
manufacturer[3] = 0;
|
||||
|
||||
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
|
||||
struct
|
||||
{
|
||||
unsigned int stepping : 4;
|
||||
unsigned int model : 4;
|
||||
unsigned int family : 4;
|
||||
unsigned int processor_type : 2;
|
||||
unsigned int reserved1 : 2;
|
||||
unsigned int ext_model : 4;
|
||||
unsigned int ext_family : 8;
|
||||
unsigned int reserved2 : 4;
|
||||
} processor_info;
|
||||
|
||||
cpuid(1, info);
|
||||
memcpy(&processor_info, info, sizeof(processor_info));
|
||||
|
||||
// Intel JCC erratum mitigation
|
||||
if (processor_info.family == 6) {
|
||||
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
|
||||
const uint32_t stepping = processor_info.stepping;
|
||||
|
||||
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||
BranchesWithin32B =
|
||||
((model == 0x4E) && (stepping == 0x3)) ||
|
||||
((model == 0x55) && (stepping == 0x4)) ||
|
||||
((model == 0x5E) && (stepping == 0x3)) ||
|
||||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||
((model == 0xA6) && (stepping == 0x0)) ||
|
||||
((model == 0xAE) && (stepping == 0xA));
|
||||
}
|
||||
}
|
||||
}
|
||||
# ifdef _MSC_VER
|
||||
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); }
|
||||
# else
|
||||
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return (a << shift) | (a >> (-shift & 31)); }
|
||||
# endif
|
||||
|
||||
static std::atomic<size_t> codeOffset;
|
||||
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
applyTweaks();
|
||||
BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
|
||||
|
||||
int32_t info[4];
|
||||
cpuid(1, info);
|
||||
@@ -255,6 +218,8 @@ namespace randomx {
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||
PROFILE_SCOPE(RandomX_JIT_compile);
|
||||
|
||||
vm_flags = flags;
|
||||
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
@@ -340,7 +305,6 @@ namespace randomx {
|
||||
r[j] = k;
|
||||
}
|
||||
|
||||
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
|
||||
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
|
||||
Instruction& instr1 = prog(i);
|
||||
Instruction& instr2 = prog(i + 1);
|
||||
@@ -352,17 +316,10 @@ namespace randomx {
|
||||
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
|
||||
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
|
||||
|
||||
*((uint64_t*)&instr1) &= instr_mask;
|
||||
(this->*gen1)(instr1);
|
||||
|
||||
*((uint64_t*)&instr2) &= instr_mask;
|
||||
(this->*gen2)(instr2);
|
||||
|
||||
*((uint64_t*)&instr3) &= instr_mask;
|
||||
(this->*gen3)(instr3);
|
||||
|
||||
*((uint64_t*)&instr4) &= instr_mask;
|
||||
(this->*gen4)(instr4);
|
||||
(*gen1)(this, instr1);
|
||||
(*gen2)(this, instr2);
|
||||
(*gen3)(this, instr3);
|
||||
(*gen4)(this, instr4);
|
||||
}
|
||||
|
||||
*(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40);
|
||||
@@ -508,21 +465,21 @@ namespace randomx {
|
||||
*(uint32_t*)(code + codePos) = 0xe181;
|
||||
codePos += 2;
|
||||
}
|
||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
|
||||
emit32(AddressMask[instr.getModMem()], code, codePos);
|
||||
}
|
||||
|
||||
template void JitCompilerX86::genAddressReg<false>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
||||
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
||||
|
||||
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
||||
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
|
||||
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
|
||||
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
||||
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
emitByte(0x25, code, codePos);
|
||||
if (instr.getModCond() < StoreL3Condition) {
|
||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
|
||||
emit32(AddressMask[instr.getModMem()], code, codePos);
|
||||
}
|
||||
else {
|
||||
emit32(ScratchpadL3Mask, code, codePos);
|
||||
@@ -537,8 +494,8 @@ namespace randomx {
|
||||
uint32_t pos = codePos;
|
||||
uint8_t* const p = code + pos;
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
|
||||
|
||||
uint32_t k = 0x048d4f + (dst << 19);
|
||||
if (dst == RegisterNeedsDisplacement)
|
||||
@@ -557,8 +514,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -582,8 +539,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
|
||||
@@ -603,8 +560,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -624,8 +581,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
|
||||
@@ -644,8 +601,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -665,8 +622,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
|
||||
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
|
||||
@@ -681,8 +638,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
||||
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
||||
@@ -696,8 +653,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<false>(instr, src, p, pos);
|
||||
@@ -720,8 +677,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<false>(instr, src, p, pos);
|
||||
@@ -743,8 +700,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
|
||||
pos += 8;
|
||||
@@ -758,8 +715,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<false>(instr, src, p, pos);
|
||||
@@ -789,7 +746,7 @@ namespace randomx {
|
||||
|
||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||
|
||||
registerUsage[dst] = pos;
|
||||
@@ -802,7 +759,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
|
||||
pos += 3;
|
||||
|
||||
@@ -814,8 +771,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
|
||||
@@ -835,8 +792,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -856,8 +813,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
|
||||
@@ -877,8 +834,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
|
||||
@@ -898,8 +855,8 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
|
||||
@@ -915,7 +872,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
|
||||
pos += 5;
|
||||
@@ -940,7 +897,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -968,7 +925,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -1007,7 +964,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
@@ -1043,7 +1000,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
||||
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||
@@ -1067,7 +1024,7 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
|
||||
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
||||
@@ -1086,14 +1043,15 @@ namespace randomx {
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
template<bool jccErratum>
|
||||
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const int reg = instr.dst;
|
||||
const int reg = instr.dst % RegistersCount;
|
||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||
|
||||
if (BranchesWithin32B) {
|
||||
if (jccErratum) {
|
||||
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
||||
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
|
||||
|
||||
@@ -1106,10 +1064,12 @@ namespace randomx {
|
||||
}
|
||||
|
||||
*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
|
||||
const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
|
||||
const int shift = instr.getModCond();
|
||||
const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift;
|
||||
const uint32_t and_mask = rotl32(~static_cast<uint32_t>(1UL << (RandomX_ConfigurationBase::JumpOffset - 1)), shift);
|
||||
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask;
|
||||
*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
|
||||
*(uint32_t*)(p + pos + 10) = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
|
||||
*(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
|
||||
pos += 14;
|
||||
|
||||
if (jmp_offset >= -128) {
|
||||
@@ -1132,12 +1092,15 @@ namespace randomx {
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
template void JitCompilerX86::h_CBRANCH<false>(const Instruction&);
|
||||
template void JitCompilerX86::h_CBRANCH<true>(const Instruction&);
|
||||
|
||||
void JitCompilerX86::h_ISTORE(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
genAddressRegDst(instr, p, pos);
|
||||
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
|
||||
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
|
||||
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ namespace randomx {
|
||||
class JitCompilerX86;
|
||||
class Instruction;
|
||||
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&);
|
||||
typedef void(*InstructionGeneratorX86)(JitCompilerX86*, const Instruction&);
|
||||
|
||||
constexpr uint32_t CodeSize = 64 * 1024;
|
||||
|
||||
@@ -84,7 +84,6 @@ namespace randomx {
|
||||
|
||||
uint8_t* allocatedCode;
|
||||
|
||||
void applyTweaks();
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||
template<bool rax>
|
||||
@@ -148,11 +147,13 @@ namespace randomx {
|
||||
void h_FMUL_R(const Instruction&);
|
||||
void h_FDIV_M(const Instruction&);
|
||||
void h_FSQRT_R(const Instruction&);
|
||||
|
||||
template<bool jccErratum>
|
||||
void h_CBRANCH(const Instruction&);
|
||||
|
||||
void h_CFROUND(const Instruction&);
|
||||
void h_CFROUND_BMI2(const Instruction&);
|
||||
void h_ISTORE(const Instruction&);
|
||||
void h_NOP(const Instruction&);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||
{
|
||||
ArgonSalt = "RandomWOW\x01";
|
||||
@@ -109,22 +111,15 @@ RandomX_ConfigurationKeva::RandomX_ConfigurationKeva()
|
||||
}
|
||||
|
||||
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||
: ArgonMemory(262144)
|
||||
, ArgonIterations(3)
|
||||
: ArgonIterations(3)
|
||||
, ArgonLanes(1)
|
||||
, ArgonSalt("RandomX\x03")
|
||||
, CacheAccesses(8)
|
||||
, SuperscalarLatency(170)
|
||||
, DatasetBaseSize(2147483648)
|
||||
, DatasetExtraSize(33554368)
|
||||
, ScratchpadL1_Size(16384)
|
||||
, ScratchpadL2_Size(262144)
|
||||
, ScratchpadL3_Size(2097152)
|
||||
, ProgramSize(256)
|
||||
, ProgramIterations(2048)
|
||||
, ProgramCount(8)
|
||||
, JumpBits(8)
|
||||
, JumpOffset(8)
|
||||
, RANDOMX_FREQ_IADD_RS(16)
|
||||
, RANDOMX_FREQ_IADD_M(7)
|
||||
, RANDOMX_FREQ_ISUB_R(16)
|
||||
@@ -165,53 +160,72 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||
fillAes4Rx4_Key[6] = rx_set_int_vec_i128(0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7);
|
||||
fillAes4Rx4_Key[7] = rx_set_int_vec_i128(0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609);
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
# if defined(_M_X64) || defined(__x86_64__)
|
||||
// Workaround for Visual Studio placing trampoline in debug builds.
|
||||
auto addr = [](void (*func)()) {
|
||||
const uint8_t* p = reinterpret_cast<const uint8_t*>(func);
|
||||
# if defined(_MSC_VER)
|
||||
if (p[0] == 0xE9) {
|
||||
p += *(const int32_t*)(p + 1) + 5;
|
||||
}
|
||||
# endif
|
||||
return p;
|
||||
};
|
||||
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_sshash_prefetch;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_sshash_end;
|
||||
const uint8_t* a = addr(randomx_sshash_prefetch);
|
||||
const uint8_t* b = addr(randomx_sshash_end);
|
||||
memcpy(codeShhPrefetchTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_ryzen;
|
||||
const uint8_t* a = addr(randomx_program_read_dataset);
|
||||
const uint8_t* b = addr(randomx_program_read_dataset_ryzen);
|
||||
memcpy(codeReadDatasetTweaked, a, b - a);
|
||||
codeReadDatasetTweakedSize = b - a;
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_ryzen;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* a = addr(randomx_program_read_dataset_ryzen);
|
||||
const uint8_t* b = addr(randomx_program_read_dataset_sshash_init);
|
||||
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
||||
codeReadDatasetRyzenTweakedSize = b - a;
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||
const uint8_t* a = addr(randomx_program_read_dataset_sshash_init);
|
||||
const uint8_t* b = addr(randomx_program_read_dataset_sshash_fin);
|
||||
memcpy(codeReadDatasetLightSshInitTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_prefetch_scratchpad;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_prefetch_scratchpad_end;
|
||||
const uint8_t* a = addr(randomx_prefetch_scratchpad);
|
||||
const uint8_t* b = addr(randomx_prefetch_scratchpad_end);
|
||||
memcpy(codePrefetchScratchpadTweaked, a, b - a);
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
}
|
||||
|
||||
#ifdef XMRIG_ARMv8
|
||||
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
|
||||
#endif
|
||||
|
||||
static int scratchpadPrefetchMode = 1;
|
||||
|
||||
void randomx_set_scratchpad_prefetch_mode(int mode)
|
||||
{
|
||||
scratchpadPrefetchMode = mode;
|
||||
}
|
||||
|
||||
void RandomX_ConfigurationBase::Apply()
|
||||
{
|
||||
ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
||||
ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
|
||||
ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
|
||||
ScratchpadL2Mask16_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) / 2 - 1) * 16;
|
||||
const uint32_t ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
||||
const uint32_t ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
|
||||
|
||||
AddressMask_Calculated[0] = ScratchpadL2Mask_Calculated;
|
||||
AddressMask_Calculated[1] = ScratchpadL1Mask_Calculated;
|
||||
AddressMask_Calculated[2] = ScratchpadL1Mask_Calculated;
|
||||
AddressMask_Calculated[3] = ScratchpadL1Mask_Calculated;
|
||||
|
||||
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
|
||||
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
|
||||
|
||||
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
|
||||
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
|
||||
|
||||
ConditionMask_Calculated = (1 << JumpBits) - 1;
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
||||
// Not needed right now because all variants use default dataset base size
|
||||
@@ -223,7 +237,42 @@ void RandomX_ConfigurationBase::Apply()
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
||||
|
||||
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
|
||||
// Apply scratchpad prefetch mode
|
||||
{
|
||||
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + 8);
|
||||
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + 22);
|
||||
|
||||
switch (scratchpadPrefetchMode)
|
||||
{
|
||||
case 0:
|
||||
*a = 0x00401F0FUL; // 4-byte nop
|
||||
*b = 0x00401F0FUL; // 4-byte nop
|
||||
break;
|
||||
|
||||
case 1:
|
||||
default:
|
||||
*a = 0x060C180FUL; // prefetcht0 [rsi+rax]
|
||||
*b = 0x160C180FUL; // prefetcht0 [rsi+rdx]
|
||||
break;
|
||||
|
||||
case 2:
|
||||
*a = 0x0604180FUL; // prefetchnta [rsi+rax]
|
||||
*b = 0x1604180FUL; // prefetchnta [rsi+rdx]
|
||||
break;
|
||||
|
||||
case 3:
|
||||
*a = 0x060C8B48UL; // mov rcx, [rsi+rax]
|
||||
*b = 0x160C8B48UL; // mov rcx, [rsi+rdx]
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx::Instruction&);
|
||||
|
||||
#define JIT_HANDLE(x, prev) do { \
|
||||
const InstructionGeneratorX86_2 p = &randomx::JitCompilerX86::h_##x; \
|
||||
memcpy(randomx::JitCompilerX86::engine + k, &p, sizeof(p)); \
|
||||
} while (0)
|
||||
|
||||
#elif defined(XMRIG_ARMv8)
|
||||
|
||||
@@ -239,16 +288,16 @@ void RandomX_ConfigurationBase::Apply()
|
||||
#define JIT_HANDLE(x, prev)
|
||||
#endif
|
||||
|
||||
constexpr int CEIL_NULL = 0;
|
||||
int k = 0;
|
||||
uint32_t k = 0;
|
||||
uint32_t freq_sum = 0;
|
||||
|
||||
#define INST_HANDLE(x, prev) \
|
||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
|
||||
freq_sum += RANDOMX_FREQ_##x; \
|
||||
for (; k < freq_sum; ++k) { JIT_HANDLE(x, prev); }
|
||||
|
||||
#define INST_HANDLE2(x, func_name, prev) \
|
||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); }
|
||||
freq_sum += RANDOMX_FREQ_##x; \
|
||||
for (; k < freq_sum; ++k) { JIT_HANDLE(func_name, prev); }
|
||||
|
||||
INST_HANDLE(IADD_RS, NULL);
|
||||
INST_HANDLE(IADD_M, IADD_RS);
|
||||
@@ -287,7 +336,17 @@ void RandomX_ConfigurationBase::Apply()
|
||||
INST_HANDLE(FMUL_R, FSCAL_R);
|
||||
INST_HANDLE(FDIV_M, FMUL_R);
|
||||
INST_HANDLE(FSQRT_R, FDIV_M);
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
if (xmrig::Cpu::info()->jccErratum()) {
|
||||
INST_HANDLE2(CBRANCH, CBRANCH<true>, FSQRT_R);
|
||||
}
|
||||
else {
|
||||
INST_HANDLE2(CBRANCH, CBRANCH<false>, FSQRT_R);
|
||||
}
|
||||
#else
|
||||
INST_HANDLE(CBRANCH, FSQRT_R);
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||
@@ -520,33 +579,35 @@ extern "C" {
|
||||
assert(inputSize == 0 || input != nullptr);
|
||||
assert(output != nullptr);
|
||||
alignas(16) uint64_t tempHash[8];
|
||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
|
||||
machine->initScratchpad(&tempHash);
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
machine->getFinalResult(output);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
|
||||
machine->initScratchpad(tempHash);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
||||
PROFILE_SCOPE(RandomX_hash);
|
||||
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
|
||||
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
|
||||
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), nextInput, nextInputSize);
|
||||
machine->hashAndFill(output, tempHash);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -64,15 +64,24 @@ struct RandomX_ConfigurationBase
|
||||
|
||||
void Apply();
|
||||
|
||||
uint32_t ArgonMemory;
|
||||
// Common parameters for all RandomX variants
|
||||
enum Params : uint64_t
|
||||
{
|
||||
ArgonMemory = 262144,
|
||||
CacheAccesses = 8,
|
||||
SuperscalarLatency = 170,
|
||||
DatasetBaseSize = 2147483648,
|
||||
DatasetExtraSize = 33554368,
|
||||
JumpBits = 8,
|
||||
JumpOffset = 8,
|
||||
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1),
|
||||
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE,
|
||||
ConditionMask_Calculated = ((1 << JumpBits) - 1) << JumpOffset,
|
||||
};
|
||||
|
||||
uint32_t ArgonIterations;
|
||||
uint32_t ArgonLanes;
|
||||
const char* ArgonSalt;
|
||||
uint32_t CacheAccesses;
|
||||
uint32_t SuperscalarLatency;
|
||||
|
||||
uint32_t DatasetBaseSize;
|
||||
uint32_t DatasetExtraSize;
|
||||
|
||||
uint32_t ScratchpadL1_Size;
|
||||
uint32_t ScratchpadL2_Size;
|
||||
@@ -82,9 +91,6 @@ struct RandomX_ConfigurationBase
|
||||
uint32_t ProgramIterations;
|
||||
uint32_t ProgramCount;
|
||||
|
||||
uint32_t JumpBits;
|
||||
uint32_t JumpOffset;
|
||||
|
||||
uint32_t RANDOMX_FREQ_IADD_RS;
|
||||
uint32_t RANDOMX_FREQ_IADD_M;
|
||||
uint32_t RANDOMX_FREQ_ISUB_R;
|
||||
@@ -126,18 +132,10 @@ struct RandomX_ConfigurationBase
|
||||
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
||||
uint8_t codePrefetchScratchpadTweaked[32];
|
||||
|
||||
uint32_t CacheLineAlignMask_Calculated;
|
||||
uint32_t DatasetExtraItems_Calculated;
|
||||
|
||||
uint32_t ScratchpadL1Mask_Calculated;
|
||||
uint32_t ScratchpadL1Mask16_Calculated;
|
||||
uint32_t ScratchpadL2Mask_Calculated;
|
||||
uint32_t ScratchpadL2Mask16_Calculated;
|
||||
uint32_t AddressMask_Calculated[4];
|
||||
uint32_t ScratchpadL3Mask_Calculated;
|
||||
uint32_t ScratchpadL3Mask64_Calculated;
|
||||
|
||||
uint32_t ConditionMask_Calculated;
|
||||
|
||||
#if defined(XMRIG_ARMv8)
|
||||
uint32_t Log2_ScratchpadL1;
|
||||
uint32_t Log2_ScratchpadL2;
|
||||
@@ -145,37 +143,6 @@ struct RandomX_ConfigurationBase
|
||||
uint32_t Log2_DatasetBaseSize;
|
||||
uint32_t Log2_CacheSize;
|
||||
#endif
|
||||
|
||||
int CEIL_IADD_RS;
|
||||
int CEIL_IADD_M;
|
||||
int CEIL_ISUB_R;
|
||||
int CEIL_ISUB_M;
|
||||
int CEIL_IMUL_R;
|
||||
int CEIL_IMUL_M;
|
||||
int CEIL_IMULH_R;
|
||||
int CEIL_IMULH_M;
|
||||
int CEIL_ISMULH_R;
|
||||
int CEIL_ISMULH_M;
|
||||
int CEIL_IMUL_RCP;
|
||||
int CEIL_INEG_R;
|
||||
int CEIL_IXOR_R;
|
||||
int CEIL_IXOR_M;
|
||||
int CEIL_IROR_R;
|
||||
int CEIL_IROL_R;
|
||||
int CEIL_ISWAP_R;
|
||||
int CEIL_FSWAP_R;
|
||||
int CEIL_FADD_R;
|
||||
int CEIL_FADD_M;
|
||||
int CEIL_FSUB_R;
|
||||
int CEIL_FSUB_M;
|
||||
int CEIL_FSCAL_R;
|
||||
int CEIL_FMUL_R;
|
||||
int CEIL_FDIV_M;
|
||||
int CEIL_FSQRT_R;
|
||||
int CEIL_CBRANCH;
|
||||
int CEIL_CFROUND;
|
||||
int CEIL_ISTORE;
|
||||
int CEIL_NOP;
|
||||
};
|
||||
|
||||
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
|
||||
@@ -203,6 +170,8 @@ void randomx_apply_config(const T& config)
|
||||
RandomX_CurrentConfig.Apply();
|
||||
}
|
||||
|
||||
void randomx_set_scratchpad_prefetch_mode(int mode);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
@@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include <vector>
|
||||
|
||||
alignas(64) uint32_t lutEnc0[256];
|
||||
alignas(64) uint32_t lutEnc1[256];
|
||||
@@ -117,3 +120,47 @@ static struct SAESInitializer
|
||||
}
|
||||
}
|
||||
} aes_initializer;
|
||||
|
||||
static uint32_t softAESImpl = 1;
|
||||
|
||||
uint32_t GetSoftAESImpl()
|
||||
{
|
||||
return softAESImpl;
|
||||
}
|
||||
|
||||
void SelectSoftAESImpl()
|
||||
{
|
||||
constexpr int test_length_ms = 100;
|
||||
double speed[2] = {};
|
||||
|
||||
for (int run = 0; run < 3; ++run) {
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||
uint8_t hash[64] = {};
|
||||
uint8_t state[64] = {};
|
||||
|
||||
uint64_t t1, t2;
|
||||
|
||||
uint32_t count = 0;
|
||||
t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
do {
|
||||
if (i == 0) {
|
||||
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
}
|
||||
else {
|
||||
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
}
|
||||
++count;
|
||||
|
||||
t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
} while (t2 - t1 < test_length_ms);
|
||||
|
||||
const double x = count * 1e3 / (t2 - t1);
|
||||
if (x > speed[i]) {
|
||||
speed[i] = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
||||
}
|
||||
|
||||
@@ -41,11 +41,14 @@ extern uint32_t lutDec1[256];
|
||||
extern uint32_t lutDec2[256];
|
||||
extern uint32_t lutDec3[256];
|
||||
|
||||
template<bool soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||
template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||
uint32_t GetSoftAESImpl();
|
||||
void SelectSoftAESImpl();
|
||||
|
||||
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
|
||||
@@ -73,7 +76,7 @@ FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
|
||||
@@ -101,11 +104,49 @@ FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<false>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
|
||||
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
|
||||
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
|
||||
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
|
||||
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
|
||||
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
|
||||
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
return rx_aesenc_vec_i128(in, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<false>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
return rx_aesdec_vec_i128(in, key);
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
#include "crypto/randomx/intrin_portable.h"
|
||||
#include "crypto/randomx/allocator.hpp"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
randomx_vm::~randomx_vm() {
|
||||
|
||||
@@ -95,11 +97,11 @@ void randomx_vm::initialize() {
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
VmBase<softAes>::~VmBase() {
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) {
|
||||
if (datasetPtr == nullptr) {
|
||||
throw std::invalid_argument("Cache/Dataset not set");
|
||||
@@ -108,25 +110,37 @@ namespace randomx {
|
||||
this->scratchpad = scratchpad;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::getFinalResult(void* out, size_t outSize) {
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::getFinalResult(void* out) {
|
||||
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
|
||||
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
|
||||
if (!softAes) {
|
||||
hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
}
|
||||
else {
|
||||
if (GetSoftAESImpl() == 1) {
|
||||
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
}
|
||||
else {
|
||||
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
}
|
||||
}
|
||||
|
||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::initScratchpad(void* seed) {
|
||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::generateProgram(void* seed) {
|
||||
PROFILE_SCOPE(RandomX_generate_program);
|
||||
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
|
||||
}
|
||||
|
||||
|
||||
@@ -38,8 +38,8 @@ class randomx_vm
|
||||
public:
|
||||
virtual ~randomx_vm() = 0;
|
||||
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
|
||||
virtual void getFinalResult(void* out) = 0;
|
||||
virtual void hashAndFill(void* out, uint64_t (&fill_state)[8]) = 0;
|
||||
virtual void setDataset(randomx_dataset* dataset) { }
|
||||
virtual void setCache(randomx_cache* cache) { }
|
||||
virtual void initScratchpad(void* seed) = 0;
|
||||
@@ -79,15 +79,15 @@ protected:
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class VmBase : public randomx_vm
|
||||
{
|
||||
public:
|
||||
~VmBase() override;
|
||||
void setScratchpad(uint8_t *scratchpad) override;
|
||||
void initScratchpad(void* seed) override;
|
||||
void getFinalResult(void* out, size_t outSize) override;
|
||||
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
|
||||
void getFinalResult(void* out) override;
|
||||
void hashAndFill(void* out, uint64_t (&fill_state)[8]) override;
|
||||
|
||||
protected:
|
||||
void generateProgram(void* seed);
|
||||
|
||||
@@ -28,19 +28,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "crypto/randomx/vm_compiled.hpp"
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
|
||||
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) {
|
||||
datasetPtr = dataset;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledVm<softAes>::run(void* seed) {
|
||||
PROFILE_SCOPE(RandomX_run);
|
||||
|
||||
compiler.prepare();
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
@@ -49,8 +52,10 @@ namespace randomx {
|
||||
execute();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledVm<softAes>::execute() {
|
||||
PROFILE_SCOPE(RandomX_JIT_execute);
|
||||
|
||||
#ifdef XMRIG_ARM
|
||||
memcpy(reg.f, config.eMask, sizeof(config.eMask));
|
||||
#endif
|
||||
|
||||
@@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class CompiledVm : public VmBase<softAes>
|
||||
{
|
||||
public:
|
||||
@@ -61,6 +61,6 @@ namespace randomx {
|
||||
JitCompiler compiler;
|
||||
};
|
||||
|
||||
using CompiledVmDefault = CompiledVm<true>;
|
||||
using CompiledVmHardAes = CompiledVm<false>;
|
||||
using CompiledVmDefault = CompiledVm<1>;
|
||||
using CompiledVmHardAes = CompiledVm<0>;
|
||||
}
|
||||
|
||||
@@ -32,14 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledLightVm<softAes>::run(void* seed) {
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
|
||||
@@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class CompiledLightVm : public CompiledVm<softAes>
|
||||
{
|
||||
public:
|
||||
@@ -52,6 +52,6 @@ namespace randomx {
|
||||
using CompiledVm<softAes>::datasetOffset;
|
||||
};
|
||||
|
||||
using CompiledLightVmDefault = CompiledLightVm<true>;
|
||||
using CompiledLightVmHardAes = CompiledLightVm<false>;
|
||||
using CompiledLightVmDefault = CompiledLightVm<1>;
|
||||
using CompiledLightVmHardAes = CompiledLightVm<0>;
|
||||
}
|
||||
|
||||
@@ -33,20 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) {
|
||||
datasetPtr = dataset;
|
||||
mem.memory = dataset->memory;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::run(void* seed) {
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
execute();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::execute() {
|
||||
|
||||
NativeRegisterFile nreg;
|
||||
@@ -106,14 +106,14 @@ namespace randomx {
|
||||
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
|
||||
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) {
|
||||
rx_prefetch_nta(mem.memory + address);
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class InterpretedVm : public VmBase<softAes>, public BytecodeMachine {
|
||||
public:
|
||||
using VmBase<softAes>::mem;
|
||||
@@ -65,6 +65,6 @@ namespace randomx {
|
||||
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
|
||||
};
|
||||
|
||||
using InterpretedVmDefault = InterpretedVm<true>;
|
||||
using InterpretedVmHardAes = InterpretedVm<false>;
|
||||
using InterpretedVmDefault = InterpretedVm<1>;
|
||||
using InterpretedVmHardAes = InterpretedVm<0>;
|
||||
}
|
||||
|
||||
@@ -31,13 +31,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
|
||||
uint32_t itemNumber = address / CacheLineSize;
|
||||
int_reg_t rl[8];
|
||||
|
||||
@@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class InterpretedLightVm : public InterpretedVm<softAes> {
|
||||
public:
|
||||
using VmBase<softAes>::mem;
|
||||
@@ -50,6 +50,6 @@ namespace randomx {
|
||||
void datasetPrefetch(uint64_t address) override { }
|
||||
};
|
||||
|
||||
using InterpretedLightVmDefault = InterpretedLightVm<true>;
|
||||
using InterpretedLightVmHardAes = InterpretedLightVm<false>;
|
||||
using InterpretedLightVmDefault = InterpretedLightVm<1>;
|
||||
using InterpretedLightVmHardAes = InterpretedLightVm<0>;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user