1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-06 23:52:38 -05:00

Compare commits

..

131 Commits

Author SHA1 Message Date
XMRig
ca5fb6e8f0 v2.12.0 2019-02-13 09:28:06 +07:00
XMRig
7f5ac83050 Merge branch 'dev' 2019-02-13 09:27:10 +07:00
xmrig
3688cd9b6c Update CHANGELOG.md 2019-02-13 09:17:30 +07:00
XMRig
33a3ae59bd Removed "cn/r" alias and fixed build without ASM. 2019-02-13 00:18:40 +07:00
XMRig
8cf56438f6 Merge branch 'feature-cn-wow' into dev 2019-02-12 04:47:09 +07:00
XMRig
4a781cee8b Expired shares prohibited for cn/wow. 2019-02-12 03:15:36 +07:00
XMRig
8d316c17f0 Add block version check for pools without algorithm negotiation. 2019-02-11 07:07:09 +07:00
XMRig
42e0d35207 Show height only if it specified by pool. 2019-02-11 06:22:20 +07:00
XMRig
a6813ef4f5 Temporary added "cn/r" alias for test pool. 2019-02-11 05:46:43 +07:00
SChernykh
36ce144d35 Fixed ARM code 2019-02-10 16:04:24 +01:00
SChernykh
65dab8371b Removed unused variable 2019-02-10 15:46:55 +01:00
SChernykh
c2e15e2b86 Fixed ARM compilation 2019-02-10 15:34:31 +01:00
SChernykh
e3f2c38fff CryptonightR support for Wownero 2019-02-10 15:21:17 +01:00
XMRig
73852f44c6 v2.11.1-dev 2019-02-10 01:30:54 +07:00
XMRig
01428acc65 Merge branch 'master' into dev 2019-02-10 01:30:12 +07:00
XMRig
9dd223d5b1 v2.11.0 2019-02-09 22:06:38 +07:00
XMRig
c27c577c5e Merge branch 'dev' 2019-02-09 22:06:10 +07:00
xmrig
a1fec1c797 Update CHANGELOG.md 2019-02-09 16:36:28 +07:00
XMRig
a5f77eb16a #925 Fixed self test on recent macOS. 2019-02-09 03:48:24 +07:00
XMRig
5b3ad39a9e v2.11.0-dev 2019-02-08 17:25:09 +07:00
XMRig
1a2aaf210c Sync changes. 2019-02-08 16:55:21 +07:00
XMRig
a6a0fb965a Fixed self test, cn/gpu not support multihash mode. 2019-02-06 01:27:50 +07:00
XMRig
aabdff2cc9 Fixed floating-point rounding, resolved conflict between cn/2 and cn/gpu. Thanks @SChernykh. 2019-02-06 00:38:55 +07:00
XMRig
3f8898f228 Sync changes. 2019-02-05 18:47:54 +07:00
XMRig
ab60add9d5 #749 Use HWCAP to detect AES feature on ARMv8, thanks @lhirlimann. 2019-02-04 03:03:10 +07:00
XMRig
7e4858db2a Allow override ARM arch detection and support for ARMv8 without crypto extension. 2019-02-04 02:23:05 +07:00
XMRig
885a2cab21 Merge branch 'feature-cn-gpu' into dev 2019-02-04 00:14:14 +07:00
XMRig
3337f5ac43 Fix ARM builds with clang. 2019-02-03 20:43:49 +07:00
XMRig
5f113a47cf Fix typo. 2019-02-03 20:34:24 +07:00
XMRig
f4170ec7bc Add ARM support for cn/gpu. 2019-02-03 20:28:13 +07:00
XMRig
e9069a381e Disable cn/gpu for 32bit builds. 2019-02-03 16:49:17 +07:00
XMRig
ca18622ad0 Disable -Ofast for gcc and cn/gpu. 2019-02-03 16:00:06 +07:00
XMRig
eac6a97a2e Fix Linux build. 2019-02-03 14:48:11 +07:00
XMRig
bd4bc9ba4d Added reference implementation for cn/gpu (cn-gpu). 2019-02-03 14:44:23 +07:00
XMRig
7605c4241f v2.10.1-dev 2019-02-02 01:33:25 +07:00
XMRig
bb583afcff Merge branch 'master' into dev 2019-02-02 01:32:52 +07:00
xmrig
5a2cf85b6c Update CHANGELOG.md 2019-01-23 19:44:14 +07:00
xmrig
80de5766ac Update CHANGELOG.md 2019-01-23 19:39:55 +07:00
XMRig
2c0183dcef v2.10.0 2019-01-23 19:31:45 +07:00
XMRig
8a90fc699e Merge branch 'dev' 2019-01-23 19:29:54 +07:00
XMRig
b1816005fd Sync changes with proxy. 2019-01-21 19:16:49 +07:00
XMRig
f478fa843d Better fix for memory allocation, thanks @SChernykh. 2019-01-21 17:27:45 +07:00
XMRig
046c345f65 Fix memory allocation. 2019-01-21 01:44:15 +07:00
XMRig
9692f92c6b New default value for max-cpu-usage option is 100 (instead of 75) also this option now deprecated. 2019-01-21 00:37:36 +07:00
XMRig
bc26d2ede6 #904 Add ASM implementation. 2019-01-21 00:32:24 +07:00
XMRig
36b01f136f #904 Initial support for "cn-pico/trtl". 2019-01-20 19:52:44 +07:00
XMRig
ca43d155d8 v2.9.5-dev 2019-01-20 15:13:01 +07:00
XMRig
e04d0f504c Merge branch 'master' into dev 2019-01-20 15:12:26 +07:00
XMRig
acca8f79cf v2.9.4 2019-01-19 23:55:30 +07:00
XMRig
2c38f693d7 Merge branch 'dev' 2019-01-19 23:54:50 +07:00
xmrig
4faa95b460 Update CHANGELOG.md 2019-01-19 23:53:39 +07:00
xmrig
5f9ebdf149 Merge pull request #915 from SChernykh/dev
Make JIT memory read-only after patching is done
2019-01-19 17:55:54 +07:00
SChernykh
31a571dc70 Make JIT memory read-only after patching is done 2019-01-19 11:22:54 +01:00
XMRig
897ff83dcd #913 Increase max blob size for MSR secor upgrade. 2019-01-18 21:44:44 +07:00
XMRig
a63794ccd6 v2.9.4-dev 2019-01-17 20:43:25 +07:00
XMRig
b15e605614 Merge branch 'master' into dev 2019-01-17 20:41:52 +07:00
XMRig
18335392a1 Merge branch 'master' of github.com:xmrig/xmrig 2019-01-17 12:22:00 +07:00
XMRig
b1c22da6a1 v2.9.3 2019-01-17 12:21:39 +07:00
xmrig
b95ff65eae Update CHANGELOG.md 2019-01-17 12:20:37 +07:00
xmrig
005a09cd98 Merge pull request #912 from SChernykh/master
Correct FP rounding mode for cn/half
2019-01-17 12:12:03 +07:00
SChernykh
e0f5066ded Correct FP rounding mode for cn/half 2019-01-17 00:49:48 +01:00
XMRig
2b15bcbb4f Don't use __builtin___clear_cache on FreeBSD. 2019-01-16 17:53:47 +07:00
xmrig
db646d5d2d Merge pull request #910 from ehaupt/patch-1
FreeBSD needs pthread
2019-01-16 17:50:53 +07:00
Emanuel Haupt
b58e20dde4 FreeBSD needs pthread
FreeBSD needs the pthread linker flag.
2019-01-16 10:55:45 +01:00
XMRig
b11e772acc v2.9.2 2019-01-16 09:17:35 +07:00
xmrig
d1bc03351b Merge pull request #907 from SChernykh/master
Fixed crash in patchCode() on Linux
2019-01-16 09:09:51 +07:00
SChernykh
d1f551da2c Fixed compilation error on Windows 2019-01-16 00:24:34 +01:00
SChernykh
6425c53d61 Fixed crash in patchCode() on Linux 2019-01-16 00:09:00 +01:00
XMRig
118e547175 Restored compatibility with https://stellite.hashvault.pro. 2019-01-16 02:22:29 +07:00
XMRig
d696daf5af Merge branch 'dev' 2019-01-16 00:42:06 +07:00
xmrig
ac60d9426c Update README.md 2019-01-16 00:34:48 +07:00
XMRig
209796a07e v2.9.0 2019-01-16 00:32:02 +07:00
XMRig
a1f19305f4 Merge branch 'dev' 2019-01-16 00:30:15 +07:00
xmrig
dd3243aa70 Update CHANGELOG.md 2019-01-16 00:28:49 +07:00
XMRig
09893bfd36 Fix warnings on macOS. 2019-01-15 18:28:35 +07:00
XMRig
a98c475a3c Fixed wrong ASM code alignment on macOS, thanks @SChernykh. 2019-01-15 18:18:04 +07:00
XMRig
59b147b6fb Merge branch 'pr905' into dev 2019-01-15 02:18:30 +07:00
XMRig
17f28667b3 Code-style/copyright cleanup. 2019-01-15 02:15:36 +07:00
XMRig
63a62e7ea0 Merge branch 'dev' of https://github.com/SChernykh/xmrig into pr905 2019-01-15 01:37:53 +07:00
SChernykh
56cacbd5bc Fixes for Visual Studio 2019-01-14 16:38:28 +01:00
SChernykh
8b9d5cff91 Added ASM code patching when loading
For CNv2 variants with different iterations and memory size.
2019-01-14 15:34:55 +01:00
XMRig
eede1b4881 Allow ignore block version. 2019-01-14 20:59:39 +07:00
XMRig
492449e9fb #899 Add ASM implementation for cn/half. 2019-01-14 18:09:16 +07:00
XMRig
b43336582d Rename ASM files. 2019-01-13 23:00:41 +07:00
XMRig
67863a427d Rename "cn/xtlv9" to "cn/half". 2019-01-13 19:42:27 +07:00
XMRig
1382857c83 #899 Fixed ARM build. 2019-01-13 15:24:04 +07:00
XMRig
84c2cadc50 #899 Initial support for "cn/xtlv9" (C++ only). 2019-01-13 15:03:36 +07:00
XMRig
16b4fd0ff5 Update variant detection for nicehash.com and minergate.com. 2019-01-09 21:47:03 +07:00
XMRig
cfe3995aa8 Sync changes. 2019-01-09 16:43:36 +07:00
XMRig
5382216725 Merge branch 'pr857' into dev 2018-11-20 08:19:29 +07:00
XMRig
c06f77b9e9 Better compiler name and version handling on Linux and macOS for user-agent string. 2018-11-20 08:18:39 +07:00
XMRig
0c930e277b Merge branch 'upstream-osx-compilers' of https://github.com/rivoreo/xmrig into pr857 2018-11-20 08:03:05 +07:00
XMRig
2a2712ab90 Sync changes. 2018-11-20 07:24:14 +07:00
XMRig
9f6f599d78 Sync changes. 2018-11-11 03:18:56 +07:00
XMRig
a76243a65e Sync changes with proxy. 2018-11-06 00:50:28 +07:00
WHR
16babcc6bc Fix impossible to build for OS X without clang 2018-10-30 21:15:49 +08:00
XMRig
3d60b3cc62 #844 Fixed copy/paste typo in comment. 2018-10-25 14:07:15 +07:00
XMRig
acd042c234 #839 Fixed FreeBSD compile. 2018-10-24 09:55:40 +07:00
XMRig
a2e384df58 Merge branch 'feature-asm-bulldozer' into dev 2018-10-22 23:24:26 +07:00
XMRig
735180ac04 Fixed MSVC 2015 build. 2018-10-22 23:17:54 +07:00
XMRig
06a84499d7 Fixed MSYS2 build & copyright 2018-10-22 23:08:29 +07:00
XMRig
caa70a1e9d Merge branch 'dev' of https://github.com/SChernykh/xmrig into feature-asm-bulldozer 2018-10-22 22:52:53 +07:00
XMRig
deb832c9c6 Restore old method for total threads count detection. 2018-10-21 23:52:23 +07:00
XMRig
d7feb2719e v2.8.5-dev 2018-10-21 23:29:17 +07:00
SChernykh
4b91978af6 Added asm optimized code for AMD Bulldozer 2018-10-21 18:29:03 +02:00
XMRig
7e078f2d07 Merge branch 'master' into dev 2018-10-21 23:25:06 +07:00
xmrig
2b0b71b9f6 Update CHANGELOG.md 2018-10-19 10:41:32 +07:00
xmrig
938e652c45 Update CHANGELOG.md 2018-10-19 10:26:12 +07:00
xmrig
20bd22ea22 Update README.md 2018-10-19 10:19:18 +07:00
xmrig
c7c9d20b8b Update CHANGELOG.md 2018-10-19 09:59:14 +07:00
XMRig
dd6aeb26e8 v2.8.3 2018-10-19 09:56:28 +07:00
XMRig
ba0df6e973 Merge branch 'dev' 2018-10-19 09:52:09 +07:00
xmrig
a0f48adf5c Merge pull request #814 from 0xc0d32/patch-1
Update Pool.cpp
2018-10-19 09:40:00 +07:00
_0xc0d32
2b0cac5d4d Update Pool.cpp
Fixed to #813
2018-10-18 22:42:47 -03:00
XMRig
afeaabdca4 v2.8.2-dev 2018-10-15 18:22:45 +07:00
XMRig
6b584a78b2 Merge branch 'master' into dev 2018-10-15 18:21:37 +07:00
xmrig
4bf248f513 Update README.md 2018-10-09 07:41:40 +07:00
XMRig
93b54f8f44 v2.8.1 2018-10-09 07:22:23 +07:00
XMRig
dd7c776ff4 Merge branch 'dev' 2018-10-09 07:19:07 +07:00
xmrig
ff3ae25d16 Update CHANGELOG.md 2018-10-09 07:12:31 +07:00
XMRig
bdc16df418 #777 Make EOF as verbose error too. 2018-10-09 03:47:39 +07:00
XMRig
9ef59366ba Sync changes with proxy. 2018-10-09 02:32:29 +07:00
XMRig
dda8157a7b Add "--tls", "--tls-fingerprint", "--asm" and "--dry-run" to help output. 2018-10-09 01:35:33 +07:00
xmrig
023062b2f1 Update README.md 2018-10-09 01:32:07 +07:00
XMRig
ad92c3b025 Add ASM detection for builds without libcpuid. 2018-10-06 15:13:55 +03:00
xmrig
8f3d405b34 Update CHANGELOG.md 2018-10-06 14:25:59 +03:00
XMRig
20268d5291 #753 Removed unnecessary ext_family and ext_model checks. 2018-10-04 19:10:47 +03:00
XMRig
152c4f2f1b #768 Fixed build error with MSVC 2015 and enabled ASM code. 2018-10-02 17:50:19 +03:00
XMRig
c2f6c70044 #769 Fixed regression with colors. 2018-10-02 13:14:32 +03:00
XMRig
9a70f0e564 v2.8.1-dev 2018-10-02 01:22:13 +03:00
XMRig
0efd995b9b Merge branch 'master' into dev 2018-10-02 01:18:18 +03:00
103 changed files with 14955 additions and 628 deletions

View File

@@ -1,3 +1,45 @@
# v2.12.0
- [#929](https://github.com/xmrig/xmrig/pull/929) Added support for new algorithm `cryptonight/wow`, short alias `cn/wow` (also known as CryptonightR), for upcoming [Wownero](http://wownero.org) fork on February 14.
# v2.11.0
- [#928](https://github.com/xmrig/xmrig/issues/928) Added support for new algorithm `cryptonight/gpu`, short alias `cn/gpu` (original name `cryptonight-gpu`), for upcoming [Ryo currency](https://ryo-currency.com) fork on February 14.
- [#749](https://github.com/xmrig/xmrig/issues/749) Added support for detect hardware AES in runtime on ARMv8 platforms.
- [#292](https://github.com/xmrig/xmrig/issues/292) Fixed build on ARMv8 platforms if compiler not support hardware AES.
# v2.10.0
- [#904](https://github.com/xmrig/xmrig/issues/904) Added new algorithm `cn-pico/trtl` (aliases `cryptonight-turtle`, `cn-trtl`) for upcoming TurtleCoin (TRTL) fork.
- Default value for option `max-cpu-usage` changed to `100` also this option now deprecated.
# v2.9.4
- [#913](https://github.com/xmrig/xmrig/issues/913) Fixed Masari (MSR) support (this update required for upcoming fork).
- [#915](https://github.com/xmrig/xmrig/pull/915) Improved security, JIT memory now read-only after patching.
# v2.9.3
- [#909](https://github.com/xmrig/xmrig/issues/909) Fixed compile errors on FreeBSD.
- [#912](https://github.com/xmrig/xmrig/pull/912) Fixed, C++ implementation of `cn/half` was produce up to 13% of invalid hashes.
# v2.9.2
- [#907](https://github.com/xmrig/xmrig/pull/907) Fixed crash on Linux.
# v2.9.1
- Restored compatibility with https://stellite.hashvault.pro.
# v2.9.0
- [#899](https://github.com/xmrig/xmrig/issues/899) Added support for new algorithm `cn/half` for Masari and Stellite forks.
- [#834](https://github.com/xmrig/xmrig/pull/834) Added ASM optimized code for AMD Bulldozer.
- [#839](https://github.com/xmrig/xmrig/issues/839) Fixed FreeBSD compile.
- [#857](https://github.com/xmrig/xmrig/pull/857) Fixed impossible to build for macOS without clang.
# v2.8.3
- [#813](https://github.com/xmrig/xmrig/issues/813) Fixed critical bug with Minergate pool and variant 2.
# v2.8.1
- [#768](https://github.com/xmrig/xmrig/issues/768) Fixed build with Visual Studio 2015.
- [#769](https://github.com/xmrig/xmrig/issues/769) Fixed regression, some ANSI escape sequences was in log with disabled colors.
- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues.
- Simplified checks for ASM auto detection, only AES support necessary.
- Added missing options to `--help` output.
# v2.8.0
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
- Added global and per thread option `"asm"` and and command line equivalent.

View File

@@ -4,11 +4,14 @@ project(xmrig)
option(WITH_LIBCPUID "Use Libcpuid" ON)
option(WITH_AEON "CryptoNight-Lite support" ON)
option(WITH_SUMO "CryptoNight-Heavy support" ON)
option(WITH_CN_PICO "CryptoNight-Pico support" ON)
option(WITH_CN_GPU "CryptoNight-GPU support" ON)
option(WITH_HTTPD "HTTP REST API" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON)
option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
include (CheckIncludeFile)
include (cmake/cpu.cmake)
@@ -17,6 +20,7 @@ include (cmake/cpu.cmake)
set(HEADERS
src/api/NetworkState.h
src/App.h
src/base/tools/String.h
src/common/config/CommonConfig.h
src/common/config/ConfigLoader.h
src/common/config/ConfigWatcher.h
@@ -82,6 +86,7 @@ set(HEADERS_CRYPTO
src/crypto/hash.h
src/crypto/skein_port.h
src/crypto/soft_aes.h
src/crypto/asm/CryptonightR_template.h
)
if (XMRIG_ARM)
@@ -93,6 +98,7 @@ endif()
set(SOURCES
src/api/NetworkState.cpp
src/App.cpp
src/base/tools/String.cpp
src/common/config/CommonConfig.cpp
src/common/config/ConfigLoader.cpp
src/common/config/ConfigWatcher.cpp
@@ -130,6 +136,7 @@ set(SOURCES_CRYPTO
src/crypto/c_blake256.c
src/crypto/c_jh.c
src/crypto/c_skein.c
src/crypto/CryptonightR_gen.cpp
)
if (WIN32)
@@ -155,11 +162,11 @@ else()
src/Mem_unix.cpp
)
set(EXTRA_LIBS pthread rt dl)
endif()
if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
set(EXTRA_LIBS ${EXTRA_LIBS} kvm)
if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
set(EXTRA_LIBS kvm pthread)
else()
set(EXTRA_LIBS pthread rt dl)
endif()
endif()
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -197,6 +204,7 @@ endif()
include(cmake/OpenSSL.cmake)
include(cmake/asm.cmake)
include(cmake/cn-gpu.cmake)
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
if (HAVE_SYSLOG_H)
@@ -216,6 +224,10 @@ if (NOT WITH_IPBC)
add_definitions(/DXMRIG_NO_IPBC)
endif()
if (NOT WITH_CN_PICO)
add_definitions(/DXMRIG_NO_CN_PICO)
endif()
if (WITH_HTTPD)
find_package(MHD)
@@ -255,5 +267,5 @@ if (WITH_DEBUG_LOG)
add_definitions(/DAPP_DEBUG)
endif()
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES})
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})

View File

@@ -1,7 +1,5 @@
# XMRig
:warning: **[Monero will change PoW algorithm on October 18](https://github.com/xmrig/xmrig/issues/753), all miners and proxy should be updated to [v2.8+](https://github.com/xmrig/xmrig/releases/tag/v2.8.0-rc)** :warning:
[![Github All Releases](https://img.shields.io/github/downloads/xmrig/xmrig/total.svg)](https://github.com/xmrig/xmrig/releases)
[![GitHub release](https://img.shields.io/github/release/xmrig/xmrig/all.svg)](https://github.com/xmrig/xmrig/releases)
[![GitHub Release Date](https://img.shields.io/github/release-date-pre/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/releases)
@@ -52,14 +50,21 @@ Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or shar
### Options
```
-a, --algo=ALGO cryptonight (default) or cryptonight-lite
-a, --algo=ALGO specify the algorithm to use
cryptonight
cryptonight-lite
cryptonight-heavy
-o, --url=URL URL of mining server
-O, --userpass=U:P username:password pair for mining server
-u, --user=USERNAME username for mining server
-p, --pass=PASSWORD password for mining server
--rig-id=ID rig identifier for pool-side statistics (needs pool support)
-t, --threads=N number of miner threads
-v, --av=N algorithm variation, 0 auto select
-k, --keepalive send keepalived for prevent timeout (need pool support)
-k, --keepalive send keepalived packet for prevent timeout (needs pool support)
--nicehash enable nicehash.com support
--tls enable SSL/TLS support (needs pool support)
--tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning
-r, --retries=N number of times to retry before switch to backup server (default: 5)
-R, --retry-pause=N time to pause between retries (default: 5)
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1
@@ -75,16 +80,20 @@ Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or shar
-S, --syslog use system log for output messages
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)
--safe safe adjust threads and av settings for current CPU
--nicehash enable nicehash/xmrig-proxy support
--asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen.
--print-time=N print hashrate report every N seconds
--api-port=N port for the miner API
--api-access-token=T access token for API
--api-worker-id=ID custom worker-id for API
--api-id=ID custom instance ID for API
--api-ipv6 enable IPv6 support for API
--api-no-restricted enable full remote access (only if API token set)
--dry-run test configuration and exit
-h, --help display this help and exit
-V, --version output version information and exit
```
Also you can use configuration via config file, default **config.json**. You can load multiple config files and combine it with command line options.
Also you can use configuration via config file, default name **config.json**. Some options available only via config file: [`autosave`](https://github.com/xmrig/xmrig/issues/767), [`hw-aes`](https://github.com/xmrig/xmrig/issues/563). `watch` option currently not implemented in miners only in proxy.
## Algorithm variations
@@ -131,19 +140,6 @@ Please note performance is highly dependent on system load. The numbers above ar
* XMR: `48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD`
* BTC: `1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT`
## Release checksums
### SHA-256
```
ea6a71732937e06d5434b863bedd2d627c500e8ce30b30a02054015bb3aae3fc xmrig-2.8.0-xenial-amd64.tar.gz/xmrig-2.8.0/xmrig
a067de09a2d49c39317d6a98e420fcd80ef040c8cecb6b21317cbe4eabbdb995 xmrig-2.8.0-xenial-amd64.tar.gz/xmrig-2.8.0/xmrig-notls
89b7b4616faec76c40dab714046ea08ebcd6c558299d17d44639e6bc62c54186 xmrig-2.8.0-gcc-win32.zip/xmrig.exe
0fae870bef4223905b24bb00283b3bba7ece547a09a540033beb9a2a33335124 xmrig-2.8.0-gcc-win32.zip/xmrig-notls.exe
953df29ef354d541b89a70a36ddf16d8a9d5f2c37419d3aacd1352ff6ff539e8 xmrig-2.8.0-gcc-win64.zip/xmrig.exe
84d73422a43b46879d88f87a492e99c81d5d632eb3be79793fc0b895196f131d xmrig-2.8.0-gcc-win64.zip/xmrig-notls.exe
8e1302fe632249a713d7af0b7f3f50ad6e707d71d00fcd1f3be56abc024f98ac xmrig-2.8.0-msvc-win64.zip/xmrig.exe
0042b5e7b14a4f8aca748a4c7656a0ec6705bc593c26f9987a25b10fa471f004 xmrig-2.8.0-msvc-win64.zip/xmrig-notls.exe
```
## Contacts
* support@xmrig.com
* [reddit](https://www.reddit.com/user/XMRig/)

View File

@@ -20,4 +20,6 @@ else()
set(TLS_SOURCES "")
set(OPENSSL_LIBRARIES "")
add_definitions(/DXMRIG_NO_TLS)
set(CMAKE_PROJECT_NAME "${CMAKE_PROJECT_NAME}-notls")
endif()

View File

@@ -3,15 +3,33 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM)
set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.asm")
if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
set(XMRIG_ASM_FILE
"src/crypto/asm/cn_main_loop.asm"
"src/crypto/asm/CryptonightR_template.asm"
)
else()
set(XMRIG_ASM_FILE
"src/crypto/asm/win64/cn_main_loop.asm"
"src/crypto/asm/win64/CryptonightR_template.asm"
)
endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
else()
enable_language(ASM)
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop_win.S")
set(XMRIG_ASM_FILE
"src/crypto/asm/win64/cn_main_loop.S"
"src/crypto/asm/win64/CryptonightR_template.S"
)
else()
set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.S")
set(XMRIG_ASM_FILE
"src/crypto/asm/cn_main_loop.S"
"src/crypto/asm/CryptonightR_template.S"
)
endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)

23
cmake/cn-gpu.cmake Normal file
View File

@@ -0,0 +1,23 @@
if (WITH_CN_GPU AND CMAKE_SIZEOF_VOID_P EQUAL 8)
if (XMRIG_ARM)
set(CN_GPU_SOURCES src/crypto/cn_gpu_arm.cpp)
if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/crypto/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O3")
endif()
else()
set(CN_GPU_SOURCES src/crypto/cn_gpu_avx.cpp src/crypto/cn_gpu_ssse3.cpp)
if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx2")
set_source_files_properties(src/crypto/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O3")
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX")
endif()
endif()
else()
set(CN_GPU_SOURCES "")
add_definitions(/DXMRIG_NO_CN_GPU)
endif()

View File

@@ -7,19 +7,37 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
add_definitions(/DRAPIDJSON_SSE2)
endif()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64)$")
set(XMRIG_ARM ON)
set(XMRIG_ARMv8 ON)
set(WITH_LIBCPUID OFF)
add_definitions(/DXMRIG_ARM)
add_definitions(/DXMRIG_ARMv8)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l)$")
set(XMRIG_ARM ON)
set(XMRIG_ARMv7 ON)
set(WITH_LIBCPUID OFF)
add_definitions(/DXMRIG_ARM)
add_definitions(/DXMRIG_ARMv7)
if (NOT ARM_TARGET)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv8-a)$")
set(ARM_TARGET 8)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l)$")
set(ARM_TARGET 7)
endif()
endif()
if (ARM_TARGET AND ARM_TARGET GREATER 6)
set(XMRIG_ARM ON)
set(WITH_LIBCPUID OFF)
add_definitions(/DXMRIG_ARM)
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
include(CheckCXXCompilerFlag)
if (ARM_TARGET EQUAL 8)
set(XMRIG_ARMv8 ON)
add_definitions(/DXMRIG_ARMv8)
CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO)
if (XMRIG_ARM_CRYPTO)
add_definitions(/DXMRIG_ARM_CRYPTO)
set(ARM8_CXX_FLAGS "-march=armv8-a+crypto")
else()
set(ARM8_CXX_FLAGS "-march=armv8-a")
endif()
elseif (ARM_TARGET EQUAL 7)
set(XMRIG_ARMv7 ON)
add_definitions(/DXMRIG_ARMv7)
endif()
endif()

View File

@@ -19,8 +19,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s")
if (XMRIG_ARMv8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crypto")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crypto -flax-vector-conversions")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
elseif (XMRIG_ARMv7)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
@@ -60,8 +60,8 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
if (XMRIG_ARMv8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crypto")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crypto")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS}")
elseif (XMRIG_ARMv7)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")

View File

@@ -87,6 +87,10 @@ App::~App()
int App::exec()
{
if (m_controller->isDone()) {
return 0;
}
if (!m_controller->isReady()) {
return 2;
}

View File

@@ -6,7 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -33,7 +34,6 @@ bool Mem::m_enabled = true;
int Mem::m_flags = 0;
MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count)
{
using namespace xmrig;
@@ -41,11 +41,9 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count)
MemInfo info;
info.size = cn_select_memory(algorithm) * count;
# ifndef XMRIG_NO_AEON
info.size += info.size % cn_select_memory<CRYPTONIGHT>();
# endif
info.pages = info.size / cn_select_memory<CRYPTONIGHT>();
constexpr const size_t align_size = 2 * 1024 * 1024;
info.size = ((info.size + align_size - 1) / align_size) * align_size;
info.pages = info.size / align_size;
allocate(info, m_enabled);
@@ -53,6 +51,12 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count)
cryptonight_ctx *c = static_cast<cryptonight_ctx *>(_mm_malloc(sizeof(cryptonight_ctx), 4096));
c->memory = info.memory + (i * cn_select_memory(algorithm));
uint8_t* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
c->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
c->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
c->generated_code_height = (uint64_t)(-1);
c->generated_code_double_height = (uint64_t)(-1);
ctx[i] = c;
}

View File

@@ -6,7 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,8 +23,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __MEM_H__
#define __MEM_H__
#ifndef XMRIG_MEM_H
#define XMRIG_MEM_H
#include <stddef.h>
@@ -59,6 +60,10 @@ public:
static void init(bool enabled);
static void release(cryptonight_ctx **ctx, size_t count, MemInfo &info);
static void *allocateExecutableMemory(size_t size);
static void protectExecutableMemory(void *p, size_t size);
static void flushInstructionCache(void *p, size_t size);
static inline bool isHugepagesAvailable() { return (m_flags & HugepagesAvailable) != 0; }
private:
@@ -70,4 +75,4 @@ private:
};
#endif /* __MEM_H__ */
#endif /* XMRIG_MEM_H */

View File

@@ -6,7 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -87,3 +88,27 @@ void Mem::release(MemInfo &info)
_mm_free(info.memory);
}
}
void *Mem::allocateExecutableMemory(size_t size)
{
# if defined(__APPLE__)
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
# else
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
# endif
}
void Mem::protectExecutableMemory(void *p, size_t size)
{
mprotect(p, size, PROT_READ | PROT_EXEC);
}
void Mem::flushInstructionCache(void *p, size_t size)
{
# ifndef __FreeBSD__
__builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
# endif
}

View File

@@ -6,7 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -182,3 +183,22 @@ void Mem::release(MemInfo &info)
_mm_free(info.memory);
}
}
void *Mem::allocateExecutableMemory(size_t size)
{
return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
}
void Mem::protectExecutableMemory(void *p, size_t size)
{
DWORD oldProtect;
VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect);
}
void Mem::flushInstructionCache(void *p, size_t size)
{
::FlushInstructionCache(GetCurrentProcess(), p, size);
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -43,7 +44,8 @@ static const char *coloredAsmNames[] = {
"\x1B[1;31mnone\x1B[0m",
"auto",
"\x1B[1;32mintel\x1B[0m",
"\x1B[1;32mryzen\x1B[0m"
"\x1B[1;32mryzen\x1B[0m",
"\x1B[1;32mbulldozer\x1B[0m"
};
@@ -72,18 +74,25 @@ static void print_cpu(xmrig::Config *config)
using namespace xmrig;
if (config->isColors()) {
Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES",
Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES %sAVX2",
"CPU",
Cpu::info()->brand(),
Cpu::info()->sockets(),
Cpu::info()->isX64() ? "\x1B[1;32m" : "\x1B[1;31m-",
Cpu::info()->hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-");
Cpu::info()->isX64() ? "\x1B[1;32m" : "\x1B[1;31m-",
Cpu::info()->hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-",
Cpu::info()->hasAVX2() ? "\x1B[1;32m" : "\x1B[1;31m-");
# ifndef XMRIG_NO_LIBCPUID
Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0);
# endif
}
else {
Log::i()->text(" * %-13s%s (%d) %sx64 %sAES", "CPU", Cpu::info()->brand(), Cpu::info()->sockets(), Cpu::info()->isX64() ? "" : "-", Cpu::info()->hasAES() ? "" : "-");
Log::i()->text(" * %-13s%s (%d) %sx64 %sAES %sAVX2",
"CPU",
Cpu::info()->brand(),
Cpu::info()->sockets(),
Cpu::info()->isX64() ? "" : "-",
Cpu::info()->hasAES() ? "" : "-",
Cpu::info()->hasAVX2() ? "" : "-");
# ifndef XMRIG_NO_LIBCPUID
Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0);
# endif

225
src/base/tools/String.cpp Normal file
View File

@@ -0,0 +1,225 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "base/tools/String.h"
#include "rapidjson/document.h"
xmrig::String::String(const char *str) :
m_data(nullptr),
m_size(str == nullptr ? 0 : strlen(str))
{
if (m_size == 0) {
return;
}
m_data = new char[m_size + 1];
memcpy(m_data, str, m_size + 1);
}
xmrig::String::String(const char *str, size_t size) :
m_data(nullptr),
m_size(size)
{
if (str == nullptr) {
m_size = 0;
return;
}
m_data = new char[m_size + 1];
memcpy(m_data, str, m_size);
m_data[m_size] = '\0';
}
xmrig::String::String(const String &other) :
m_data(nullptr),
m_size(other.m_size)
{
if (other.m_data == nullptr) {
return;
}
m_data = new char[m_size + 1];
memcpy(m_data, other.m_data, m_size + 1);
}
bool xmrig::String::isEqual(const char *str) const
{
return (m_data != nullptr && str != nullptr && strcmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr);
}
bool xmrig::String::isEqual(const String &other) const
{
if (m_size != other.m_size) {
return false;
}
return (m_data != nullptr && other.m_data != nullptr && memcmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr);
}
rapidjson::Value xmrig::String::toJSON() const
{
using namespace rapidjson;
return isNull() ? Value(kNullType) : Value(StringRef(m_data));
}
rapidjson::Value xmrig::String::toJSON(rapidjson::Document &doc) const
{
using namespace rapidjson;
return isNull() ? Value(kNullType) : Value(m_data, doc.GetAllocator());
}
std::vector<xmrig::String> xmrig::String::split(char sep) const
{
std::vector<xmrig::String> out;
if (m_size == 0) {
return out;
}
size_t start = 0;
size_t pos = 0;
for (pos = 0; pos < m_size; ++pos) {
if (m_data[pos] == sep) {
if ((pos - start) > 0) {
out.push_back(String(m_data + start, pos - start));
}
start = pos + 1;
}
}
if ((pos - start) > 0) {
out.push_back(String(m_data + start, pos - start));
}
return out;
}
xmrig::String xmrig::String::join(const std::vector<xmrig::String> &vec, char sep)
{
if (vec.empty()) {
return String();
}
size_t size = vec.size();
for (const String &str : vec) {
size += str.size();
}
size_t offset = 0;
char *buf = new char[size];
for (const String &str : vec) {
memcpy(buf + offset, str.data(), str.size());
offset += str.size() + 1;
if (offset < size) {
buf[offset - 1] = sep;
}
}
buf[size - 1] = '\0';
return String(buf);
}
void xmrig::String::copy(const char *str)
{
delete [] m_data;
if (str == nullptr) {
m_size = 0;
m_data = nullptr;
return;
}
m_size = strlen(str);
m_data = new char[m_size + 1];
memcpy(m_data, str, m_size + 1);
}
void xmrig::String::copy(const String &other)
{
if (m_size > 0) {
if (m_size == other.m_size) {
memcpy(m_data, other.m_data, m_size + 1);
return;
}
delete [] m_data;
}
delete [] m_data;
if (other.m_data == nullptr) {
m_size = 0;
m_data = nullptr;
return;
}
m_size = other.m_size;
m_data = new char[m_size + 1];
memcpy(m_data, other.m_data, m_size + 1);
}
void xmrig::String::move(char *str)
{
delete [] m_data;
m_size = str == nullptr ? 0 : strlen(str);
m_data = str;
}
void xmrig::String::move(String &&other)
{
delete [] m_data;
m_data = other.m_data;
m_size = other.m_size;
other.m_data = nullptr;
other.m_size = 0;
}

104
src/base/tools/String.h Normal file
View File

@@ -0,0 +1,104 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_STRING_H
#define XMRIG_STRING_H
#include <utility>
#include <vector>
#include "rapidjson/fwd.h"
namespace xmrig {
/**
* @brief Simple C string wrapper.
*
* 1. I know about std:string.
* 2. For some reason I prefer don't use std:string in miner, eg because of file size of MSYS2 builds.
* 3. nullptr and JSON conversion supported.
*/
class String
{
public:
inline String() : m_data(nullptr), m_size(0) {}
inline String(char *str) : m_data(str), m_size(str == nullptr ? 0 : strlen(str)) {}
inline String(String &&other) : m_data(other.m_data), m_size(other.m_size) { other.m_data = nullptr; other.m_size = 0; }
String(const char *str);
String(const char *str, size_t size);
String(const String &other);
inline ~String() { delete [] m_data; }
bool isEqual(const char *str) const;
bool isEqual(const String &other) const;
inline bool contains(const char *str) const { return isNull() ? false : strstr(m_data, str) != nullptr; }
inline bool isEmpty() const { return size() == 0; }
inline bool isNull() const { return m_data == nullptr; }
inline char *data() { return m_data; }
inline const char *data() const { return m_data; }
inline size_t size() const { return m_size; }
inline bool operator!=(const char *str) const { return !isEqual(str); }
inline bool operator!=(const String &other) const { return !isEqual(other); }
inline bool operator<(const String &str) const { return strcmp(data(), str.data()) < 0; }
inline bool operator==(const char *str) const { return isEqual(str); }
inline bool operator==(const String &other) const { return isEqual(other); }
inline operator const char*() const { return m_data; }
inline String &operator=(char *str) { move(str); return *this; }
inline String &operator=(const char *str) { copy(str); return *this; }
inline String &operator=(const String &str) { copy(str); return *this; }
inline String &operator=(String &&other) { move(std::move(other)); return *this; }
rapidjson::Value toJSON() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const;
std::vector<xmrig::String> split(char sep) const;
static String join(const std::vector<xmrig::String> &vec, char sep);
private:
void copy(const char *str);
void copy(const String &other);
void move(char *str);
void move(String &&other);
char *m_data;
size_t m_size;
};
} /* namespace xmrig */
#endif /* XMRIG_STRING_H */

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,7 +37,7 @@
char Platform::m_defaultConfigName[520] = { 0 };
xmrig::c_str Platform::m_userAgent;
xmrig::String Platform::m_userAgent;
const char *Platform::defaultConfigName()

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,7 +29,7 @@
#include <stdint.h>
#include "common/utils/c_str.h"
#include "base/tools/String.h"
class Platform
@@ -36,7 +37,9 @@ class Platform
public:
static bool setThreadAffinity(uint64_t cpu_id);
static const char *defaultConfigName();
static uint32_t setTimerResolution(uint32_t resolution);
static void init(const char *userAgent);
static void restoreTimerResolution();
static void setProcessPriority(int priority);
static void setThreadPriority(int priority);
@@ -46,7 +49,7 @@ private:
static char *createUserAgent();
static char m_defaultConfigName[520];
static xmrig::c_str m_userAgent;
static xmrig::String m_userAgent;
};

View File

@@ -40,15 +40,20 @@
char *Platform::createUserAgent()
{
const size_t max = 160;
constexpr const size_t max = 256;
char *buf = new char[max];
char *buf = new char[max]();
int length = snprintf(buf, max, "%s/%s (Macintosh; Intel Mac OS X) libuv/%s", APP_NAME, APP_VERSION, uv_version_string());
# ifdef XMRIG_NVIDIA_PROJECT
const int cudaVersion = cuda_get_runtime_version();
snprintf(buf, max, "%s/%s (Macintosh; Intel Mac OS X) libuv/%s CUDA/%d.%d clang/%d.%d.%d", APP_NAME, APP_VERSION, uv_version_string(), cudaVersion / 1000, cudaVersion % 100, __clang_major__, __clang_minor__, __clang_patchlevel__);
# else
snprintf(buf, max, "%s/%s (Macintosh; Intel Mac OS X) libuv/%s clang/%d.%d.%d", APP_NAME, APP_VERSION, uv_version_string(), __clang_major__, __clang_minor__, __clang_patchlevel__);
length += snprintf(buf + length, max - length, " CUDA/%d.%d", cudaVersion / 1000, cudaVersion % 100);
# endif
# ifdef __clang__
length += snprintf(buf + length, max - length, " clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__);
# elif defined(__GNUC__)
length += snprintf(buf + length, max - length, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
# endif
return buf;
@@ -65,9 +70,19 @@ bool Platform::setThreadAffinity(uint64_t cpu_id)
}
uint32_t Platform::setTimerResolution(uint32_t resolution)
{
return resolution;
}
void Platform::restoreTimerResolution()
{
}
void Platform::setProcessPriority(int priority)
{
}

View File

@@ -54,9 +54,9 @@ typedef cpuset_t cpu_set_t;
char *Platform::createUserAgent()
{
const size_t max = 160;
constexpr const size_t max = 256;
char *buf = new char[max];
char *buf = new char[max]();
int length = snprintf(buf, max, "%s/%s (Linux ", APP_NAME, APP_VERSION);
# if defined(__x86_64__)
@@ -70,7 +70,9 @@ char *Platform::createUserAgent()
length += snprintf(buf + length, max - length, " CUDA/%d.%d", cudaVersion / 1000, cudaVersion % 100);
# endif
# ifdef __GNUC__
# ifdef __clang__
length += snprintf(buf + length, max - length, " clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__);
# elif defined(__GNUC__)
length += snprintf(buf + length, max - length, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
# endif
@@ -92,6 +94,17 @@ bool Platform::setThreadAffinity(uint64_t cpu_id)
}
uint32_t Platform::setTimerResolution(uint32_t resolution)
{
return resolution;
}
void Platform::restoreTimerResolution()
{
}
void Platform::setProcessPriority(int priority)
{
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,6 +23,7 @@
*/
#include <algorithm>
#include <winsock2.h>
#include <windows.h>
#include <uv.h>
@@ -37,6 +39,11 @@
#endif
#ifdef XMRIG_AMD_PROJECT
static uint32_t timerResolution = 0;
#endif
static inline OSVERSIONINFOEX winOsVersion()
{
typedef NTSTATUS (NTAPI *RtlGetVersionFunction)(LPOSVERSIONINFO);
@@ -58,9 +65,9 @@ static inline OSVERSIONINFOEX winOsVersion()
char *Platform::createUserAgent()
{
const auto osver = winOsVersion();
const size_t max = 160;
constexpr const size_t max = 256;
char *buf = new char[max];
char *buf = new char[max]();
int length = snprintf(buf, max, "%s/%s (Windows NT %lu.%lu", APP_NAME, APP_VERSION, osver.dwMajorVersion, osver.dwMinorVersion);
# if defined(__x86_64__) || defined(_M_AMD64)
@@ -94,6 +101,34 @@ bool Platform::setThreadAffinity(uint64_t cpu_id)
}
uint32_t Platform::setTimerResolution(uint32_t resolution)
{
# ifdef XMRIG_AMD_PROJECT
TIMECAPS tc;
if (timeGetDevCaps(&tc, sizeof(TIMECAPS)) != TIMERR_NOERROR) {
return 0;
}
timerResolution = std::min<uint32_t>(std::max<uint32_t>(tc.wPeriodMin, resolution), tc.wPeriodMax);
return timeBeginPeriod(timerResolution) == TIMERR_NOERROR ? timerResolution : 0;
# else
return resolution;
# endif
}
void Platform::restoreTimerResolution()
{
# ifdef XMRIG_AMD_PROJECT
if (timerResolution) {
timeEndPeriod(timerResolution);
}
# endif
}
void Platform::setProcessPriority(int priority)
{
if (priority == -1) {
@@ -121,6 +156,7 @@ void Platform::setProcessPriority(int priority)
case 5:
prio = REALTIME_PRIORITY_CLASS;
break;
default:
break;

View File

@@ -174,8 +174,11 @@ void xmrig::CommonConfig::printVersions()
int length = snprintf(buf, sizeof buf, "CUDA/%d.%d ", cudaVersion / 1000, cudaVersion % 100);
# else
memset(buf, 0, 16);
# if !defined(XMRIG_NO_HTTPD) || !defined(XMRIG_NO_TLS)
int length = 0;
# endif
# endif
# if !defined(XMRIG_NO_TLS) && defined(OPENSSL_VERSION_TEXT)
{
@@ -280,16 +283,16 @@ bool xmrig::CommonConfig::parseBoolean(int key, bool enable)
break;
case KeepAliveKey: /* --keepalive */
m_pools.back().setKeepAlive(enable ? Pool::kKeepAliveTimeout : 0);
currentPool().setKeepAlive(enable ? Pool::kKeepAliveTimeout : 0);
break;
case TlsKey: /* --tls */
m_pools.back().setTLS(enable);
currentPool().setTLS(enable);
break;
# ifndef XMRIG_PROXY_PROJECT
case NicehashKey: /* --nicehash */
m_pools.back().setNicehash(enable);
currentPool().setNicehash(enable);
break;
# endif
@@ -333,13 +336,15 @@ bool xmrig::CommonConfig::parseString(int key, const char *arg)
break;
case UserpassKey: /* --userpass */
if (!m_pools.back().setUserpass(arg)) {
if (!currentPool().setUserpass(arg)) {
return false;
}
break;
case UrlKey: /* --url */
fixup();
if (m_pools.size() > 1 || m_pools[0].isValid()) {
Pool pool(arg);
@@ -358,23 +363,23 @@ bool xmrig::CommonConfig::parseString(int key, const char *arg)
break;
case UserKey: /* --user */
m_pools.back().setUser(arg);
currentPool().setUser(arg);
break;
case PasswordKey: /* --pass */
m_pools.back().setPassword(arg);
currentPool().setPassword(arg);
break;
case RigIdKey: /* --rig-id */
m_pools.back().setRigId(arg);
currentPool().setRigId(arg);
break;
case FingerprintKey: /* --tls-fingerprint */
m_pools.back().setFingerprint(arg);
currentPool().setFingerprint(arg);
break;
case VariantKey: /* --variant */
m_pools.back().algorithm().parseVariant(arg);
currentPool().algorithm().parseVariant(arg);
break;
case LogFileKey: /* --log-file */
@@ -400,7 +405,7 @@ bool xmrig::CommonConfig::parseString(int key, const char *arg)
case RetriesKey: /* --retries */
case RetryPauseKey: /* --retry-pause */
case ApiPort: /* --api-port */
case PrintTimeKey: /* --cpu-priority */
case PrintTimeKey: /* --print-time */
return parseUint64(key, strtol(arg, nullptr, 10));
case BackgroundKey: /* --background */
@@ -462,11 +467,11 @@ bool xmrig::CommonConfig::parseInt(int key, int arg)
break;
case KeepAliveKey: /* --keepalive */
m_pools.back().setKeepAlive(arg);
currentPool().setKeepAlive(arg);
break;
case VariantKey: /* --variant */
m_pools.back().algorithm().parseVariant(arg);
currentPool().algorithm().parseVariant(arg);
break;
case DonateLevelKey: /* --donate-level */
@@ -493,3 +498,30 @@ bool xmrig::CommonConfig::parseInt(int key, int arg)
return true;
}
Pool &xmrig::CommonConfig::currentPool()
{
fixup();
return m_pools.back();
}
void xmrig::CommonConfig::fixup()
{
if (m_state == NoneState) {
return;
}
if (m_pools.empty()) {
if (!m_activePools.empty()) {
std::swap(m_pools, m_activePools);
}
else {
m_pools.push_back(Pool());
}
m_state = NoneState;
}
}

View File

@@ -112,9 +112,11 @@ protected:
private:
bool parseInt(int key, int arg);
Pool &currentPool();
void fixup();
};
} /* namespace xmrig */
#endif /* __COMMONCONFIG_H__ */
#endif /* XMRIG_COMMONCONFIG_H */

View File

@@ -5,7 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -50,6 +50,7 @@
#include "rapidjson/filereadstream.h"
bool xmrig::ConfigLoader::m_done = false;
xmrig::ConfigWatcher *xmrig::ConfigLoader::m_watcher = nullptr;
xmrig::IConfigCreator *xmrig::ConfigLoader::m_creator = nullptr;
xmrig::IWatcherListener *xmrig::ConfigLoader::m_listener = nullptr;
@@ -283,12 +284,16 @@ void xmrig::ConfigLoader::parseJSON(xmrig::IConfig *config, const struct option
void xmrig::ConfigLoader::showUsage()
{
m_done = true;
printf(usage);
}
void xmrig::ConfigLoader::showVersion()
{
m_done = true;
printf(APP_NAME " " APP_VERSION "\n built on " __DATE__
# if defined(__clang__)

View File

@@ -5,7 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CONFIGLOADER_H__
#define __CONFIGLOADER_H__
#ifndef XMRIG_CONFIGLOADER_H
#define XMRIG_CONFIGLOADER_H
#include <stdint.h>
@@ -53,6 +53,8 @@ public:
static IConfig *load(int argc, char **argv, IConfigCreator *creator, IWatcherListener *listener);
static void release();
static inline bool isDone() { return m_done; }
private:
static bool getJSON(const char *fileName, rapidjson::Document &doc);
static bool parseArg(IConfig *config, int key, const char *arg);
@@ -60,6 +62,7 @@ private:
static void showUsage();
static void showVersion();
static bool m_done;
static ConfigWatcher *m_watcher;
static IConfigCreator *m_creator;
static IWatcherListener *m_listener;
@@ -68,4 +71,4 @@ private:
} /* namespace xmrig */
#endif /* __CONFIGLOADER_H__ */
#endif /* XMRIG_CONFIGLOADER_H */

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,6 +36,10 @@
# define bit_AES (1 << 25)
#endif
#ifndef bit_AVX2
# define bit_AVX2 (1 << 5)
#endif
#include "common/cpu/BasicCpuInfo.h"
@@ -71,7 +76,7 @@ static inline void cpuid(int level, int output[4]) {
static inline void cpu_brand_string(char* s) {
int cpu_info[4] = { 0 };
int32_t cpu_info[4] = { 0 };
cpuid(VENDOR_ID, cpu_info);
if (cpu_info[EAX_Reg] >= 4) {
@@ -86,19 +91,50 @@ static inline void cpu_brand_string(char* s) {
static inline bool has_aes_ni()
{
int cpu_info[4] = { 0 };
int32_t cpu_info[4] = { 0 };
cpuid(PROCESSOR_INFO, cpu_info);
return (cpu_info[ECX_Reg] & bit_AES) != 0;
}
static inline bool has_avx2()
{
int32_t cpu_info[4] = { 0 };
cpuid(EXTENDED_FEATURES, cpu_info);
return (cpu_info[EBX_Reg] & bit_AVX2) != 0;
}
xmrig::BasicCpuInfo::BasicCpuInfo() :
m_assembly(ASM_NONE),
m_aes(has_aes_ni()),
m_avx2(has_avx2()),
m_brand(),
m_threads(std::thread::hardware_concurrency())
{
cpu_brand_string(m_brand);
# ifndef XMRIG_NO_ASM
if (hasAES()) {
char vendor[13] = { 0 };
int32_t data[4] = { 0 };
cpuid(0, data);
memcpy(vendor + 0, &data[1], 4);
memcpy(vendor + 4, &data[3], 4);
memcpy(vendor + 8, &data[2], 4);
if (memcmp(vendor, "GenuineIntel", 12) == 0) {
m_assembly = ASM_INTEL;
}
else if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
m_assembly = ASM_RYZEN;
}
}
# endif
}

View File

@@ -4,9 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,8 +40,9 @@ public:
protected:
size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override;
inline Assembly assembly() const override { return ASM_NONE; }
inline Assembly assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; }
inline bool isSupported() const override { return true; }
inline const char *brand() const override { return m_brand; }
inline int32_t cores() const override { return -1; }
@@ -58,7 +59,9 @@ protected:
# endif
private:
Assembly m_assembly;
bool m_aes;
bool m_avx2;
char m_brand[64];
int32_t m_threads;
};

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,19 +25,29 @@
#include <string.h>
#include <thread>
#if __ARM_FEATURE_CRYPTO
# include <sys/auxv.h>
# include <asm/hwcap.h>
#endif
#include "common/cpu/BasicCpuInfo.h"
xmrig::BasicCpuInfo::BasicCpuInfo() :
m_aes(false),
m_avx2(false),
m_brand(),
m_threads(std::thread::hardware_concurrency())
{
memcpy(m_brand, "Unknown", 7);
# ifdef XMRIG_ARMv8
memcpy(m_brand, "ARMv8", 5);
# else
memcpy(m_brand, "ARMv7", 5);
# endif
# if __ARM_FEATURE_CRYPTO
m_aes = true;
m_aes = getauxval(AT_HWCAP) & HWCAP_AES;
# endif
}

View File

@@ -6,8 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -62,6 +62,9 @@ static AlgoData const algorithms[] = {
{ "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO },
{ "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO },
{ "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
{ "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
{ "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
{ "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW },
# ifndef XMRIG_NO_AEON
{ "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO },
@@ -76,6 +79,18 @@ static AlgoData const algorithms[] = {
{ "cryptonight-heavy/xhv", "cn-heavy/xhv", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV },
{ "cryptonight-heavy/tube", "cn-heavy/tube", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE },
# endif
# ifndef XMRIG_NO_CN_PICO
{ "cryptonight-pico/trtl", "cn-pico/trtl", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
{ "cryptonight-pico", "cn-pico", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
{ "cryptonight-turtle", "cn-trtl", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
{ "cryptonight-ultralite", "cn-ultralite", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
{ "cryptonight_turtle", "cn_turtle", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
# endif
# ifndef XMRIG_NO_CN_GPU
{ "cryptonight/gpu", "cn/gpu", xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU },
# endif
};
@@ -95,6 +110,8 @@ static AlgoData const xmrStakAlgorithms[] = {
{ "cryptonight_masari", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR },
{ "cryptonight-bittube2", nullptr, xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE }, // bittube-miner
{ "cryptonight_alloy", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO }, // xmr-stak-alloy
{ "cryptonight_turtle", nullptr, xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
{ "cryptonight_gpu", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU },
};
#endif
@@ -109,9 +126,16 @@ static const char *variants[] = {
"xao",
"rto",
"2",
"half",
"trtl",
"gpu",
"wow",
};
static_assert(xmrig::VARIANT_MAX == ARRAY_SIZE(variants), "variants size mismatch");
bool xmrig::Algorithm::isValid() const
{
if (m_algo == INVALID_ALGO) {
@@ -144,10 +168,16 @@ void xmrig::Algorithm::parseAlgorithm(const char *algo)
m_variant = VARIANT_AUTO;
assert(algo != nullptr);
if (algo == nullptr) {
if (algo == nullptr || strlen(algo) < 1) {
return;
}
if (*algo == '!') {
m_flags |= Forced;
return parseAlgorithm(algo + 1);
}
for (size_t i = 0; i < ARRAY_SIZE(algorithms); i++) {
if ((strcasecmp(algo, algorithms[i].name) == 0) || (strcasecmp(algo, algorithms[i].shortName) == 0)) {
m_algo = algorithms[i].algo;
@@ -166,12 +196,26 @@ void xmrig::Algorithm::parseVariant(const char *variant)
{
m_variant = VARIANT_AUTO;
if (variant == nullptr || strlen(variant) < 1) {
return;
}
if (*variant == '!') {
m_flags |= Forced;
return parseVariant(variant + 1);
}
for (size_t i = 0; i < ARRAY_SIZE(variants); i++) {
if (strcasecmp(variant, variants[i]) == 0) {
m_variant = static_cast<Variant>(i);
break;
return;
}
}
if (strcasecmp(variant, "xtlv9") == 0) {
m_variant = VARIANT_HALF;
}
}
@@ -199,6 +243,10 @@ void xmrig::Algorithm::parseVariant(int variant)
void xmrig::Algorithm::setAlgo(Algo algo)
{
m_algo = algo;
if (m_algo == CRYPTONIGHT_PICO && m_variant == VARIANT_AUTO) {
m_variant = xmrig::VARIANT_TRTL;
}
}

View File

@@ -7,7 +7,7 @@
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -39,28 +39,38 @@ namespace xmrig {
class Algorithm
{
public:
enum Flags {
None = 0,
Forced = 1
};
inline Algorithm() :
m_algo(INVALID_ALGO),
m_flags(0),
m_variant(VARIANT_AUTO)
{}
inline Algorithm(Algo algo, Variant variant) :
m_flags(0),
m_variant(variant)
{
setAlgo(algo);
}
inline Algorithm(const char *algo)
inline Algorithm(const char *algo) :
m_flags(0)
{
parseAlgorithm(algo);
}
bool isEqual(const Algorithm &other) const { return m_algo == other.m_algo && m_variant == other.m_variant; }
inline Algo algo() const { return m_algo; }
inline const char *name() const { return name(false); }
inline const char *shortName() const { return name(true); }
inline Variant variant() const { return m_variant; }
inline void setVariant(Variant variant) { m_variant = variant; }
inline Algo algo() const { return m_algo; }
inline bool isEqual(const Algorithm &other) const { return m_algo == other.m_algo && m_variant == other.m_variant; }
inline bool isForced() const { return m_flags & Forced; }
inline const char *name() const { return name(false); }
inline const char *shortName() const { return name(true); }
inline int flags() const { return m_flags; }
inline Variant variant() const { return m_variant; }
inline void setVariant(Variant variant) { m_variant = variant; }
inline bool operator!=(const Algorithm &other) const { return !isEqual(other); }
inline bool operator==(const Algorithm &other) const { return isEqual(other); }
@@ -80,6 +90,7 @@ private:
const char *name(bool shortName) const;
Algo m_algo;
int m_flags;
Variant m_variant;
};

View File

@@ -97,16 +97,23 @@ public:
OclCompModeKey = 1410,
// xmrig-proxy
AccessLogFileKey = 'A',
BindKey = 'b',
CoinKey = 1104,
CustomDiffKey = 1102,
DebugKey = 1101,
ModeKey = 'm',
PoolCoinKey = 'C',
ReuseTimeoutKey = 1106,
WorkersKey = 1103,
WorkersAdvKey = 1107,
AccessLogFileKey = 'A',
BindKey = 'b',
CoinKey = 1104,
CustomDiffKey = 1102,
DebugKey = 1101,
ModeKey = 'm',
PoolCoinKey = 'C',
ReuseTimeoutKey = 1106,
WorkersKey = 1103,
WorkersAdvKey = 1107,
TlsBindKey = 1108,
TlsCertKey = 1109,
TlsCertKeyKey = 1110,
TlsDHparamKey = 1111,
TlsCiphersKey = 1112,
TlsCipherSuitesKey = 1113,
TlsProtocolsKey = 1114,
// xmrig nvidia
CudaMaxThreadsKey = 1200,

View File

@@ -4,7 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2018 XMRig <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,6 +42,7 @@ public:
virtual ~ICpuInfo() {}
virtual bool hasAES() const = 0;
virtual bool hasAVX2() const = 0;
virtual bool isSupported() const = 0;
virtual bool isX64() const = 0;
virtual const char *brand() const = 0;

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +22,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ISTRATEGY_H__
#define __ISTRATEGY_H__
#ifndef XMRIG_ISTRATEGY_H
#define XMRIG_ISTRATEGY_H
#include <stdint.h>
@@ -31,18 +32,24 @@
class JobResult;
namespace xmrig {
class Algorithm;
}
class IStrategy
{
public:
virtual ~IStrategy() {}
virtual bool isActive() const = 0;
virtual int64_t submit(const JobResult &result) = 0;
virtual void connect() = 0;
virtual void resume() = 0;
virtual void stop() = 0;
virtual void tick(uint64_t now) = 0;
virtual bool isActive() const = 0;
virtual int64_t submit(const JobResult &result) = 0;
virtual void connect() = 0;
virtual void resume() = 0;
virtual void setAlgo(const xmrig::Algorithm &algo) = 0;
virtual void stop() = 0;
virtual void tick(uint64_t now) = 0;
};
#endif // __ISTRATEGY_H__
#endif // XMRIG_ISTRATEGY_H

View File

@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __LOG_H__
#define __LOG_H__
#ifndef XMRIG_LOG_H
#define XMRIG_LOG_H
#include <assert.h>
@@ -39,7 +39,7 @@ public:
static inline Log* i() { if (!m_self) { defaultInit(); } return m_self; }
static inline void add(ILogBackend *backend) { i()->m_backends.push_back(backend); }
static inline void init() { if (!m_self) { new Log(); } }
static inline void release() { assert(m_self != nullptr); delete m_self; }
static inline void release() { delete m_self; }
void message(ILogBackend::Level level, const char* fmt, ...);
void text(const char* fmt, ...);
@@ -77,6 +77,7 @@ private:
#define CYAN(x) "\x1B[0;36m" x "\x1B[0m"
#define WHITE_BOLD(x) "\x1B[1;37m" x "\x1B[0m"
#define WHITE(x) "\x1B[0;37m" x "\x1B[0m"
#define GRAY(x) "\x1B[1;30m" x "\x1B[0m"
#define LOG_ERR(x, ...) Log::i()->message(ILogBackend::ERR, x, ##__VA_ARGS__)
@@ -98,4 +99,4 @@ private:
# define LOG_DEBUG_WARN(x, ...)
#endif
#endif /* __LOG_H__ */
#endif /* XMRIG_LOG_H */

View File

@@ -5,7 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -212,6 +212,16 @@ const char *Client::tlsVersion() const
int64_t Client::submit(const JobResult &result)
{
# ifndef XMRIG_PROXY_PROJECT
if (result.clientId != m_rpcId) {
return -1;
}
# endif
if (m_job.algorithm().variant() == xmrig::VARIANT_WOW && m_job.id() != result.jobId) {
return -1;
}
using namespace rapidjson;
# ifdef XMRIG_PROXY_PROJECT
@@ -259,7 +269,11 @@ int64_t Client::submit(const JobResult &result)
bool Client::close()
{
if (m_state == UnconnectedState || m_state == ClosingState || !m_socket) {
if (m_state == ClosingState) {
return m_socket != nullptr;
}
if (m_state == UnconnectedState || m_socket == nullptr) {
return false;
}
@@ -344,6 +358,14 @@ bool Client::parseJob(const rapidjson::Value &params, int *code)
}
}
if (params.HasMember("height")) {
const rapidjson::Value &variant = params["height"];
if (variant.IsUint64()) {
job.setHeight(variant.GetUint64());
}
}
if (!verifyAlgorithm(job.algorithm())) {
*code = 6;
@@ -351,6 +373,8 @@ bool Client::parseJob(const rapidjson::Value &params, int *code)
return false;
}
m_job.setClientId(m_rpcId);
if (m_job != job) {
m_jobs++;
m_job = std::move(job);
@@ -426,7 +450,7 @@ bool Client::send(BIO *bio)
bool Client::verifyAlgorithm(const xmrig::Algorithm &algorithm) const
{
# ifdef XMRIG_PROXY_PROJECT
if (m_pool.algorithm().variant() == xmrig::VARIANT_AUTO) {
if (m_pool.algorithm().variant() == xmrig::VARIANT_AUTO || m_id == -1) {
return true;
}
# endif
@@ -550,7 +574,6 @@ void Client::connect(sockaddr *addr)
setState(ConnectingState);
reinterpret_cast<sockaddr_in*>(addr)->sin_port = htons(m_pool.port());
delete m_socket;
uv_connect_t *req = new uv_connect_t;
req->data = m_storage.ptr(m_key);
@@ -831,13 +854,14 @@ void Client::reconnect()
return;
}
setState(ConnectingState);
m_keepAlive = 0;
if (m_failures == -1) {
return m_listener->onClose(this, -1);
}
setState(ConnectingState);
m_failures++;
m_listener->onClose(this, (int) m_failures);
@@ -927,7 +951,7 @@ void Client::onRead(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf)
}
if (nread < 0) {
if (nread != UV_EOF && !client->isQuiet()) {
if (!client->isQuiet()) {
LOG_ERR("[%s] read error: \"%s\"", client->m_pool.url(), uv_strerror((int) nread));
}

View File

@@ -5,7 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View File

@@ -5,7 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ID_H__
#define __ID_H__
#ifndef XMRIG_ID_H
#define XMRIG_ID_H
#include <string.h>
@@ -95,4 +95,4 @@ private:
} /* namespace xmrig */
#endif /* __ID_H__ */
#endif /* XMRIG_ID_H */

View File

@@ -7,7 +7,7 @@
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -31,7 +31,7 @@
#include "common/net/Job.h"
static inline unsigned char hf_hex2bin(char c, bool &err)
unsigned char hf_hex2bin(char c, bool &err)
{
if (c >= '0' && c <= '9') {
return c - '0';
@@ -48,7 +48,7 @@ static inline unsigned char hf_hex2bin(char c, bool &err)
}
static inline char hf_bin2hex(unsigned char c)
char hf_bin2hex(unsigned char c)
{
if (c <= 0x9) {
return '0' + c;
@@ -66,7 +66,8 @@ Job::Job() :
m_size(0),
m_diff(0),
m_target(0),
m_blob()
m_blob(),
m_height(0)
{
}
@@ -80,6 +81,7 @@ Job::Job(int poolId, bool nicehash, const xmrig::Algorithm &algorithm, const xmr
m_diff(0),
m_target(0),
m_blob(),
m_height(0),
m_algorithm(algorithm),
m_clientId(clientId)
{
@@ -91,6 +93,12 @@ Job::~Job()
}
bool Job::isEqual(const Job &other) const
{
return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0;
}
bool Job::setBlob(const char *blob)
{
if (!blob) {
@@ -119,6 +127,18 @@ bool Job::setBlob(const char *blob)
m_algorithm.setVariant(variant());
}
if (!m_algorithm.isForced()) {
if (m_algorithm.variant() == xmrig::VARIANT_XTL && m_blob[0] >= 9) {
m_algorithm.setVariant(xmrig::VARIANT_HALF);
}
else if (m_algorithm.variant() == xmrig::VARIANT_MSR && m_blob[0] >= 8) {
m_algorithm.setVariant(xmrig::VARIANT_HALF);
}
else if (m_algorithm.variant() == xmrig::VARIANT_WOW && m_blob[0] < 11) {
m_algorithm.setVariant(xmrig::VARIANT_2);
}
}
# ifdef XMRIG_PROXY_PROJECT
memset(m_rawBlob, 0, sizeof(m_rawBlob));
memcpy(m_rawBlob, blob, m_size * 2);
@@ -180,6 +200,12 @@ void Job::setAlgorithm(const char *algo)
}
void Job::setHeight(uint64_t height)
{
m_height = height;
}
bool Job::fromHex(const char* in, unsigned int len, unsigned char* out)
{
bool error = false;
@@ -214,18 +240,6 @@ char *Job::toHex(const unsigned char* in, unsigned int len)
#endif
bool Job::operator==(const Job &other) const
{
return m_id == other.m_id && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0;
}
bool Job::operator!=(const Job &other) const
{
return m_id != other.m_id || memcmp(m_blob, other.m_blob, sizeof(m_blob)) != 0;
}
xmrig::Variant Job::variant() const
{
using namespace xmrig;

View File

@@ -6,8 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -38,13 +38,19 @@
class Job
{
public:
// Max blob size is 84 (75 fixed + 9 variable), aligned to 96. https://github.com/xmrig/xmrig/issues/1 Thanks fireice-uk.
// SECOR increase requirements for blob size: https://github.com/xmrig/xmrig/issues/913
static constexpr const size_t kMaxBlobSize = 128;
Job();
Job(int poolId, bool nicehash, const xmrig::Algorithm &algorithm, const xmrig::Id &clientId);
~Job();
bool isEqual(const Job &other) const;
bool setBlob(const char *blob);
bool setTarget(const char *target);
void setAlgorithm(const char *algo);
void setHeight(uint64_t height);
inline bool isNicehash() const { return m_nicehash; }
inline bool isValid() const { return m_size > 0 && m_diff > 0; }
@@ -60,6 +66,7 @@ public:
inline uint32_t *nonce() { return reinterpret_cast<uint32_t*>(m_blob + 39); }
inline uint32_t diff() const { return static_cast<uint32_t>(m_diff); }
inline uint64_t target() const { return m_target; }
inline uint64_t height() const { return m_height; }
inline void reset() { m_size = 0; m_diff = 0; }
inline void setClientId(const xmrig::Id &id) { m_clientId = id; }
inline void setPoolId(int poolId) { m_poolId = poolId; }
@@ -81,8 +88,8 @@ public:
static char *toHex(const unsigned char* in, unsigned int len);
# endif
bool operator==(const Job &other) const;
bool operator!=(const Job &other) const;
inline bool operator==(const Job &other) const { return isEqual(other); }
inline bool operator!=(const Job &other) const { return !isEqual(other); }
private:
xmrig::Variant variant() const;
@@ -94,13 +101,14 @@ private:
size_t m_size;
uint64_t m_diff;
uint64_t m_target;
uint8_t m_blob[96]; // Max blob size is 84 (75 fixed + 9 variable), aligned to 96. https://github.com/xmrig/xmrig/issues/1 Thanks fireice-uk.
uint8_t m_blob[kMaxBlobSize];
uint64_t m_height;
xmrig::Algorithm m_algorithm;
xmrig::Id m_clientId;
xmrig::Id m_id;
# ifdef XMRIG_PROXY_PROJECT
char m_rawBlob[176];
char m_rawBlob[kMaxBlobSize * 2 + 8];
char m_rawTarget[24];
# endif
};

View File

@@ -4,9 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -106,8 +106,8 @@ bool Pool::isCompatible(const xmrig::Algorithm &algorithm) const
}
# ifdef XMRIG_PROXY_PROJECT
if (m_algorithm.algo() == xmrig::CRYPTONIGHT && algorithm.algo() == xmrig::CRYPTONIGHT && m_algorithm.variant() == xmrig::VARIANT_XTL) {
return true;
if (m_algorithm.algo() == xmrig::CRYPTONIGHT && algorithm.algo() == xmrig::CRYPTONIGHT) {
return m_algorithm.variant() == xmrig::VARIANT_XTL || m_algorithm.variant() == xmrig::VARIANT_MSR;
}
# endif
@@ -203,10 +203,10 @@ rapidjson::Value Pool::toJSON(rapidjson::Document &doc) const
Value obj(kObjectType);
obj.AddMember("url", StringRef(url()), allocator);
obj.AddMember("user", StringRef(user()), allocator);
obj.AddMember("pass", StringRef(password()), allocator);
obj.AddMember("rig-id", rigId() ? Value(StringRef(rigId())).Move() : Value(kNullType).Move(), allocator);
obj.AddMember("url", m_url.toJSON(), allocator);
obj.AddMember("user", m_user.toJSON(), allocator);
obj.AddMember("pass", m_password.toJSON(), allocator);
obj.AddMember("rig-id", m_rigId.toJSON(), allocator);
# ifndef XMRIG_PROXY_PROJECT
obj.AddMember("nicehash", isNicehash(), allocator);
@@ -223,17 +223,20 @@ rapidjson::Value Pool::toJSON(rapidjson::Document &doc) const
case xmrig::VARIANT_AUTO:
case xmrig::VARIANT_0:
case xmrig::VARIANT_1:
case xmrig::VARIANT_2:
obj.AddMember("variant", m_algorithm.variant(), allocator);
break;
case xmrig::VARIANT_2:
obj.AddMember("variant", 2, allocator);
break;
default:
obj.AddMember("variant", StringRef(m_algorithm.variantName()), allocator);
break;
}
obj.AddMember("tls", isTLS(), allocator);
obj.AddMember("tls-fingerprint", fingerprint() ? Value(StringRef(fingerprint())).Move() : Value(kNullType).Move(), allocator);
obj.AddMember("tls-fingerprint", m_fingerprint.toJSON(), allocator);
return obj;
}
@@ -322,23 +325,39 @@ void Pool::adjustVariant(const xmrig::Variant variantHint)
m_nicehash = true;
bool valid = true;
if (m_host.contains("cryptonight.") && m_port == 3355) {
valid = m_algorithm.algo() == CRYPTONIGHT;
switch (m_port) {
case 3355:
case 33355:
valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonight.");
m_algorithm.setVariant(VARIANT_0);
}
else if (m_host.contains("cryptonightv7.") && m_port == 3363) {
valid = m_algorithm.algo() == CRYPTONIGHT;
break;
case 3363:
case 33363:
valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv7.");
m_algorithm.setVariant(VARIANT_1);
}
else if (m_host.contains("cryptonightheavy.") && m_port == 3364) {
valid = m_algorithm.algo() == CRYPTONIGHT_HEAVY;
break;
case 3364:
valid = m_algorithm.algo() == CRYPTONIGHT_HEAVY && m_host.contains("cryptonightheavy.");
m_algorithm.setVariant(VARIANT_0);
break;
case 3367:
case 33367:
valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv8.");
m_algorithm.setVariant(VARIANT_2);
break;
default:
break;
}
if (!valid) {
m_algorithm.setAlgo(INVALID_ALGO);
}
m_tls = m_port > 33000;
return;
}
@@ -349,7 +368,7 @@ void Pool::adjustVariant(const xmrig::Variant variantHint)
if (m_host.contains("xmr.pool.")) {
valid = m_algorithm.algo() == CRYPTONIGHT;
m_algorithm.setVariant(m_port == 45700 ? VARIANT_1 : VARIANT_0);
m_algorithm.setVariant(m_port == 45700 ? VARIANT_AUTO : VARIANT_0);
}
else if (m_host.contains("aeon.pool.") && m_port == 45690) {
valid = m_algorithm.algo() == CRYPTONIGHT_LITE;
@@ -393,15 +412,18 @@ void Pool::rebuild()
m_algorithms.push_back(m_algorithm);
# ifndef XMRIG_PROXY_PROJECT
addVariant(xmrig::VARIANT_WOW);
addVariant(xmrig::VARIANT_2);
addVariant(xmrig::VARIANT_1);
addVariant(xmrig::VARIANT_0);
addVariant(xmrig::VARIANT_HALF);
addVariant(xmrig::VARIANT_XTL);
addVariant(xmrig::VARIANT_TUBE);
addVariant(xmrig::VARIANT_MSR);
addVariant(xmrig::VARIANT_XHV);
addVariant(xmrig::VARIANT_XAO);
addVariant(xmrig::VARIANT_RTO);
addVariant(xmrig::VARIANT_GPU);
addVariant(xmrig::VARIANT_AUTO);
# endif
}

View File

@@ -6,7 +6,7 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View File

@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_TLS_H
#define XMRIG_TLS_H
#ifndef XMRIG_CLIENT_TLS_H
#define XMRIG_CLIENT_TLS_H
#include <openssl/ssl.h>
@@ -59,4 +59,4 @@ private:
};
#endif /* XMRIG_TLS_H */
#endif /* XMRIG_CLIENT_TLS_H */

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -76,6 +77,14 @@ void FailoverStrategy::resume()
}
void FailoverStrategy::setAlgo(const xmrig::Algorithm &algo)
{
for (Client *client : m_pools) {
client->setAlgo(algo);
}
}
void FailoverStrategy::stop()
{
for (size_t i = 0; i < m_pools.size(); ++i) {

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +22,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __FAILOVERSTRATEGY_H__
#define __FAILOVERSTRATEGY_H__
#ifndef XMRIG_FAILOVERSTRATEGY_H
#define XMRIG_FAILOVERSTRATEGY_H
#include <vector>
@@ -42,7 +43,7 @@ class FailoverStrategy : public IStrategy, public IClientListener
{
public:
FailoverStrategy(const std::vector<Pool> &urls, int retryPause, int retries, IStrategyListener *listener, bool quiet = false);
~FailoverStrategy();
~FailoverStrategy() override;
public:
inline bool isActive() const override { return m_active >= 0; }
@@ -50,6 +51,7 @@ public:
int64_t submit(const JobResult &result) override;
void connect() override;
void resume() override;
void setAlgo(const xmrig::Algorithm &algo) override;
void stop() override;
void tick(uint64_t now) override;
@@ -71,4 +73,4 @@ private:
std::vector<Client*> m_pools;
};
#endif /* __FAILOVERSTRATEGY_H__ */
#endif /* XMRIG_FAILOVERSTRATEGY_H */

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -68,6 +69,12 @@ void SinglePoolStrategy::resume()
}
void SinglePoolStrategy::setAlgo(const xmrig::Algorithm &algo)
{
m_client->setAlgo(algo);
}
void SinglePoolStrategy::stop()
{
m_client->disconnect();

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +22,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __SINGLEPOOLSTRATEGY_H__
#define __SINGLEPOOLSTRATEGY_H__
#ifndef XMRIG_SINGLEPOOLSTRATEGY_H
#define XMRIG_SINGLEPOOLSTRATEGY_H
#include "common/interfaces/IClientListener.h"
@@ -31,14 +32,14 @@
class Client;
class IStrategyListener;
class Url;
class Pool;
class SinglePoolStrategy : public IStrategy, public IClientListener
{
public:
SinglePoolStrategy(const Pool &pool, int retryPause, int retries, IStrategyListener *listener, bool quiet = false);
~SinglePoolStrategy();
~SinglePoolStrategy() override;
public:
inline bool isActive() const override { return m_active; }
@@ -46,6 +47,7 @@ public:
int64_t submit(const JobResult &result) override;
void connect() override;
void resume() override;
void setAlgo(const xmrig::Algorithm &algo) override;
void stop() override;
void tick(uint64_t now) override;
@@ -61,4 +63,4 @@ private:
IStrategyListener *m_listener;
};
#endif /* __SINGLEPOOLSTRATEGY_H__ */
#endif /* XMRIG_SINGLEPOOLSTRATEGY_H */

View File

@@ -21,82 +21,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __C_STR_H__
#define __C_STR_H__
#ifndef XMRIG_C_STR_H
#define XMRIG_C_STR_H
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "base/tools/String.h"
namespace xmrig {
/**
* @brief Simple C string wrapper.
*
* 1. I know about std:string.
* 2. For some reason I prefer don't use std:string in miner, eg because of file size of MSYS2 builds.
*/
class c_str
{
public:
inline c_str() : m_data(nullptr) {}
inline c_str(c_str &&other) { m_data = other.m_data; other.m_data = nullptr; }
inline c_str(const c_str &other) : m_data(nullptr) { set(other.data()); }
inline c_str(const char *str) : m_data(nullptr) { set(str); }
inline ~c_str() { free(m_data); }
inline void set(const char *str)
{
free(m_data);
m_data = str != nullptr ? strdup(str) : nullptr;
}
inline void set(char *str)
{
free(m_data);
m_data = str;
}
inline bool isEqual(const char *str) const
{
return (m_data != nullptr && str != nullptr && strcmp(m_data, str) == 0) || (m_data == nullptr && m_data == nullptr);
}
inline bool contains(const char *str) const
{
return strstr(m_data, str) != nullptr;
}
inline bool isNull() const { return m_data == nullptr; }
inline const char *data() const { return m_data; }
inline size_t size() const { return m_data == nullptr ? 0 : strlen(m_data); }
inline bool operator!=(const c_str &str) const { return !isEqual(str.data()); }
inline bool operator!=(const char *str) const { return !isEqual(str); }
inline bool operator==(const c_str &str) const { return isEqual(str.data()); }
inline bool operator==(const char *str) const { return isEqual(str); }
inline c_str &operator=(char *str) { set(str); return *this; }
inline c_str &operator=(const c_str &str) { set(str.data()); return *this; }
inline c_str &operator=(const char *str) { set(str); return *this; }
private:
char *m_data;
};
typedef String c_str;
} /* namespace xmrig */
#endif /* __C_STR_H__ */
#endif /* XMRIG_C_STR_H */

View File

@@ -31,7 +31,7 @@
namespace xmrig {
static inline int64_t currentMSecsSinceEpoch()
static inline int64_t steadyTimestamp()
{
using namespace std::chrono;
if (high_resolution_clock::is_steady) {
@@ -42,6 +42,14 @@ static inline int64_t currentMSecsSinceEpoch()
}
static inline int64_t currentMSecsSinceEpoch()
{
using namespace std::chrono;
return time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
}
} /* namespace xmrig */
#endif /* XMRIG_TIMESTAMP_H */

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -31,9 +32,11 @@ namespace xmrig
enum Algo {
INVALID_ALGO = -1,
CRYPTONIGHT, /* CryptoNight (Monero) */
CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
CRYPTONIGHT_HEAVY /* CryptoNight-Heavy (RYO) */
CRYPTONIGHT, /* CryptoNight (2 MB) */
CRYPTONIGHT_LITE, /* CryptoNight (1 MB) */
CRYPTONIGHT_HEAVY, /* CryptoNight (4 MB) */
CRYPTONIGHT_PICO, /* CryptoNight (256 KB) */
ALGO_MAX
};
@@ -68,6 +71,10 @@ enum Variant {
VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only)
VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only)
VARIANT_2 = 8, // CryptoNight variant 2
VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite)
VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL)
VARIANT_GPU = 11, // CryptoNight-GPU (Ryo)
VARIANT_WOW = 12, // CryptoNightR (Wownero)
VARIANT_MAX
};
@@ -99,6 +106,7 @@ enum Assembly {
ASM_AUTO,
ASM_INTEL,
ASM_RYZEN,
ASM_BULLDOZER,
ASM_MAX
};

View File

@@ -19,7 +19,7 @@
"huge-pages": true,
"hw-aes": null,
"log-file": null,
"max-cpu-usage": 75,
"max-cpu-usage": 100,
"pools": [
{
"url": "donate.v2.xmrig.com:3333",

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -48,7 +49,7 @@ xmrig::Config::Config() : xmrig::CommonConfig(),
m_hugePages(true),
m_safe(false),
m_shouldSave(false),
m_maxCpuUsage(75),
m_maxCpuUsage(100),
m_priority(-1)
{
}

View File

@@ -61,7 +61,10 @@ Options:\n\
--rig-id=ID rig identifier for pool-side statistics (needs pool support)\n\
-t, --threads=N number of miner threads\n\
-v, --av=N algorithm variation, 0 auto select\n\
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-k, --keepalive send keepalived packet for prevent timeout (needs pool support)\n\
--nicehash enable nicehash.com support\n\
--tls enable SSL/TLS support (needs pool support)\n\
--tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
@@ -81,7 +84,7 @@ Options:\n\
"\
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
--safe safe adjust threads and av settings for current CPU\n\
--nicehash enable nicehash/xmrig-proxy support\n\
--asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen, bulldozer.\n\
--print-time=N print hashrate report every N seconds\n\
--api-port=N port for the miner API\n\
--api-access-token=T access token for API\n\
@@ -89,6 +92,7 @@ Options:\n\
--api-id=ID custom instance ID for API\n\
--api-ipv6 enable IPv6 support for API\n\
--api-no-restricted enable full remote access (only if API token set)\n\
--dry-run test configuration and exit\n\
-h, --help display this help and exit\n\
-V, --version output version information and exit\n\
";

View File

@@ -78,6 +78,12 @@ xmrig::Controller::~Controller()
}
bool xmrig::Controller::isDone() const
{
return ConfigLoader::isDone();
}
bool xmrig::Controller::isReady() const
{
return d_ptr->config && d_ptr->network;

View File

@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CONTROLLER_H__
#define __CONTROLLER_H__
#ifndef XMRIG_CONTROLLER_H
#define XMRIG_CONTROLLER_H
#include "common/interfaces/IWatcherListener.h"
@@ -46,6 +46,7 @@ public:
Controller();
~Controller();
bool isDone() const;
bool isReady() const;
Config *config() const;
int init(int argc, char **argv);
@@ -61,4 +62,4 @@ private:
} /* namespace xmrig */
#endif /* __CONTROLLER_H__ */
#endif /* XMRIG_CONTROLLER_H */

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,7 +25,6 @@
#include <libcpuid.h>
#include <math.h>
#include <string.h>
#include <thread>
#include "core/cpu/AdvancedCpuInfo.h"
@@ -33,13 +33,14 @@
xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
m_assembly(ASM_NONE),
m_aes(false),
m_avx2(false),
m_L2_exclusive(false),
m_brand(),
m_cores(0),
m_L2(0),
m_L3(0),
m_sockets(1),
m_threads(std::thread::hardware_concurrency())
m_threads(0)
{
struct cpu_raw_data_t raw = { 0 };
struct cpu_id_t data = { 0 };
@@ -49,6 +50,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
strncpy(m_brand, data.brand_str, sizeof(m_brand));
m_threads = data.total_logical_cpus;
m_sockets = threads() / data.num_logical_cpus;
if (m_sockets == 0) {
m_sockets = 1;
@@ -75,13 +77,15 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
if (data.flags[CPU_FEATURE_AES]) {
m_aes = true;
if (data.vendor == VENDOR_AMD && data.ext_family >= 23) {
m_assembly = ASM_RYZEN;
if (data.vendor == VENDOR_AMD) {
m_assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
}
else if (data.vendor == VENDOR_INTEL && data.ext_model >= 42) {
else if (data.vendor == VENDOR_INTEL) {
m_assembly = ASM_INTEL;
}
}
m_avx2 = data.flags[CPU_FEATURE_AVX2];
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,6 +42,7 @@ protected:
inline Assembly assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; }
inline bool isSupported() const override { return true; }
inline const char *brand() const override { return m_brand; }
inline int32_t cores() const override { return m_cores; }
@@ -59,6 +61,7 @@ protected:
private:
Assembly m_assembly;
bool m_aes;
bool m_avx2;
bool m_L2_exclusive;
char m_brand[64];
int32_t m_cores;

View File

@@ -5,6 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
@@ -40,7 +41,8 @@ static const char *asmNames[] = {
"none",
"auto",
"intel",
"ryzen"
"ryzen",
"bulldozer"
};

View File

@@ -29,10 +29,23 @@
#include <stddef.h>
#include <stdint.h>
#if defined _MSC_VER || defined XMRIG_ARM
#define ABI_ATTRIBUTE
#else
#define ABI_ATTRIBUTE __attribute__((ms_abi))
#endif
struct cryptonight_ctx;
typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE;
typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE;
struct cryptonight_ctx {
alignas(16) uint8_t state[224];
alignas(16) uint8_t *memory;
cn_mainloop_fun_ms_abi generated_code;
cn_mainloop_double_fun_ms_abi generated_code_double;
uint64_t generated_code_height;
uint64_t generated_code_double_height;
};

View File

@@ -5,10 +5,10 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016 Imran Yusuff <https://github.com/imranyusuff>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -284,6 +284,34 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
}
#ifndef XMRIG_NO_CN_GPU
template<xmrig::Algo ALGO, size_t MEM>
void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
{
constexpr size_t hash_size = 200; // 25x8 bytes
alignas(16) uint64_t hash[25];
for (uint64_t i = 0; i < MEM / 512; i++)
{
memcpy(hash, input, hash_size);
hash[0] ^= i;
xmrig::keccakf(hash, 24);
memcpy(output, hash, 160);
output += 160;
xmrig::keccakf(hash, 24);
memcpy(output, hash, 176);
output += 176;
xmrig::keccakf(hash, 24);
memcpy(output, hash, 176);
output += 176;
}
}
#endif
template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
{
@@ -402,12 +430,12 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
}
template<xmrig::Variant VARIANT>
template<xmrig::Variant VARIANT, xmrig::Variant BASE>
static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i cx)
{
uint64_t* mem_out = (uint64_t*)&l[idx];
if (VARIANT == xmrig::VARIANT_2) {
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1);
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
} else {
@@ -427,14 +455,14 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (IS_V1 && size < 43) {
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 32);
return;
}
@@ -448,6 +476,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
VARIANT1_INIT(0);
VARIANT2_INIT(0);
VARIANT4_RANDOM_MATH_INIT(0);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@@ -473,8 +502,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = _mm_aesenc_si128(cx, ax0);
}
if (IS_V1 || VARIANT == xmrig::VARIANT_2) {
cryptonight_monero_tweak<VARIANT>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {
cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else {
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
}
@@ -484,13 +513,19 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
if (BASE == xmrig::VARIANT_2) {
if (VARIANT == xmrig::VARIANT_WOW) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
} else {
VARIANT2_INTEGER_MATH(0, cl, cx);
}
}
else {
lo = __umul128(idx0, cl, &hi);
lo = __umul128(idx0, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
}
al0 += hi;
@@ -498,9 +533,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (IS_V1) {
} else if (BASE == xmrig::VARIANT_1) {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
} else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
@@ -525,9 +560,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
idx0 = d ^ q;
}
}
if (VARIANT == xmrig::VARIANT_2) {
if (BASE == xmrig::VARIANT_2) {
bx1 = bx0;
}
bx0 = cx;
}
@@ -538,15 +575,44 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;
#ifndef XMRIG_NO_CN_GPU
template<size_t ITER, uint32_t MASK>
void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad);
if (IS_V1 && size < 43) {
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK;
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
xmrig::keccak(input, size, ctx[0]->state);
cn_explode_scratchpad_gpu<ALGO, MEM>(ctx[0]->state, ctx[0]->memory);
fesetround(FE_TONEAREST);
cn_gpu_inner_arm<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
cn_implode_scratchpad<xmrig::CRYPTONIGHT_HEAVY, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
xmrig::keccakf((uint64_t*) ctx[0]->state, 24);
memcpy(output, ctx[0]->state, 32);
}
#endif
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 64);
return;
}
@@ -563,6 +629,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
VARIANT1_INIT(1);
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT4_RANDOM_MATH_INIT(0);
VARIANT4_RANDOM_MATH_INIT(1);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -602,9 +670,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = _mm_aesenc_si128(cx1, ax1);
}
if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) {
cryptonight_monero_tweak<VARIANT>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<VARIANT>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) {
cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<VARIANT, BASE>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
} else {
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
@@ -616,12 +684,19 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
if (VARIANT == xmrig::VARIANT_WOW) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
} else {
VARIANT2_INTEGER_MATH(0, cl, cx0);
}
}
lo = __umul128(idx0, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
} else {
lo = __umul128(idx0, cl, &hi);
}
al0 += hi;
@@ -629,9 +704,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (IS_V1) {
} else if (BASE == xmrig::VARIANT_1) {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
} else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
@@ -641,7 +716,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
ah0 ^= ch;
idx0 = al0;
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
const int64_t n = vgetq_lane_s64(x, 0);
const int32_t d = vgetq_lane_s32(x, 2);
@@ -659,12 +734,19 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
if (VARIANT == xmrig::VARIANT_WOW) {
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
} else {
VARIANT2_INTEGER_MATH(1, cl, cx1);
}
}
lo = __umul128(idx1, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
} else {
lo = __umul128(idx1, cl, &hi);
}
al1 += hi;
@@ -672,9 +754,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
} else if (IS_V1) {
} else if (BASE == xmrig::VARIANT_1) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
} else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
@@ -699,7 +781,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
idx1 = d ^ q;
}
}
if (VARIANT == xmrig::VARIANT_2) {
if (BASE == xmrig::VARIANT_2) {
bx01 = bx00;
bx11 = bx10;
}
@@ -719,19 +801,19 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
}
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
}
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
}

View File

@@ -4,9 +4,10 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,10 +23,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_CONSTANTS_H__
#define __CRYPTONIGHT_CONSTANTS_H__
#ifndef XMRIG_CRYPTONIGHT_CONSTANTS_H
#define XMRIG_CRYPTONIGHT_CONSTANTS_H
#include <stddef.h>
#include <stdint.h>
@@ -38,9 +40,12 @@ namespace xmrig
constexpr const size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024;
constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0;
constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000;
constexpr const uint32_t CRYPTONIGHT_MSR_ITER = 0x40000;
constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000;
constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000;
constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000;
constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0;
constexpr const size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024;
constexpr const uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0;
constexpr const uint32_t CRYPTONIGHT_LITE_ITER = 0x40000;
@@ -49,11 +54,17 @@ constexpr const size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024;
constexpr const uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0;
constexpr const uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000;
constexpr const size_t CRYPTONIGHT_PICO_MEMORY = 256 * 1024;
constexpr const uint32_t CRYPTONIGHT_PICO_MASK = 0x1FFF0;
constexpr const uint32_t CRYPTONIGHT_PICO_ITER = 0x40000;
constexpr const uint32_t CRYPTONIGHT_TRTL_ITER = 0x10000;
template<Algo ALGO> inline constexpr size_t cn_select_memory() { return 0; }
template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT>() { return CRYPTONIGHT_MEMORY; }
template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT_LITE>() { return CRYPTONIGHT_LITE_MEMORY; }
template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT_HEAVY>() { return CRYPTONIGHT_HEAVY_MEMORY; }
template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT_PICO>() { return CRYPTONIGHT_PICO_MEMORY; }
inline size_t cn_select_memory(Algo algorithm)
@@ -69,6 +80,9 @@ inline size_t cn_select_memory(Algo algorithm)
case CRYPTONIGHT_HEAVY:
return CRYPTONIGHT_HEAVY_MEMORY;
case CRYPTONIGHT_PICO:
return CRYPTONIGHT_PICO_MEMORY;
default:
break;
}
@@ -81,6 +95,7 @@ template<Algo ALGO> inline constexpr uint32_t cn_select_mask() { retur
template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT>() { return CRYPTONIGHT_MASK; }
template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT_LITE>() { return CRYPTONIGHT_LITE_MASK; }
template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT_HEAVY>() { return CRYPTONIGHT_HEAVY_MASK; }
template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT_PICO>() { return CRYPTONIGHT_PICO_MASK; }
inline uint32_t cn_select_mask(Algo algorithm)
@@ -96,6 +111,9 @@ inline uint32_t cn_select_mask(Algo algorithm)
case CRYPTONIGHT_HEAVY:
return CRYPTONIGHT_HEAVY_MASK;
case CRYPTONIGHT_PICO:
return CRYPTONIGHT_PICO_MASK;
default:
break;
}
@@ -108,26 +126,37 @@ template<Algo ALGO, Variant variant> inline constexpr uint32_t cn_select_iter()
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_0>() { return CRYPTONIGHT_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_1>() { return CRYPTONIGHT_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_2>() { return CRYPTONIGHT_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_WOW>() { return CRYPTONIGHT_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XTL>() { return CRYPTONIGHT_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>() { return CRYPTONIGHT_MSR_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_HALF>() { return CRYPTONIGHT_HALF_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>() { return CRYPTONIGHT_HALF_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>() { return CRYPTONIGHT_XAO_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RTO>() { return CRYPTONIGHT_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_GPU>() { return CRYPTONIGHT_GPU_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_0>() { return CRYPTONIGHT_LITE_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_1>() { return CRYPTONIGHT_LITE_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_0>() { return CRYPTONIGHT_HEAVY_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_XHV>() { return CRYPTONIGHT_HEAVY_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_TUBE>() { return CRYPTONIGHT_HEAVY_ITER; }
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_PICO, VARIANT_TRTL>() { return CRYPTONIGHT_TRTL_ITER; }
inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
{
switch (variant) {
case VARIANT_MSR:
return CRYPTONIGHT_MSR_ITER;
case VARIANT_HALF:
return CRYPTONIGHT_HALF_ITER;
case VARIANT_GPU:
return CRYPTONIGHT_GPU_ITER;
case VARIANT_RTO:
return CRYPTONIGHT_XAO_ITER;
case VARIANT_TRTL:
return CRYPTONIGHT_TRTL_ITER;
default:
break;
}
@@ -143,6 +172,9 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
case CRYPTONIGHT_HEAVY:
return CRYPTONIGHT_HEAVY_ITER;
case CRYPTONIGHT_PICO:
return CRYPTONIGHT_TRTL_ITER;
default:
break;
}
@@ -161,9 +193,13 @@ template<> inline constexpr Variant cn_base_variant<VARIANT_XHV>() { return VA
template<> inline constexpr Variant cn_base_variant<VARIANT_XAO>() { return VARIANT_0; }
template<> inline constexpr Variant cn_base_variant<VARIANT_RTO>() { return VARIANT_1; }
template<> inline constexpr Variant cn_base_variant<VARIANT_2>() { return VARIANT_2; }
template<> inline constexpr Variant cn_base_variant<VARIANT_HALF>() { return VARIANT_2; }
template<> inline constexpr Variant cn_base_variant<VARIANT_TRTL>() { return VARIANT_2; }
template<> inline constexpr Variant cn_base_variant<VARIANT_GPU>() { return VARIANT_GPU; }
template<> inline constexpr Variant cn_base_variant<VARIANT_WOW>() { return VARIANT_2; }
} /* namespace xmrig */
#endif /* __CRYPTONIGHT_CONSTANTS_H__ */
#endif /* XMRIG_CRYPTONIGHT_CONSTANTS_H */

View File

@@ -7,7 +7,7 @@
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -33,21 +33,21 @@
#ifndef XMRIG_ARM
# define VARIANT1_INIT(part) \
uint64_t tweak1_2_##part = 0; \
if (IS_V1) { \
if (BASE == xmrig::VARIANT_1) { \
tweak1_2_##part = (*reinterpret_cast<const uint64_t*>(input + 35 + part * size) ^ \
*(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24)); \
}
#else
# define VARIANT1_INIT(part) \
uint64_t tweak1_2_##part = 0; \
if (IS_V1) { \
if (BASE == xmrig::VARIANT_1) { \
memcpy(&tweak1_2_##part, input + 35 + part * size, sizeof tweak1_2_##part); \
tweak1_2_##part ^= *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24); \
}
#endif
#define VARIANT1_1(p) \
if (IS_V1) { \
if (BASE == xmrig::VARIANT_1) { \
const uint8_t tmp = reinterpret_cast<const uint8_t*>(p)[11]; \
static const uint32_t table = 0x75310; \
const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
@@ -55,7 +55,7 @@
}
#define VARIANT1_2(p, part) \
if (IS_V1) { \
if (BASE == xmrig::VARIANT_1) { \
(p) ^= tweak1_2_##part; \
}
@@ -66,9 +66,9 @@
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
#ifdef _MSC_VER
# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); }
# define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); }
#else
# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); }
# define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); }
#endif
# define VARIANT2_INTEGER_MATH(part, cl, cx) \
@@ -147,4 +147,32 @@
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
} while (0)
#endif
#define SWAP32LE(x) x
#define SWAP64LE(x) x
#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
#include "variant4_random_math.h"
#define VARIANT4_RANDOM_MATH_INIT(part) \
uint32_t r##part[8]; \
struct V4_Instruction code##part[256]; \
if (VARIANT == xmrig::VARIANT_WOW) { \
r##part[0] = (uint32_t)(h##part[12]); \
r##part[1] = (uint32_t)(h##part[12] >> 32); \
r##part[2] = (uint32_t)(h##part[13]); \
r##part[3] = (uint32_t)(h##part[13] >> 32); \
} \
v4_random_math_init(code##part, height);
#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \
if (VARIANT == xmrig::VARIANT_WOW) { \
cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \
r##part[4] = static_cast<uint32_t>(al); \
r##part[5] = static_cast<uint32_t>(ah); \
r##part[6] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx0)); \
r##part[7] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx1)); \
v4_random_math(code##part, r##part); \
}
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */

View File

@@ -6,7 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,8 +23,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_TEST_H__
#define __CRYPTONIGHT_TEST_H__
#ifndef XMRIG_CRYPTONIGHT_TEST_H
#define XMRIG_CRYPTONIGHT_TEST_H
#include <stdint.h>
const static uint8_t test_input[380] = {
@@ -54,7 +58,20 @@ const static uint8_t test_input[380] = {
0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
};
const static char* test_input_WOW = R"===(
9d47bf4c41b7e8e727e681715acb47fa1677cdba9ca7bcb05ad8cc8abd5daa66 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260
0d4a495cb844a3ca8ba4edb8e6bcf829ef1c06d9cdea2b62ca46c2a21b8b0a79 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261
a1d6d848b5c5915fccd2f64cf216c6b1a02cf7c77bc80d8d4e51b419e88ff0dd 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262
af3a8544a0221a148c2ac90484b19861e3afca33fe17021efb8ad6496b567915 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263
313399e0963ae8a99dab8af66d343e097dae0c0feb08dbc43ccdafef5515f413 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264
6021c6ef90bff9ae94a7506d623d3a7a86c1756d655f50dd558f716d64622a34 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265
2b13000535f3db5f9b9b84a65c4351f386cd2cdedebb8c3ad2eab086e6a3fee5 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266
fc0e1dad8e895749dc90eb690bc1ba059a1cd772afaaf65a106bf9e5e6b80503 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267
b60b0afe144deff7d903ed2d5545e77ebe66a3c51fee7016eeb8fee9eb630c0f 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268
64774b27e7d5fec862fc4c0c13ac6bf09123b6f05bb0e4b75c97f379a2b3a679 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269
)===";
// "cn/0"
const static uint8_t test_output_v0[160] = {
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
@@ -69,12 +86,12 @@ const static uint8_t test_output_v0[160] = {
};
// Cryptonight variant 1 (Monero v7)
// "cn/1" Cryptonight variant 1 (Monero v7)
const static uint8_t test_output_v1[160] = {
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98,
0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C,
0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE,
@@ -84,7 +101,7 @@ const static uint8_t test_output_v1[160] = {
};
// Cryptonight variant 2 (Monero v8)
// "cn/2" Cryptonight variant 2 (Monero v8)
const static uint8_t test_output_v2[160] = {
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
@@ -99,7 +116,7 @@ const static uint8_t test_output_v2[160] = {
};
// Stellite (XTL)
// "cn/xtl" Stellite (XTL)
const static uint8_t test_output_xtl[160] = {
0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,
0xC0, 0x8E, 0x0E, 0x1F, 0xE3, 0xBE, 0x48, 0x57, 0x07, 0x03, 0xFE, 0xE1, 0xEC, 0x0E, 0xB0, 0xB1,
@@ -114,7 +131,22 @@ const static uint8_t test_output_xtl[160] = {
};
// Masari (MSR)
// "cn/half"
const static uint8_t test_output_half[160] = {
0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD,
0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7,
0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B,
0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25,
0xD0, 0xFD, 0x8E, 0xC6, 0x15, 0xD5, 0x12, 0x53, 0x7B, 0x26, 0xF6, 0x01, 0xA5, 0xA8, 0xBE, 0x7C,
0xCF, 0x5E, 0x19, 0xB7, 0x63, 0x0D, 0x0F, 0x02, 0x2B, 0xD7, 0xC4, 0x8C, 0x12, 0x24, 0x80, 0x02,
0xE7, 0xB7, 0xA0, 0x4F, 0x94, 0xF9, 0x46, 0xB5, 0x18, 0x64, 0x7E, 0x4E, 0x9C, 0x81, 0x6C, 0x60,
0x7D, 0x2E, 0xEA, 0xCF, 0x90, 0xCB, 0x68, 0x09, 0xC9, 0x53, 0xF6, 0xA9, 0xCA, 0x0C, 0xAC, 0xDC,
0xFD, 0x07, 0xDA, 0x24, 0x1D, 0xD1, 0x35, 0x32, 0x3C, 0xE8, 0x64, 0x44, 0x5E, 0xCB, 0xB5, 0x00,
0x69, 0xF4, 0x6F, 0xBB, 0x62, 0x0D, 0x25, 0xD8, 0xAC, 0x20, 0x90, 0xC5, 0x1B, 0xD3, 0x5F, 0xCA
};
// "cn/msr" Masari (MSR)
const static uint8_t test_output_msr[160] = {
0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C,
0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5,
@@ -129,7 +161,7 @@ const static uint8_t test_output_msr[160] = {
};
// Alloy (XAO)
// "cn/xao" Alloy (XAO)
const static uint8_t test_output_xao[160] = {
0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C,
0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33,
@@ -144,7 +176,7 @@ const static uint8_t test_output_xao[160] = {
};
// Arto (RTO)
// "cn/rto" Arto (RTO)
const static uint8_t test_output_rto[160] = {
0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19,
0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5,
@@ -160,6 +192,7 @@ const static uint8_t test_output_rto[160] = {
#ifndef XMRIG_NO_AEON
// "cn-lite/0"
const static uint8_t test_output_v0_lite[160] = {
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
@@ -174,7 +207,7 @@ const static uint8_t test_output_v0_lite[160] = {
};
// AEON v7
// "cn-lite/1" AEON v7
const static uint8_t test_output_v1_lite[160] = {
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
@@ -191,6 +224,7 @@ const static uint8_t test_output_v1_lite[160] = {
#ifndef XMRIG_NO_SUMO
// "cn-heavy/0"
const static uint8_t test_output_v0_heavy[160] = {
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
@@ -204,6 +238,8 @@ const static uint8_t test_output_v0_heavy[160] = {
0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB
};
// "cn-heavy/xhv"
const static uint8_t test_output_xhv_heavy[160] = {
0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57,
0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6,
@@ -218,7 +254,7 @@ const static uint8_t test_output_xhv_heavy[160] = {
};
// TUBE
// "cn-heavy/tube"
const static uint8_t test_output_tube_heavy[160] = {
0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF,
0xC3, 0x12, 0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35,
@@ -234,4 +270,40 @@ const static uint8_t test_output_tube_heavy[160] = {
#endif
#endif /* __CRYPTONIGHT_TEST_H__ */
#ifndef XMRIG_NO_CN_PICO
// "cn-pico/trtl"
const static uint8_t test_output_pico_trtl[160] = {
0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69,
0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF,
0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E,
0xE3, 0x54, 0x58, 0x2B, 0xCB, 0x93, 0xF8, 0x69, 0xD4, 0x29, 0x74, 0x4D, 0xE5, 0x72, 0x6A, 0x26,
0x4E, 0xFD, 0x28, 0xFC, 0xD3, 0x74, 0x8A, 0x83, 0xF3, 0xCA, 0x92, 0x84, 0xE7, 0x4E, 0x10, 0xC2,
0x05, 0x62, 0xC7, 0xBE, 0x99, 0x73, 0xED, 0x90, 0xB5, 0x6F, 0xDA, 0x64, 0x71, 0x2D, 0x99, 0x39,
0x29, 0xDB, 0x22, 0x2B, 0x97, 0xB6, 0x37, 0x0E, 0x9A, 0x03, 0x65, 0xCC, 0xF7, 0xD0, 0x9A, 0xB7,
0x68, 0xCE, 0x07, 0x3E, 0x15, 0x40, 0x3C, 0xCE, 0x8C, 0x63, 0x16, 0x72, 0xB5, 0x74, 0x84, 0xF4,
0xA1, 0xE7, 0x53, 0x85, 0xFB, 0x72, 0xDD, 0x75, 0x90, 0x39, 0xB2, 0x3D, 0xC3, 0x08, 0x2C, 0xD5,
0x01, 0x08, 0x27, 0x75, 0x86, 0xB9, 0xBB, 0x9B, 0xDF, 0xEA, 0x49, 0xDE, 0x46, 0xCB, 0x83, 0x45
};
#endif
unsigned char hf_hex2bin(char c, bool &err);
char hf_bin2hex(unsigned char c);
#ifndef XMRIG_NO_CN_GPU
// "cn/gpu"
const static uint8_t test_output_gpu[160] = {
0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
#endif
#endif /* XMRIG_CRYPTONIGHT_TEST_H */

View File

@@ -4,10 +4,10 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,6 +35,7 @@
#endif
#include "common/cpu/Cpu.h"
#include "common/crypto/keccak.h"
#include "crypto/CryptoNight.h"
#include "crypto/CryptoNight_constants.h"
@@ -289,6 +290,34 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
}
#ifndef XMRIG_NO_CN_GPU
template<xmrig::Algo ALGO, size_t MEM>
void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
{
constexpr size_t hash_size = 200; // 25x8 bytes
alignas(16) uint64_t hash[25];
for (uint64_t i = 0; i < MEM / 512; i++)
{
memcpy(hash, input, hash_size);
hash[0] ^= i;
xmrig::keccakf(hash, 24);
memcpy(output, hash, 160);
output += 160;
xmrig::keccakf(hash, 24);
memcpy(output, hash, 176);
output += 176;
xmrig::keccakf(hash, 24);
memcpy(output, hash, 176);
output += 176;
}
}
#endif
template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
{
@@ -427,10 +456,10 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
}
template<xmrig::Variant VARIANT>
template<xmrig::Variant VARIANT, xmrig::Variant BASE>
static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i cx)
{
if (VARIANT == xmrig::VARIANT_2) {
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1);
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
} else {
@@ -451,14 +480,16 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l,
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (IS_V1 && size < 43) {
static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 32);
return;
}
@@ -473,6 +504,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
VARIANT1_INIT(0);
VARIANT2_INIT(0);
VARIANT2_SET_ROUNDING_MODE();
VARIANT4_RANDOM_MATH_INIT(0);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@@ -494,12 +526,12 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
else if (SOFT_AES) {
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
}
else {
else {
cx = _mm_aesenc_si128(cx, ax0);
}
if (IS_V1 || VARIANT == xmrig::VARIANT_2) {
cryptonight_monero_tweak<VARIANT>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx0, bx1, cx);
if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {
cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else {
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
}
@@ -509,13 +541,19 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
if (BASE == xmrig::VARIANT_2) {
if (VARIANT == xmrig::VARIANT_WOW) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
} else {
VARIANT2_INTEGER_MATH(0, cl, cx);
}
}
else {
lo = __umul128(idx0, cl, &hi);
lo = __umul128(idx0, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
}
al0 += hi;
@@ -523,9 +561,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (IS_V1) {
} else if (BASE == xmrig::VARIANT_1) {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
} else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
@@ -548,9 +586,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
idx0 = d ^ q;
}
if (VARIANT == xmrig::VARIANT_2) {
if (BASE == xmrig::VARIANT_2) {
bx1 = bx0;
}
bx0 = cx;
}
@@ -561,25 +601,118 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
#ifndef XMRIG_NO_CN_GPU
template<size_t ITER, uint32_t MASK>
void cn_gpu_inner_avx(const uint8_t *spad, uint8_t *lpad);
template<size_t ITER, uint32_t MASK>
void cn_gpu_inner_ssse3(const uint8_t *spad, uint8_t *lpad);
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK;
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
xmrig::keccak(input, size, ctx[0]->state);
cn_explode_scratchpad_gpu<ALGO, MEM>(ctx[0]->state, ctx[0]->memory);
# ifdef _MSC_VER
_control87(RC_NEAR, MCW_RC);
# else
fesetround(FE_TONEAREST);
# endif
if (xmrig::Cpu::info()->hasAVX2()) {
cn_gpu_inner_avx<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
} else {
cn_gpu_inner_ssse3<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
}
cn_implode_scratchpad<xmrig::CRYPTONIGHT_HEAVY, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
xmrig::keccakf((uint64_t*) ctx[0]->state, 24);
memcpy(output, ctx[0]->state, 32);
}
#endif
#ifndef XMRIG_NO_ASM
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void v4_64_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
void v4_64_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
if ((VARIANT == xmrig::VARIANT_WOW) && (height != ctx[0]->generated_code_height)) {
V4_Instruction code[256];
const int code_size = v4_random_math_init(code, height);
v4_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), ASM);
ctx[0]->generated_code_height = height;
}
xmrig::keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
if (ASM == xmrig::ASM_INTEL) {
cnv2_mainloop_ivybridge_asm(ctx[0]);
if (VARIANT == xmrig::VARIANT_2) {
if (ASM == xmrig::ASM_INTEL) {
cnv2_mainloop_ivybridge_asm(ctx[0]);
}
else if (ASM == xmrig::ASM_RYZEN) {
cnv2_mainloop_ryzen_asm(ctx[0]);
}
else {
cnv2_mainloop_bulldozer_asm(ctx[0]);
}
}
else {
cnv2_mainloop_ryzen_asm(ctx[0]);
else if (VARIANT == xmrig::VARIANT_HALF) {
if (ASM == xmrig::ASM_INTEL) {
cn_half_mainloop_ivybridge_asm(ctx[0]);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_half_mainloop_ryzen_asm(ctx[0]);
}
else {
cn_half_mainloop_bulldozer_asm(ctx[0]);
}
}
else if (VARIANT == xmrig::VARIANT_TRTL) {
if (ASM == xmrig::ASM_INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx[0]);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx[0]);
}
else {
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
}
}
else if (VARIANT == xmrig::VARIANT_WOW) {
ctx[0]->generated_code(ctx[0]);
}
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
@@ -589,17 +722,35 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
if ((VARIANT == xmrig::VARIANT_WOW) && (height != ctx[0]->generated_code_double_height)) {
V4_Instruction code[256];
const int code_size = v4_random_math_init(code, height);
v4_compile_code_double(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code_double), ASM);
ctx[0]->generated_code_double_height = height;
}
xmrig::keccak(input, size, ctx[0]->state);
xmrig::keccak(input + size, size, ctx[1]->state);
cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
if (VARIANT == xmrig::VARIANT_2) {
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
}
else if (VARIANT == xmrig::VARIANT_HALF) {
cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
}
else if (VARIANT == xmrig::VARIANT_TRTL) {
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
}
else if (VARIANT == xmrig::VARIANT_WOW) {
ctx[0]->generated_code_double(ctx[0], ctx[1]);
}
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
@@ -614,14 +765,14 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (IS_V1 && size < 43) {
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 64);
return;
}
@@ -639,6 +790,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT2_SET_ROUNDING_MODE();
VARIANT4_RANDOM_MATH_INIT(0);
VARIANT4_RANDOM_MATH_INIT(1);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -678,9 +831,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = _mm_aesenc_si128(cx1, ax1);
}
if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) {
cryptonight_monero_tweak<VARIANT>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<VARIANT>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) {
cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
} else {
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
@@ -692,12 +845,19 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
if (VARIANT == xmrig::VARIANT_WOW) {
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
} else {
VARIANT2_INTEGER_MATH(0, cl, cx0);
}
}
lo = __umul128(idx0, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
} else {
lo = __umul128(idx0, cl, &hi);
}
al0 += hi;
@@ -705,9 +865,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (IS_V1) {
} else if (BASE == xmrig::VARIANT_1) {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
} else {
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
@@ -733,12 +893,19 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
if (VARIANT == xmrig::VARIANT_WOW) {
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
} else {
VARIANT2_INTEGER_MATH(1, cl, cx1);
}
}
lo = __umul128(idx1, cl, &hi);
if (BASE == xmrig::VARIANT_2) {
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
} else {
lo = __umul128(idx1, cl, &hi);
}
al1 += hi;
@@ -746,9 +913,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
} else if (IS_V1) {
} else if (BASE == xmrig::VARIANT_1) {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
} else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
@@ -772,10 +939,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
idx1 = d ^ q;
}
if (VARIANT == xmrig::VARIANT_2) {
if (BASE == xmrig::VARIANT_2) {
bx01 = bx00;
bx11 = bx10;
}
bx00 = cx0;
bx10 = cx1;
}
@@ -806,8 +974,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
c = _mm_aesenc_si128(c, a); \
} \
\
if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) { \
cryptonight_monero_tweak<VARIANT>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { \
cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
} else { \
_mm_store_si128(ptr, _mm_xor_si128(b0, c)); \
}
@@ -821,16 +989,22 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \
if (VARIANT == xmrig::VARIANT_2) { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \
lo = __umul128(idx, cl##part, &hi); \
if (BASE == xmrig::VARIANT_2) { \
if (VARIANT == xmrig::VARIANT_WOW) { \
const uint64_t al = _mm_cvtsi128_si64(a); \
const uint64_t ah = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \
VARIANT4_RANDOM_MATH(part, al, ah, cl##part, b0, b1); \
} else { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \
} \
} \
lo = __umul128(idx, cl##part, &hi); \
if (BASE == xmrig::VARIANT_2) { \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \
} else { \
lo = __umul128(idx, cl##part, &hi); \
} \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
\
if (IS_V1) { \
if (BASE == xmrig::VARIANT_1) { \
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
\
if (VARIANT == xmrig::VARIANT_TUBE || \
@@ -855,7 +1029,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
\
idx = d ^ q; \
} \
if (VARIANT == xmrig::VARIANT_2) { \
if (BASE == xmrig::VARIANT_2) { \
b1 = b0; \
} \
b0 = c;
@@ -865,29 +1039,30 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i mc##n; \
__m128i division_result_xmm_##n; \
__m128i sqrt_result_xmm_##n; \
if (IS_V1) { \
if (BASE == xmrig::VARIANT_1) { \
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
} \
if (VARIANT == xmrig::VARIANT_2) { \
if (BASE == xmrig::VARIANT_2) { \
division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \
sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \
} \
__m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]); \
__m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
__m128i cx##n = _mm_setzero_si128();
__m128i cx##n = _mm_setzero_si128(); \
VARIANT4_RANDOM_MATH_INIT(n);
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (IS_V1 && size < 43) {
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 32 * 3);
return;
}
@@ -944,14 +1119,14 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;;
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (IS_V1 && size < 43) {
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 32 * 4);
return;
}
@@ -1017,14 +1192,14 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
if (IS_V1 && size < 43) {
if (BASE == xmrig::VARIANT_1 && size < 43) {
memset(output, 0, 32 * 5);
return;
}

View File

@@ -0,0 +1,132 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cstring>
#include "crypto/CryptoNight_monero.h"
typedef void(*void_func)();
#include "crypto/asm/CryptonightR_template.h"
#include "Mem.h"
#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM
static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
{
const ptrdiff_t size = reinterpret_cast<const uint8_t*>(p2) - reinterpret_cast<const uint8_t*>(p1);
if (size > 0) {
memcpy(p, reinterpret_cast<void*>(p1), size);
p += size;
}
}
static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, xmrig::Assembly ASM)
{
uint32_t prev_rot_src = (uint32_t)(-1);
for (int i = 0;; ++i) {
const V4_Instruction inst = code[i];
if (inst.opcode == RET) {
break;
}
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
uint8_t dst_index = inst.dst_index;
uint8_t src_index = inst.src_index;
const uint32_t a = inst.dst_index;
const uint32_t b = inst.src_index;
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (src_index << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
switch (inst.opcode) {
case ROR:
case ROL:
if (b != prev_rot_src) {
prev_rot_src = b;
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
}
break;
}
if (a == prev_rot_src) {
prev_rot_src = (uint32_t)(-1);
}
void_func begin = instructions[c];
if ((ASM = xmrig::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);
if (*prefix == 0x49) {
*(p++) = 0x41;
}
begin = reinterpret_cast<void_func>(prefix + 1);
}
add_code(p, begin, instructions[c + 1]);
if (inst.opcode == ADD) {
*(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
if (is_64_bit) {
prev_rot_src = (uint32_t)(-1);
}
}
}
}
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
uint8_t* p = p0;
add_code(p, CryptonightR_template_part1, CryptonightR_template_part2);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightR_template_part2, CryptonightR_template_part3);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
add_code(p, CryptonightR_template_part3, CryptonightR_template_end);
Mem::flushInstructionCache(machine_code, p - p0);
}
void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
{
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
uint8_t* p = p0;
add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
Mem::flushInstructionCache(machine_code, p - p0);
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,478 @@
PUBLIC FN_PREFIX(CryptonightR_template_part1)
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
PUBLIC FN_PREFIX(CryptonightR_template_part2)
PUBLIC FN_PREFIX(CryptonightR_template_part3)
PUBLIC FN_PREFIX(CryptonightR_template_end)
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
FN_PREFIX(CryptonightR_template_part1):
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movq xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movq xmm0, r12
movq xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movq xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
FN_PREFIX(CryptonightR_template_mainloop):
movdqa xmm5, XMMWORD PTR [r9+r11]
movq xmm0, r15
movq xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movq r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
FN_PREFIX(CryptonightR_template_part2):
mov rax, r13
mul r12
movq xmm0, rax
movq xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz FN_PREFIX(CryptonightR_template_mainloop)
FN_PREFIX(CryptonightR_template_part3):
movq rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
FN_PREFIX(CryptonightR_template_end):
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_part1):
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movq xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movq xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movq xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movq xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movq xmm0, rcx
mov r11d, 524288
movq xmm10, rax
punpcklqdq xmm10, xmm0
movq xmm14, QWORD PTR [rsp+128]
movq xmm15, QWORD PTR [rsp+136]
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_mainloop):
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movq xmm0, r12
mov ecx, ebx
movq xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movq rdx, xmm6
movq xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movq xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movq rdi, xmm5
movq rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movq xmm0, rsp
movq xmm1, rsi
movq xmm2, rdi
movq xmm11, rbp
movq xmm12, r15
movq xmm13, rdx
mov [rsp+112], rcx
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
FN_PREFIX(CryptonightR_template_double_part2):
movq rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movq rsi, xmm1
movq rdi, xmm2
movq rbp, xmm11
movq r15, xmm12
movq rdx, xmm13
mov rcx, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movq xmm1, rdx
movq xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movq rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movq xmm0, rsp
movq xmm1, rbx
movq xmm2, rsi
movq xmm11, rdi
movq xmm12, rbp
movq xmm13, r15
mov [rsp+104], rcx
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movq xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
FN_PREFIX(CryptonightR_template_double_part3):
movq rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movq rbx, xmm1
movq rsi, xmm2
movq rdi, xmm11
movq rbp, xmm12
movq r15, xmm13
mov rcx, [rsp+104]
mov rax, r8
mul rdi
movq xmm1, rdx
movq xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movq rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
FN_PREFIX(CryptonightR_template_double_part4):
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
FN_PREFIX(CryptonightR_template_double_end):

View File

@@ -0,0 +1,478 @@
PUBLIC CryptonightR_template_part1
PUBLIC CryptonightR_template_mainloop
PUBLIC CryptonightR_template_part2
PUBLIC CryptonightR_template_part3
PUBLIC CryptonightR_template_end
PUBLIC CryptonightR_template_double_part1
PUBLIC CryptonightR_template_double_mainloop
PUBLIC CryptonightR_template_double_part2
PUBLIC CryptonightR_template_double_part3
PUBLIC CryptonightR_template_double_part4
PUBLIC CryptonightR_template_double_end
CryptonightR_template_part1:
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movq xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movq xmm0, r12
movq xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movq xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
CryptonightR_template_mainloop:
movdqa xmm5, XMMWORD PTR [r9+r11]
movq xmm0, r15
movq xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movq r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
CryptonightR_template_part2:
mov rax, r13
mul r12
movq xmm0, rax
movq xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz CryptonightR_template_mainloop
CryptonightR_template_part3:
movq rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
CryptonightR_template_end:
ALIGN(64)
CryptonightR_template_double_part1:
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movq xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movq xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movq xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movq xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movq xmm0, rcx
mov r11d, 524288
movq xmm10, rax
punpcklqdq xmm10, xmm0
movq xmm14, QWORD PTR [rsp+128]
movq xmm15, QWORD PTR [rsp+136]
ALIGN(64)
CryptonightR_template_double_mainloop:
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movq xmm0, r12
mov ecx, ebx
movq xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movq rdx, xmm6
movq xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movq xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movq rdi, xmm5
movq rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movq xmm0, rsp
movq xmm1, rsi
movq xmm2, rdi
movq xmm11, rbp
movq xmm12, r15
movq xmm13, rdx
mov [rsp+112], rcx
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
CryptonightR_template_double_part2:
movq rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movq rsi, xmm1
movq rdi, xmm2
movq rbp, xmm11
movq r15, xmm12
movq rdx, xmm13
mov rcx, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movq xmm1, rdx
movq xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movq rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movq xmm0, rsp
movq xmm1, rbx
movq xmm2, rsi
movq xmm11, rdi
movq xmm12, rbp
movq xmm13, r15
mov [rsp+104], rcx
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movq xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
CryptonightR_template_double_part3:
movq rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movq rbx, xmm1
movq rsi, xmm2
movq rdi, xmm11
movq rbp, xmm12
movq r15, xmm13
mov rcx, [rsp+104]
mov rax, r8
mul rdi
movq xmm1, rdx
movq xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movq rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz CryptonightR_template_double_mainloop
CryptonightR_template_double_part4:
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
CryptonightR_template_double_end:

View File

@@ -94,7 +94,7 @@
lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9]
ALIGN 16
ALIGN(64)
main_loop_double_sandybridge:
movdqu xmm9, xmm15
mov eax, edx

View File

@@ -0,0 +1,180 @@
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 64
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov ebp, 524288
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movq xmm3, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movq xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
mov rdi, QWORD PTR [r9+104]
and r10d, 2097136
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm4, rax
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
xorps xmm8, xmm8
mov ax, 1023
shl rax, 52
movq xmm7, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm3, xmm0
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN(64)
cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm6, r8
pinsrq xmm6, r11, 1
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
mov ecx, r10d
mov eax, r10d
xor ecx, 16
xor eax, 32
xor r10d, 48
aesenc xmm5, xmm6
movdqa xmm2, XMMWORD PTR [rcx+rbx]
movdqa xmm1, XMMWORD PTR [rax+rbx]
movdqa xmm0, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
paddq xmm0, xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm0
movdqa XMMWORD PTR [rax+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movaps xmm1, xmm8
mov rsi, r15
xor rsi, rdi
mov edi, 1023
shl rdi, 52
movq r14, xmm5
pextrq rax, xmm5, 1
movdqa xmm0, xmm5
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
movdqa XMMWORD PTR [rdx], xmm0
xor rsi, QWORD PTR [r10+rbx]
lea r12, QWORD PTR [r10+rbx]
mov r13, QWORD PTR [r10+rbx+8]
add r9d, r14d
or r9d, -2147483647
xor edx, edx
div r9
mov eax, eax
shl rdx, 32
lea r15, [rax+rdx]
lea rax, [r14+r15]
shr rax, 12
add rax, rdi
movq xmm0, rax
sqrtsd xmm1, xmm0
movq rdi, xmm1
test rdi, 524287
je sqrt_fixup_bulldozer
shr rdi, 19
sqrt_fixup_bulldozer_ret:
mov rax, rsi
mul r14
movq xmm1, rax
movq xmm0, rdx
punpcklqdq xmm0, xmm1
mov r9d, r10d
mov ecx, r10d
xor r9d, 16
xor ecx, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [rcx+rbx]
xor rdx, [rcx+rbx]
xor rax, [rcx+rbx+8]
movdqa xmm2, XMMWORD PTR [r9+rbx]
pxor xmm2, xmm0
paddq xmm4, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
movdqa XMMWORD PTR [r9+rbx], xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movdqa xmm4, xmm3
add r8, rdx
add r11, rax
mov QWORD PTR [r12], r8
xor r8, rsi
mov QWORD PTR [r12+8], r11
mov r10, r8
xor r11, r13
and r10d, 2097136
movdqa xmm3, xmm5
dec ebp
jne cnv2_main_loop_bulldozer
ldmxcsr DWORD PTR [rsp]
movaps xmm6, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+64]
mov rbx, QWORD PTR [r11+56]
mov rbp, QWORD PTR [r11+64]
mov rsi, QWORD PTR [r11+72]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm7, XMMWORD PTR [rsp+32]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
jmp cnv2_main_loop_bulldozer_endp
sqrt_fixup_bulldozer:
movq r9, xmm5
add r9, r15
dec rdi
mov edx, -1022
shl rdx, 32
mov rax, rdi
shr rdi, 19
shr rax, 20
mov rcx, rdi
sub rcx, rax
lea rcx, [rcx+rdx+1]
add rax, rdx
imul rcx, rax
sub rcx, r9
adc rdi, 0
jmp sqrt_fixup_bulldozer_ret
cnv2_main_loop_bulldozer_endp:

View File

@@ -50,7 +50,7 @@
punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16
ALIGN(64)
main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d

View File

@@ -45,7 +45,7 @@
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN 16
ALIGN(64)
main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm0, r11

View File

@@ -1,4 +1,8 @@
#define ALIGN .align
#ifdef __APPLE__
# define ALIGN(x) .align 6
#else
# define ALIGN(x) .align 64
#endif
.intel_syntax noprefix
#ifdef __APPLE__
# define FN_PREFIX(fn) _ ## fn
@@ -9,29 +13,42 @@
#endif
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
sub rsp, 48
mov rcx, rdi
#include "cnv2_main_loop_ivybridge.inc"
#include "cn2/cnv2_main_loop_ivybridge.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48
mov rcx, rdi
#include "cnv2_main_loop_ryzen.inc"
#include "cn2/cnv2_main_loop_ryzen.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
sub rsp, 48
mov rcx, rdi
#include "cn2/cnv2_main_loop_bulldozer.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
sub rsp, 48
mov rcx, rdi
mov rdx, rsi
#include "cnv2_double_main_loop_sandybridge.inc"
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
add rsp, 48
ret 0
mov eax, 3735929054

View File

@@ -0,0 +1,36 @@
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm
PUBLIC cnv2_double_mainloop_sandybridge_asm
ALIGN(64)
cnv2_mainloop_ivybridge_asm PROC
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_ivybridge_asm ENDP
ALIGN(64)
cnv2_mainloop_ryzen_asm PROC
INCLUDE cn2/cnv2_main_loop_ryzen.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_ryzen_asm ENDP
ALIGN(64)
cnv2_mainloop_bulldozer_asm PROC
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_bulldozer_asm ENDP
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
ret 0
mov eax, 3735929054
cnv2_double_mainloop_sandybridge_asm ENDP
_TEXT_CNV2_MAINLOOP ENDS
END

View File

@@ -1,21 +0,0 @@
#define ALIGN .align
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_double_mainloop_sandybridge_asm
ALIGN 16
cnv2_mainloop_ivybridge_asm:
#include "cnv2_main_loop_ivybridge.inc"
ret 0
ALIGN 16
cnv2_mainloop_ryzen_asm:
#include "cnv2_main_loop_ryzen.inc"
ret 0
ALIGN 16
cnv2_double_mainloop_sandybridge_asm:
#include "cnv2_double_main_loop_sandybridge.inc"
ret 0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,478 @@
PUBLIC FN_PREFIX(CryptonightR_template_part1)
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
PUBLIC FN_PREFIX(CryptonightR_template_part2)
PUBLIC FN_PREFIX(CryptonightR_template_part3)
PUBLIC FN_PREFIX(CryptonightR_template_end)
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
FN_PREFIX(CryptonightR_template_part1):
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movd xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movd xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movd xmm0, r12
movd xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movd xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
FN_PREFIX(CryptonightR_template_mainloop):
movdqa xmm5, XMMWORD PTR [r9+r11]
movd xmm0, r15
movd xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movd r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
FN_PREFIX(CryptonightR_template_part2):
mov rax, r13
mul r12
movd xmm0, rax
movd xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz FN_PREFIX(CryptonightR_template_mainloop)
FN_PREFIX(CryptonightR_template_part3):
movd rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
FN_PREFIX(CryptonightR_template_end):
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_part1):
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movd xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movd xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movd xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movd xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movd xmm0, rcx
mov r11d, 524288
movd xmm10, rax
punpcklqdq xmm10, xmm0
movd xmm14, QWORD PTR [rsp+128]
movd xmm15, QWORD PTR [rsp+136]
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_mainloop):
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movd xmm0, r12
mov ecx, ebx
movd xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movd rdx, xmm6
movd xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movd xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movd rdi, xmm5
movd rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movd xmm0, rsp
movd xmm1, rsi
movd xmm2, rdi
movd xmm11, rbp
movd xmm12, r15
movd xmm13, rdx
mov [rsp+112], rcx
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
FN_PREFIX(CryptonightR_template_double_part2):
movd rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movd rsi, xmm1
movd rdi, xmm2
movd rbp, xmm11
movd r15, xmm12
movd rdx, xmm13
mov rcx, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movd xmm1, rdx
movd xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movd rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movd xmm0, rsp
movd xmm1, rbx
movd xmm2, rsi
movd xmm11, rdi
movd xmm12, rbp
movd xmm13, r15
mov [rsp+104], rcx
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
FN_PREFIX(CryptonightR_template_double_part3):
movd rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movd rbx, xmm1
movd rsi, xmm2
movd rdi, xmm11
movd rbp, xmm12
movd r15, xmm13
mov rcx, [rsp+104]
mov rax, r8
mul rdi
movd xmm1, rdx
movd xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movd rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
FN_PREFIX(CryptonightR_template_double_part4):
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
FN_PREFIX(CryptonightR_template_double_end):

View File

@@ -0,0 +1,478 @@
PUBLIC CryptonightR_template_part1
PUBLIC CryptonightR_template_mainloop
PUBLIC CryptonightR_template_part2
PUBLIC CryptonightR_template_part3
PUBLIC CryptonightR_template_end
PUBLIC CryptonightR_template_double_part1
PUBLIC CryptonightR_template_double_mainloop
PUBLIC CryptonightR_template_double_part2
PUBLIC CryptonightR_template_double_part3
PUBLIC CryptonightR_template_double_part4
PUBLIC CryptonightR_template_double_end
CryptonightR_template_part1:
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movd xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movd xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movd xmm0, r12
movd xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movd xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
CryptonightR_template_mainloop:
movdqa xmm5, XMMWORD PTR [r9+r11]
movd xmm0, r15
movd xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
movd r10d, xmm5
and r10d, 2097136
mov r12d, r9d
mov eax, r9d
xor r9d, 48
xor r12d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movdqu xmm2, XMMWORD PTR [r12+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
paddq xmm0, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r12+r11], xmm0
movd r12, xmm5
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
movd eax, xmm6
movd edx, xmm7
CryptonightR_template_part2:
mov rax, r13
mul r12
movd xmm0, rax
movd xmm3, rdx
punpcklqdq xmm3, xmm0
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
xor rdx, QWORD PTR [r12+r11]
xor rax, QWORD PTR [r11+r12+8]
movdqa xmm2, XMMWORD PTR [r9+r11]
pxor xmm3, xmm2
paddq xmm7, XMMWORD PTR [r10+r11]
paddq xmm1, xmm4
paddq xmm3, xmm6
movdqu XMMWORD PTR [r9+r11], xmm7
movdqu XMMWORD PTR [r12+r11], xmm3
movdqu XMMWORD PTR [r10+r11], xmm1
movdqa xmm7, xmm6
add r15, rax
add rsp, rdx
xor r10, 48
mov QWORD PTR [r10+r11], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [r10+r11+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz CryptonightR_template_mainloop
CryptonightR_template_part3:
movd rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
CryptonightR_template_end:
ALIGN(64)
CryptonightR_template_double_part1:
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movd xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movd xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movd xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movd xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movd xmm0, rcx
mov r11d, 524288
movd xmm10, rax
punpcklqdq xmm10, xmm0
movd xmm14, QWORD PTR [rsp+128]
movd xmm15, QWORD PTR [rsp+136]
ALIGN(64)
CryptonightR_template_double_mainloop:
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movd xmm0, r12
mov ecx, ebx
movd xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movd rdx, xmm6
movd xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movd xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movd rdi, xmm5
movd rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movd xmm0, rsp
movd xmm1, rsi
movd xmm2, rdi
movd xmm11, rbp
movd xmm12, r15
movd xmm13, rdx
mov [rsp+112], rcx
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
CryptonightR_template_double_part2:
movd rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movd rsi, xmm1
movd rdi, xmm2
movd rbp, xmm11
movd r15, xmm12
movd rdx, xmm13
mov rcx, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movd xmm1, rdx
movd xmm0, r8
punpcklqdq xmm1, xmm0
pxor xmm1, XMMWORD PTR [rcx+rsi]
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
xor rdx, QWORD PTR [rsi+rcx]
paddq xmm2, xmm3
xor r8, QWORD PTR [rsi+rcx+8]
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movd rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movd xmm0, rsp
movd xmm1, rbx
movd xmm2, rsi
movd xmm11, rdi
movd xmm12, rbp
movd xmm13, r15
mov [rsp+104], rcx
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
CryptonightR_template_double_part3:
movd rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movd rbx, xmm1
movd rsi, xmm2
movd rdi, xmm11
movd rbp, xmm12
movd r15, xmm13
mov rcx, [rsp+104]
mov rax, r8
mul rdi
movd xmm1, rdx
movd xmm0, rax
punpcklqdq xmm1, xmm0
mov rdi, rcx
mov r8, rax
pxor xmm1, XMMWORD PTR [rbp+rcx]
xor ebp, 48
paddq xmm1, xmm8
xor r8, QWORD PTR [rbp+rcx+8]
xor rdx, QWORD PTR [rbp+rcx]
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movd rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz CryptonightR_template_double_mainloop
CryptonightR_template_double_part4:
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
CryptonightR_template_double_end:

View File

@@ -0,0 +1,410 @@
mov rax, rsp
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 184
stmxcsr DWORD PTR [rsp+272]
mov DWORD PTR [rsp+276], 24448
ldmxcsr DWORD PTR [rsp+276]
mov r13, QWORD PTR [rcx+224]
mov r9, rdx
mov r10, QWORD PTR [rcx+32]
mov r8, rcx
xor r10, QWORD PTR [rcx]
mov r14d, 524288
mov r11, QWORD PTR [rcx+40]
xor r11, QWORD PTR [rcx+8]
mov rsi, QWORD PTR [rdx+224]
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov rdi, QWORD PTR [r9+32]
xor rdi, QWORD PTR [r9]
mov rbp, QWORD PTR [r9+40]
xor rbp, QWORD PTR [r9+8]
movd xmm0, rdx
movaps XMMWORD PTR [rax-88], xmm6
movaps XMMWORD PTR [rax-104], xmm7
movaps XMMWORD PTR [rax-120], xmm8
movaps XMMWORD PTR [rsp+112], xmm9
movaps XMMWORD PTR [rsp+96], xmm10
movaps XMMWORD PTR [rsp+80], xmm11
movaps XMMWORD PTR [rsp+64], xmm12
movaps XMMWORD PTR [rsp+48], xmm13
movaps XMMWORD PTR [rsp+32], xmm14
movaps XMMWORD PTR [rsp+16], xmm15
mov rdx, r10
movd xmm4, QWORD PTR [r8+96]
and edx, 2097136
mov rax, QWORD PTR [rcx+48]
xorps xmm13, xmm13
xor rax, QWORD PTR [rcx+16]
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r8+72]
movd xmm5, QWORD PTR [r8+104]
movd xmm7, rax
mov eax, 1
shl rax, 52
movd xmm14, rax
punpcklqdq xmm14, xmm14
mov eax, 1023
shl rax, 52
movd xmm12, rax
punpcklqdq xmm12, xmm12
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
punpcklqdq xmm7, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [r9+56]
xor rcx, QWORD PTR [r9+24]
movd xmm3, rax
mov rax, QWORD PTR [r9+48]
xor rax, QWORD PTR [r9+16]
punpcklqdq xmm3, xmm0
movd xmm0, rcx
mov QWORD PTR [rsp], r13
mov rcx, QWORD PTR [r9+88]
xor rcx, QWORD PTR [r9+72]
movd xmm6, rax
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
punpcklqdq xmm6, xmm0
movd xmm0, rcx
mov QWORD PTR [rsp+256], r10
mov rcx, rdi
mov QWORD PTR [rsp+264], r11
movd xmm8, rax
and ecx, 2097136
punpcklqdq xmm8, xmm0
movd xmm0, QWORD PTR [r9+96]
punpcklqdq xmm4, xmm0
movd xmm0, QWORD PTR [r9+104]
lea r8, QWORD PTR [rcx+rsi]
movdqu xmm11, XMMWORD PTR [r8]
punpcklqdq xmm5, xmm0
lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9]
ALIGN(64)
main_loop_double_sandybridge:
movdqu xmm9, xmm15
mov eax, edx
mov ebx, edx
xor eax, 16
xor ebx, 32
xor edx, 48
movd xmm0, r11
movd xmm2, r10
punpcklqdq xmm2, xmm0
aesenc xmm9, xmm2
movdqu xmm0, XMMWORD PTR [rax+r13]
movdqu xmm1, XMMWORD PTR [rbx+r13]
paddq xmm0, xmm7
paddq xmm1, xmm2
movdqu XMMWORD PTR [rbx+r13], xmm0
movdqu xmm0, XMMWORD PTR [rdx+r13]
movdqu XMMWORD PTR [rdx+r13], xmm1
paddq xmm0, xmm3
movdqu XMMWORD PTR [rax+r13], xmm0
movd r11, xmm9
mov edx, r11d
and edx, 2097136
movdqa xmm0, xmm9
pxor xmm0, xmm7
movdqu XMMWORD PTR [r9], xmm0
lea rbx, QWORD PTR [rdx+r13]
mov r10, QWORD PTR [rdx+r13]
movdqu xmm10, xmm11
movd xmm0, rbp
movd xmm11, rdi
punpcklqdq xmm11, xmm0
aesenc xmm10, xmm11
mov eax, ecx
mov r12d, ecx
xor eax, 16
xor r12d, 32
xor ecx, 48
movdqu xmm0, XMMWORD PTR [rax+rsi]
paddq xmm0, xmm6
movdqu xmm1, XMMWORD PTR [r12+rsi]
movdqu XMMWORD PTR [r12+rsi], xmm0
paddq xmm1, xmm11
movdqu xmm0, XMMWORD PTR [rcx+rsi]
movdqu XMMWORD PTR [rcx+rsi], xmm1
paddq xmm0, xmm8
movdqu XMMWORD PTR [rax+rsi], xmm0
movd rcx, xmm10
and ecx, 2097136
movdqa xmm0, xmm10
pxor xmm0, xmm6
movdqu XMMWORD PTR [r8], xmm0
mov r12, QWORD PTR [rcx+rsi]
mov r9, QWORD PTR [rbx+8]
xor edx, 16
mov r8d, edx
mov r15d, edx
movd rdx, xmm5
shl rdx, 32
movd rax, xmm4
xor rdx, rax
xor r10, rdx
mov rax, r10
mul r11
mov r11d, r8d
xor r11d, 48
movd xmm0, rdx
xor rdx, [r11+r13]
movd xmm1, rax
xor rax, [r11+r13+8]
punpcklqdq xmm0, xmm1
pxor xmm0, XMMWORD PTR [r8+r13]
xor r8d, 32
movdqu xmm1, XMMWORD PTR [r11+r13]
paddq xmm0, xmm7
paddq xmm1, xmm2
movdqu XMMWORD PTR [r11+r13], xmm0
movdqu xmm0, XMMWORD PTR [r8+r13]
movdqu XMMWORD PTR [r8+r13], xmm1
paddq xmm0, xmm3
movdqu XMMWORD PTR [r15+r13], xmm0
mov r11, QWORD PTR [rsp+256]
add r11, rdx
mov rdx, QWORD PTR [rsp+264]
add rdx, rax
mov QWORD PTR [rbx], r11
xor r11, r10
mov QWORD PTR [rbx+8], rdx
xor rdx, r9
mov QWORD PTR [rsp+256], r11
and r11d, 2097136
mov QWORD PTR [rsp+264], rdx
mov QWORD PTR [rsp+8], r11
lea r15, QWORD PTR [r11+r13]
movdqu xmm15, XMMWORD PTR [r11+r13]
lea r13, QWORD PTR [rsi+rcx]
movdqa xmm0, xmm5
psrldq xmm0, 8
movaps xmm2, xmm13
movd r10, xmm0
psllq xmm5, 1
shl r10, 32
movdqa xmm0, xmm9
psrldq xmm0, 8
movdqa xmm1, xmm10
movd r11, xmm0
psrldq xmm1, 8
movd r8, xmm1
psrldq xmm4, 8
movaps xmm0, xmm13
movd rax, xmm4
xor r10, rax
movaps xmm1, xmm13
xor r10, r12
lea rax, QWORD PTR [r11+1]
shr rax, 1
movdqa xmm3, xmm9
punpcklqdq xmm3, xmm10
paddq xmm5, xmm3
movd rdx, xmm5
psrldq xmm5, 8
cvtsi2sd xmm2, rax
or edx, -2147483647
lea rax, QWORD PTR [r8+1]
shr rax, 1
movd r9, xmm5
cvtsi2sd xmm0, rax
or r9d, -2147483647
cvtsi2sd xmm1, rdx
unpcklpd xmm2, xmm0
movaps xmm0, xmm13
cvtsi2sd xmm0, r9
unpcklpd xmm1, xmm0
divpd xmm2, xmm1
paddq xmm2, xmm14
cvttsd2si rax, xmm2
psrldq xmm2, 8
mov rbx, rax
imul rax, rdx
sub r11, rax
js div_fix_1_sandybridge
div_fix_1_ret_sandybridge:
cvttsd2si rdx, xmm2
mov rax, rdx
imul rax, r9
movd xmm2, r11d
movd xmm4, ebx
sub r8, rax
js div_fix_2_sandybridge
div_fix_2_ret_sandybridge:
movd xmm1, r8d
movd xmm0, edx
punpckldq xmm2, xmm1
punpckldq xmm4, xmm0
punpckldq xmm4, xmm2
paddq xmm3, xmm4
movdqa xmm0, xmm3
psrlq xmm0, 12
paddq xmm0, xmm12
sqrtpd xmm1, xmm0
movd r9, xmm1
movdqa xmm5, xmm1
psrlq xmm5, 19
test r9, 524287
je sqrt_fix_1_sandybridge
sqrt_fix_1_ret_sandybridge:
movd r9, xmm10
psrldq xmm1, 8
movd r8, xmm1
test r8, 524287
je sqrt_fix_2_sandybridge
sqrt_fix_2_ret_sandybridge:
mov r12d, ecx
mov r8d, ecx
xor r12d, 16
xor r8d, 32
xor ecx, 48
mov rax, r10
mul r9
movd xmm0, rax
movd xmm3, rdx
punpcklqdq xmm3, xmm0
movdqu xmm0, XMMWORD PTR [r12+rsi]
pxor xmm0, xmm3
movdqu xmm1, XMMWORD PTR [r8+rsi]
xor rdx, [r8+rsi]
xor rax, [r8+rsi+8]
movdqu xmm3, XMMWORD PTR [rcx+rsi]
paddq xmm0, xmm6
paddq xmm1, xmm11
paddq xmm3, xmm8
movdqu XMMWORD PTR [r8+rsi], xmm0
movdqu XMMWORD PTR [rcx+rsi], xmm1
movdqu XMMWORD PTR [r12+rsi], xmm3
add rdi, rdx
mov QWORD PTR [r13], rdi
xor rdi, r10
mov ecx, edi
and ecx, 2097136
lea r8, QWORD PTR [rcx+rsi]
mov rdx, QWORD PTR [r13+8]
add rbp, rax
mov QWORD PTR [r13+8], rbp
movdqu xmm11, XMMWORD PTR [rcx+rsi]
xor rbp, rdx
mov r13, QWORD PTR [rsp]
movdqa xmm3, xmm7
mov rdx, QWORD PTR [rsp+8]
movdqa xmm8, xmm6
mov r10, QWORD PTR [rsp+256]
movdqa xmm7, xmm9
mov r11, QWORD PTR [rsp+264]
movdqa xmm6, xmm10
mov r9, r15
dec r14d
jne main_loop_double_sandybridge
ldmxcsr DWORD PTR [rsp+272]
movaps xmm13, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+184]
movaps xmm6, XMMWORD PTR [r11-24]
movaps xmm7, XMMWORD PTR [r11-40]
movaps xmm8, XMMWORD PTR [r11-56]
movaps xmm9, XMMWORD PTR [r11-72]
movaps xmm10, XMMWORD PTR [r11-88]
movaps xmm11, XMMWORD PTR [r11-104]
movaps xmm12, XMMWORD PTR [r11-120]
movaps xmm14, XMMWORD PTR [rsp+32]
movaps xmm15, XMMWORD PTR [rsp+16]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
jmp cnv2_double_mainloop_asm_sandybridge_endp
div_fix_1_sandybridge:
dec rbx
add r11, rdx
jmp div_fix_1_ret_sandybridge
div_fix_2_sandybridge:
dec rdx
add r8, r9
jmp div_fix_2_ret_sandybridge
sqrt_fix_1_sandybridge:
movd r8, xmm3
movdqa xmm0, xmm5
psrldq xmm0, 8
dec r9
mov r11d, -1022
shl r11, 32
mov rax, r9
shr r9, 19
shr rax, 20
mov rdx, r9
sub rdx, rax
lea rdx, [rdx+r11+1]
add rax, r11
imul rdx, rax
sub rdx, r8
adc r9, 0
movd xmm5, r9
punpcklqdq xmm5, xmm0
jmp sqrt_fix_1_ret_sandybridge
sqrt_fix_2_sandybridge:
psrldq xmm3, 8
movd r11, xmm3
dec r8
mov ebx, -1022
shl rbx, 32
mov rax, r8
shr r8, 19
shr rax, 20
mov rdx, r8
sub rdx, rax
lea rdx, [rdx+rbx+1]
add rax, rbx
imul rdx, rax
sub rdx, r11
adc r8, 0
movd xmm0, r8
punpcklqdq xmm5, xmm0
jmp sqrt_fix_2_ret_sandybridge
cnv2_double_mainloop_asm_sandybridge_endp:

View File

@@ -0,0 +1,180 @@
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 64
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov ebp, 524288
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movd xmm3, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movd xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
mov rdi, QWORD PTR [r9+104]
and r10d, 2097136
movaps XMMWORD PTR [rsp+48], xmm6
movd xmm4, rax
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
xorps xmm8, xmm8
mov ax, 1023
shl rax, 52
movd xmm7, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm3, xmm0
movd xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN(64)
cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movd xmm6, r8
pinsrq xmm6, r11, 1
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
mov ecx, r10d
mov eax, r10d
xor ecx, 16
xor eax, 32
xor r10d, 48
aesenc xmm5, xmm6
movdqa xmm2, XMMWORD PTR [rcx+rbx]
movdqa xmm1, XMMWORD PTR [rax+rbx]
movdqa xmm0, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
paddq xmm0, xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm0
movdqa XMMWORD PTR [rax+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movaps xmm1, xmm8
mov rsi, r15
xor rsi, rdi
mov edi, 1023
shl rdi, 52
movd r14, xmm5
pextrq rax, xmm5, 1
movdqa xmm0, xmm5
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
movdqa XMMWORD PTR [rdx], xmm0
xor rsi, QWORD PTR [r10+rbx]
lea r12, QWORD PTR [r10+rbx]
mov r13, QWORD PTR [r10+rbx+8]
add r9d, r14d
or r9d, -2147483647
xor edx, edx
div r9
mov eax, eax
shl rdx, 32
lea r15, [rax+rdx]
lea rax, [r14+r15]
shr rax, 12
add rax, rdi
movd xmm0, rax
sqrtsd xmm1, xmm0
movd rdi, xmm1
test rdi, 524287
je sqrt_fixup_bulldozer
shr rdi, 19
sqrt_fixup_bulldozer_ret:
mov rax, rsi
mul r14
movd xmm1, rax
movd xmm0, rdx
punpcklqdq xmm0, xmm1
mov r9d, r10d
mov ecx, r10d
xor r9d, 16
xor ecx, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [rcx+rbx]
xor rdx, [rcx+rbx]
xor rax, [rcx+rbx+8]
movdqa xmm2, XMMWORD PTR [r9+rbx]
pxor xmm2, xmm0
paddq xmm4, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
movdqa XMMWORD PTR [r9+rbx], xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movdqa xmm4, xmm3
add r8, rdx
add r11, rax
mov QWORD PTR [r12], r8
xor r8, rsi
mov QWORD PTR [r12+8], r11
mov r10, r8
xor r11, r13
and r10d, 2097136
movdqa xmm3, xmm5
dec ebp
jne cnv2_main_loop_bulldozer
ldmxcsr DWORD PTR [rsp]
movaps xmm6, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+64]
mov rbx, QWORD PTR [r11+56]
mov rbp, QWORD PTR [r11+64]
mov rsi, QWORD PTR [r11+72]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm7, XMMWORD PTR [rsp+32]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
jmp cnv2_main_loop_bulldozer_endp
sqrt_fixup_bulldozer:
movd r9, xmm5
add r9, r15
dec rdi
mov edx, -1022
shl rdx, 32
mov rax, rdi
shr rdi, 19
shr rax, 20
mov rcx, rdi
sub rcx, rax
lea rcx, [rcx+rdx+1]
add rax, rdx
imul rcx, rax
sub rcx, r9
adc rdi, 0
jmp sqrt_fixup_bulldozer_ret
cnv2_main_loop_bulldozer_endp:

View File

@@ -0,0 +1,186 @@
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 80
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov esi, 524288
mov r8, QWORD PTR [rcx+32]
mov r13d, -2147483647
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movd xmm4, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movd xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
movd xmm3, QWORD PTR [r9+104]
movaps XMMWORD PTR [rsp+64], xmm6
movaps XMMWORD PTR [rsp+48], xmm7
movaps XMMWORD PTR [rsp+32], xmm8
and r10d, 2097136
movd xmm5, rax
xor eax, eax
mov QWORD PTR [rsp+16], rax
mov ax, 1023
shl rax, 52
movd xmm8, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm4, xmm0
movd xmm0, rcx
punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN(64)
main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d
mov eax, r10d
mov rdi, r15
xor ecx, 16
xor eax, 32
xor r10d, 48
movd xmm0, r11
movd xmm7, r8
punpcklqdq xmm7, xmm0
aesenc xmm6, xmm7
movd rbp, xmm6
mov r9, rbp
and r9d, 2097136
movdqu xmm2, XMMWORD PTR [rcx+rbx]
movdqu xmm1, XMMWORD PTR [rax+rbx]
movdqu xmm0, XMMWORD PTR [r10+rbx]
paddq xmm1, xmm7
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [rcx+rbx], xmm0
movdqu XMMWORD PTR [rax+rbx], xmm2
movdqu XMMWORD PTR [r10+rbx], xmm1
mov r10, r9
xor r10d, 32
movd rcx, xmm3
mov rax, rcx
shl rax, 32
xor rdi, rax
movdqa xmm0, xmm6
pxor xmm0, xmm4
movdqu XMMWORD PTR [rdx], xmm0
xor rdi, QWORD PTR [r9+rbx]
lea r14, QWORD PTR [r9+rbx]
mov r12, QWORD PTR [r14+8]
xor edx, edx
lea r9d, DWORD PTR [ecx+ecx]
add r9d, ebp
movdqa xmm0, xmm6
psrldq xmm0, 8
or r9d, r13d
movd rax, xmm0
div r9
xorps xmm3, xmm3
mov eax, eax
shl rdx, 32
add rdx, rax
lea r9, QWORD PTR [rdx+rbp]
mov r15, rdx
mov rax, r9
shr rax, 12
movd xmm0, rax
paddq xmm0, xmm8
sqrtsd xmm3, xmm0
psubq xmm3, XMMWORD PTR [rsp+16]
movd rdx, xmm3
test edx, 524287
je sqrt_fixup_ivybridge
psrlq xmm3, 19
sqrt_fixup_ivybridge_ret:
mov ecx, r10d
mov rax, rdi
mul rbp
movd xmm2, rdx
xor rdx, [rcx+rbx]
add r8, rdx
mov QWORD PTR [r14], r8
xor r8, rdi
mov edi, r8d
and edi, 2097136
movd xmm0, rax
xor rax, [rcx+rbx+8]
add r11, rax
mov QWORD PTR [r14+8], r11
punpcklqdq xmm2, xmm0
mov r9d, r10d
xor r9d, 48
xor r10d, 16
pxor xmm2, XMMWORD PTR [r9+rbx]
movdqu xmm0, XMMWORD PTR [r10+rbx]
paddq xmm0, xmm5
movdqu xmm1, XMMWORD PTR [rcx+rbx]
paddq xmm2, xmm4
paddq xmm1, xmm7
movdqa xmm5, xmm4
movdqu XMMWORD PTR [r9+rbx], xmm0
movdqa xmm4, xmm6
movdqu XMMWORD PTR [rcx+rbx], xmm2
movdqu XMMWORD PTR [r10+rbx], xmm1
movdqu xmm6, [rdi+rbx]
mov r10d, edi
xor r11, r12
dec rsi
jne main_loop_ivybridge
ldmxcsr DWORD PTR [rsp]
mov rbx, QWORD PTR [rsp+160]
movaps xmm6, XMMWORD PTR [rsp+64]
movaps xmm7, XMMWORD PTR [rsp+48]
movaps xmm8, XMMWORD PTR [rsp+32]
add rsp, 80
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
jmp cnv2_main_loop_ivybridge_endp
sqrt_fixup_ivybridge:
dec rdx
mov r13d, -1022
shl r13, 32
mov rax, rdx
shr rdx, 19
shr rax, 20
mov rcx, rdx
sub rcx, rax
add rax, r13
not r13
sub rcx, r13
mov r13d, -2147483647
imul rcx, rax
sub rcx, r9
adc rdx, 0
movd xmm3, rdx
jmp sqrt_fixup_ivybridge_ret
cnv2_main_loop_ivybridge_endp:

View File

@@ -0,0 +1,179 @@
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 64
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov ebp, 524288
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movd xmm3, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movd xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
mov rdi, QWORD PTR [r9+104]
and r10d, 2097136
movaps XMMWORD PTR [rsp+48], xmm6
movd xmm4, rax
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
xorps xmm8, xmm8
mov ax, 1023
shl rax, 52
movd xmm7, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm3, xmm0
movd xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN(64)
main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movd xmm0, r11
movd xmm6, r8
punpcklqdq xmm6, xmm0
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
mov ecx, r10d
mov eax, r10d
xor ecx, 16
xor eax, 32
xor r10d, 48
aesenc xmm5, xmm6
movdqa xmm2, XMMWORD PTR [rcx+rbx]
movdqa xmm1, XMMWORD PTR [rax+rbx]
movdqa xmm0, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
paddq xmm0, xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm0
movdqa XMMWORD PTR [rax+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movaps xmm1, xmm8
mov rsi, r15
xor rsi, rdi
movd r14, xmm5
movdqa xmm0, xmm5
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
movdqa XMMWORD PTR [rdx], xmm0
xor rsi, QWORD PTR [r10+rbx]
lea r12, QWORD PTR [r10+rbx]
mov r13, QWORD PTR [r10+rbx+8]
add r9d, r14d
or r9d, -2147483647
xor edx, edx
movdqa xmm0, xmm5
psrldq xmm0, 8
movd rax, xmm0
div r9
movd xmm0, rax
movd xmm1, rdx
punpckldq xmm0, xmm1
movd r15, xmm0
paddq xmm0, xmm5
movdqa xmm2, xmm0
psrlq xmm0, 12
paddq xmm0, xmm7
sqrtsd xmm1, xmm0
movd rdi, xmm1
test rdi, 524287
je sqrt_fixup_ryzen
shr rdi, 19
sqrt_fixup_ryzen_ret:
mov rax, rsi
mul r14
movd xmm1, rax
movd xmm0, rdx
punpcklqdq xmm0, xmm1
mov r9d, r10d
mov ecx, r10d
xor r9d, 16
xor ecx, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [rcx+rbx]
xor rdx, [rcx+rbx]
xor rax, [rcx+rbx+8]
movdqa xmm2, XMMWORD PTR [r9+rbx]
pxor xmm2, xmm0
paddq xmm4, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
movdqa XMMWORD PTR [r9+rbx], xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movdqa xmm4, xmm3
add r8, rdx
add r11, rax
mov QWORD PTR [r12], r8
xor r8, rsi
mov QWORD PTR [r12+8], r11
mov r10, r8
xor r11, r13
and r10d, 2097136
movdqa xmm3, xmm5
dec ebp
jne main_loop_ryzen
ldmxcsr DWORD PTR [rsp]
movaps xmm6, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+64]
mov rbx, QWORD PTR [r11+56]
mov rbp, QWORD PTR [r11+64]
mov rsi, QWORD PTR [r11+72]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm7, XMMWORD PTR [rsp+32]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
jmp cnv2_main_loop_ryzen_endp
sqrt_fixup_ryzen:
movd r9, xmm2
dec rdi
mov edx, -1022
shl rdx, 32
mov rax, rdi
shr rdi, 19
shr rax, 20
mov rcx, rdi
sub rcx, rax
lea rcx, [rcx+rdx+1]
add rax, rdx
imul rcx, rax
sub rcx, r9
adc rdi, 0
jmp sqrt_fixup_ryzen_ret
cnv2_main_loop_ryzen_endp:

View File

@@ -0,0 +1,31 @@
#define ALIGN(x) .align 64
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm
.global cnv2_double_mainloop_sandybridge_asm
ALIGN(64)
cnv2_mainloop_ivybridge_asm:
#include "../cn2/cnv2_main_loop_ivybridge.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm:
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
ret 0
mov eax, 3735929054

View File

@@ -1,24 +1,35 @@
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm
PUBLIC cnv2_double_mainloop_sandybridge_asm
ALIGN 64
cnv2_mainloop_ivybridge_asm PROC
INCLUDE cnv2_main_loop_ivybridge.inc
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_ivybridge_asm ENDP
ALIGN 64
cnv2_mainloop_ryzen_asm PROC
INCLUDE cnv2_main_loop_ryzen.inc
INCLUDE cn2/cnv2_main_loop_ryzen.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_ryzen_asm ENDP
ALIGN 64
cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cnv2_double_main_loop_sandybridge.inc
cnv2_mainloop_bulldozer_asm PROC
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_bulldozer_asm ENDP
ALIGN 64
cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
ret 0
mov eax, 3735929054
cnv2_double_mainloop_sandybridge_asm ENDP
_TEXT_CNV2_MAINLOOP ENDS

240
src/crypto/cn_gpu_arm.cpp Normal file
View File

@@ -0,0 +1,240 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <arm_neon.h>
#include "crypto/CryptoNight_constants.h"
inline void vandq_f32(float32x4_t &v, uint32_t v2)
{
uint32x4_t vc = vdupq_n_u32(v2);
v = (float32x4_t)vandq_u32((uint32x4_t)v, vc);
}
inline void vorq_f32(float32x4_t &v, uint32_t v2)
{
uint32x4_t vc = vdupq_n_u32(v2);
v = (float32x4_t)vorrq_u32((uint32x4_t)v, vc);
}
template <size_t v>
inline void vrot_si32(int32x4_t &r)
{
r = (int32x4_t)vextq_s8((int8x16_t)r, (int8x16_t)r, v);
}
template <>
inline void vrot_si32<0>(int32x4_t &r)
{
}
inline uint32_t vheor_s32(const int32x4_t &v)
{
int32x4_t v0 = veorq_s32(v, vrev64q_s32(v));
int32x2_t vf = veor_s32(vget_high_s32(v0), vget_low_s32(v0));
return (uint32_t)vget_lane_s32(vf, 0);
}
inline void prep_dv(int32_t *idx, int32x4_t &v, float32x4_t &n)
{
v = vld1q_s32(idx);
n = vcvtq_f32_s32(v);
}
inline void sub_round(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &n, float32x4_t &d, float32x4_t &c)
{
float32x4_t ln1 = vaddq_f32(n1, c);
float32x4_t nn = vmulq_f32(n0, c);
nn = vmulq_f32(ln1, vmulq_f32(nn, nn));
vandq_f32(nn, 0xFEFFFFFF);
vorq_f32(nn, 0x00800000);
n = vaddq_f32(n, nn);
float32x4_t ln3 = vsubq_f32(n3, c);
float32x4_t dd = vmulq_f32(n2, c);
dd = vmulq_f32(ln3, vmulq_f32(dd, dd));
vandq_f32(dd, 0xFEFFFFFF);
vorq_f32(dd, 0x00800000);
d = vaddq_f32(d, dd);
//Constant feedback
c = vaddq_f32(c, rnd_c);
c = vaddq_f32(c, vdupq_n_f32(0.734375f));
float32x4_t r = vaddq_f32(nn, dd);
vandq_f32(r, 0x807FFFFF);
vorq_f32(r, 0x40000000);
c = vaddq_f32(c, r);
}
inline void round_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &c, float32x4_t &r)
{
float32x4_t n = vdupq_n_f32(0.0f), d = vdupq_n_f32(0.0f);
sub_round(n0, n1, n2, n3, rnd_c, n, d, c);
sub_round(n1, n2, n3, n0, rnd_c, n, d, c);
sub_round(n2, n3, n0, n1, rnd_c, n, d, c);
sub_round(n3, n0, n1, n2, rnd_c, n, d, c);
sub_round(n3, n2, n1, n0, rnd_c, n, d, c);
sub_round(n2, n1, n0, n3, rnd_c, n, d, c);
sub_round(n1, n0, n3, n2, rnd_c, n, d, c);
sub_round(n0, n3, n2, n1, rnd_c, n, d, c);
// Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0
vandq_f32(d, 0xFF7FFFFF);
vorq_f32(d, 0x40000000);
r = vaddq_f32(r, vdivq_f32(n, d));
}
// 112×4 = 448
template <bool add>
inline int32x4_t single_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum)
{
float32x4_t c = vdupq_n_f32(cnt);
float32x4_t r = vdupq_n_f32(0.0f);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
// do a quick fmod by setting exp to 2
vandq_f32(r, 0x807FFFFF);
vorq_f32(r, 0x40000000);
if (add) {
sum = vaddq_f32(sum, r);
} else {
sum = r;
}
const float32x4_t cc2 = vdupq_n_f32(536870880.0f);
r = vmulq_f32(r, cc2); // 35
return vcvtq_s32_f32(r);
}
template<size_t rot>
inline void single_compute_wrap(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum, int32x4_t &out)
{
int32x4_t r = single_compute<rot % 2 != 0>(n0, n1, n2, n3, cnt, rnd_c, sum);
vrot_si32<rot>(r);
out = veorq_s32(out, r);
}
template<uint32_t MASK>
inline int32_t *scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<int32_t *>(lpad + (idx & MASK) + n * 16); }
template<size_t ITER, uint32_t MASK>
void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad)
{
uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8;
int32_t *idx0 = scratchpad_ptr<MASK>(lpad, s, 0);
int32_t *idx1 = scratchpad_ptr<MASK>(lpad, s, 1);
int32_t *idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
int32_t *idx3 = scratchpad_ptr<MASK>(lpad, s, 3);
float32x4_t sum0 = vdupq_n_f32(0.0f);
for (size_t i = 0; i < ITER; i++) {
float32x4_t n0, n1, n2, n3;
int32x4_t v0, v1, v2, v3;
float32x4_t suma, sumb, sum1, sum2, sum3;
prep_dv(idx0, v0, n0);
prep_dv(idx1, v1, n1);
prep_dv(idx2, v2, n2);
prep_dv(idx3, v3, n3);
float32x4_t rc = sum0;
int32x4_t out, out2;
out = vdupq_n_s32(0);
single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out);
single_compute_wrap<1>(n0, n2, n3, n1, 1.2812500f, rc, suma, out);
single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out);
single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out);
sum0 = vaddq_f32(suma, sumb);
vst1q_s32(idx0, veorq_s32(v0, out));
out2 = out;
out = vdupq_n_s32(0);
single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out);
single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out);
single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out);
single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out);
sum1 = vaddq_f32(suma, sumb);
vst1q_s32(idx1, veorq_s32(v1, out));
out2 = veorq_s32(out2, out);
out = vdupq_n_s32(0);
single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out);
single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out);
single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out);
single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out);
sum2 = vaddq_f32(suma, sumb);
vst1q_s32(idx2, veorq_s32(v2, out));
out2 = veorq_s32(out2, out);
out = vdupq_n_s32(0);
single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out);
single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out);
single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out);
single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out);
sum3 = vaddq_f32(suma, sumb);
vst1q_s32(idx3, veorq_s32(v3, out));
out2 = veorq_s32(out2, out);
sum0 = vaddq_f32(sum0, sum1);
sum2 = vaddq_f32(sum2, sum3);
sum0 = vaddq_f32(sum0, sum2);
const float32x4_t cc1 = vdupq_n_f32(16777216.0f);
const float32x4_t cc2 = vdupq_n_f32(64.0f);
vandq_f32(sum0, 0x7fffffff); // take abs(va) by masking the float sign bit
// vs range 0 - 64
n0 = vmulq_f32(sum0, cc1);
v0 = vcvtq_s32_f32(n0);
v0 = veorq_s32(v0, out2);
uint32_t n = vheor_s32(v0);
// vs is now between 0 and 1
sum0 = vdivq_f32(sum0, cc2);
idx0 = scratchpad_ptr<MASK>(lpad, n, 0);
idx1 = scratchpad_ptr<MASK>(lpad, n, 1);
idx2 = scratchpad_ptr<MASK>(lpad, n, 2);
idx3 = scratchpad_ptr<MASK>(lpad, n, 3);
}
}
template void cn_gpu_inner_arm<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);

203
src/crypto/cn_gpu_avx.cpp Normal file
View File

@@ -0,0 +1,203 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/CryptoNight_constants.h"
#ifdef __GNUC__
# include <x86intrin.h>
#else
# include <intrin.h>
# define __restrict__ __restrict
#endif
inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01)
{
v = _mm256_load_si256(idx);
n01 = _mm256_cvtepi32_ps(v);
}
inline __m256 fma_break(const __m256& x)
{
// Break the dependency chain by setitng the exp to ?????01
__m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x);
return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx);
}
// 14
inline void sub_round(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& n, __m256& d, __m256& c)
{
__m256 nn = _mm256_mul_ps(n0, c);
nn = _mm256_mul_ps(_mm256_add_ps(n1, c), _mm256_mul_ps(nn, nn));
nn = fma_break(nn);
n = _mm256_add_ps(n, nn);
__m256 dd = _mm256_mul_ps(n2, c);
dd = _mm256_mul_ps(_mm256_sub_ps(n3, c), _mm256_mul_ps(dd, dd));
dd = fma_break(dd);
d = _mm256_add_ps(d, dd);
//Constant feedback
c = _mm256_add_ps(c, rnd_c);
c = _mm256_add_ps(c, _mm256_set1_ps(0.734375f));
__m256 r = _mm256_add_ps(nn, dd);
r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r);
r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r);
c = _mm256_add_ps(c, r);
}
// 14*8 + 2 = 112
inline void round_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& c, __m256& r)
{
__m256 n = _mm256_setzero_ps(), d = _mm256_setzero_ps();
sub_round(n0, n1, n2, n3, rnd_c, n, d, c);
sub_round(n1, n2, n3, n0, rnd_c, n, d, c);
sub_round(n2, n3, n0, n1, rnd_c, n, d, c);
sub_round(n3, n0, n1, n2, rnd_c, n, d, c);
sub_round(n3, n2, n1, n0, rnd_c, n, d, c);
sub_round(n2, n1, n0, n3, rnd_c, n, d, c);
sub_round(n1, n0, n3, n2, rnd_c, n, d, c);
sub_round(n0, n3, n2, n1, rnd_c, n, d, c);
// Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0
d = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFF7FFFFF)), d);
d = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), d);
r = _mm256_add_ps(r, _mm256_div_ps(n, d));
}
// 112×4 = 448
template <bool add>
inline __m256i double_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3,
float lcnt, float hcnt, const __m256& rnd_c, __m256& sum)
{
__m256 c = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_set1_ps(lcnt)), _mm_set1_ps(hcnt), 1);
__m256 r = _mm256_setzero_ps();
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
// do a quick fmod by setting exp to 2
r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r);
r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r);
if(add)
sum = _mm256_add_ps(sum, r);
else
sum = r;
r = _mm256_mul_ps(r, _mm256_set1_ps(536870880.0f)); // 35
return _mm256_cvttps_epi32(r);
}
template <size_t rot>
inline void double_compute_wrap(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3,
float lcnt, float hcnt, const __m256& rnd_c, __m256& sum, __m256i& out)
{
__m256i r = double_compute<rot % 2 != 0>(n0, n1, n2, n3, lcnt, hcnt, rnd_c, sum);
if(rot != 0)
r = _mm256_or_si256(_mm256_bslli_epi128(r, 16 - rot), _mm256_bsrli_epi128(r, rot));
out = _mm256_xor_si256(out, r);
}
template<uint32_t MASK>
inline __m256i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m256i*>(lpad + (idx & MASK) + n*16); }
template<size_t ITER, uint32_t MASK>
void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
{
uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8;
__m256i* idx0 = scratchpad_ptr<MASK>(lpad, s, 0);
__m256i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
__m256 sum0 = _mm256_setzero_ps();
for(size_t i = 0; i < ITER; i++)
{
__m256i v01, v23;
__m256 suma, sumb, sum1;
__m256 rc = sum0;
__m256 n01, n23;
prep_dv_avx(idx0, v01, n01);
prep_dv_avx(idx2, v23, n23);
__m256i out, out2;
__m256 n10, n22, n33;
n10 = _mm256_permute2f128_ps(n01, n01, 0x01);
n22 = _mm256_permute2f128_ps(n23, n23, 0x00);
n33 = _mm256_permute2f128_ps(n23, n23, 0x11);
out = _mm256_setzero_si256();
double_compute_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out);
double_compute_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out);
double_compute_wrap<2>(n01, n33, n10, n22, 1.3593750f, 1.3828125f, rc, sumb, out);
double_compute_wrap<3>(n01, n33, n22, n10, 1.3671875f, 1.3046875f, rc, sumb, out);
_mm256_store_si256(idx0, _mm256_xor_si256(v01, out));
sum0 = _mm256_add_ps(suma, sumb);
out2 = out;
__m256 n11, n02, n30;
n11 = _mm256_permute2f128_ps(n01, n01, 0x11);
n02 = _mm256_permute2f128_ps(n01, n23, 0x20);
n30 = _mm256_permute2f128_ps(n01, n23, 0x03);
out = _mm256_setzero_si256();
double_compute_wrap<0>(n23, n11, n02, n30, 1.4140625f, 1.3203125f, rc, suma, out);
double_compute_wrap<1>(n23, n02, n30, n11, 1.2734375f, 1.3515625f, rc, suma, out);
double_compute_wrap<2>(n23, n30, n11, n02, 1.2578125f, 1.3359375f, rc, sumb, out);
double_compute_wrap<3>(n23, n30, n02, n11, 1.2890625f, 1.4609375f, rc, sumb, out);
_mm256_store_si256(idx2, _mm256_xor_si256(v23, out));
sum1 = _mm256_add_ps(suma, sumb);
out2 = _mm256_xor_si256(out2, out);
out2 = _mm256_xor_si256(_mm256_permute2x128_si256(out2,out2,0x41), out2);
suma = _mm256_permute2f128_ps(sum0, sum1, 0x30);
sumb = _mm256_permute2f128_ps(sum0, sum1, 0x21);
sum0 = _mm256_add_ps(suma, sumb);
sum0 = _mm256_add_ps(sum0, _mm256_permute2f128_ps(sum0, sum0, 0x41));
// Clear the high 128 bits
__m128 sum = _mm256_castps256_ps128(sum0);
sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit
// vs range 0 - 64
__m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f)));
v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2));
__m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3));
v0 = _mm_xor_si128(v0, v1);
v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1));
v0 = _mm_xor_si128(v0, v1);
// vs is now between 0 and 1
sum = _mm_div_ps(sum, _mm_set1_ps(64.0f));
sum0 = _mm256_insertf128_ps(_mm256_castps128_ps256(sum), sum, 1);
uint32_t n = _mm_cvtsi128_si32(v0);
idx0 = scratchpad_ptr<MASK>(lpad, n, 0);
idx2 = scratchpad_ptr<MASK>(lpad, n, 2);
}
}
template void cn_gpu_inner_avx<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);

210
src/crypto/cn_gpu_ssse3.cpp Normal file
View File

@@ -0,0 +1,210 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/CryptoNight_constants.h"
#ifdef __GNUC__
# include <x86intrin.h>
#else
# include <intrin.h>
# define __restrict__ __restrict
#endif
inline void prep_dv(__m128i* idx, __m128i& v, __m128& n)
{
v = _mm_load_si128(idx);
n = _mm_cvtepi32_ps(v);
}
inline __m128 fma_break(__m128 x)
{
// Break the dependency chain by setitng the exp to ?????01
x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x);
return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x);
}
// 14
inline void sub_round(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& n, __m128& d, __m128& c)
{
n1 = _mm_add_ps(n1, c);
__m128 nn = _mm_mul_ps(n0, c);
nn = _mm_mul_ps(n1, _mm_mul_ps(nn,nn));
nn = fma_break(nn);
n = _mm_add_ps(n, nn);
n3 = _mm_sub_ps(n3, c);
__m128 dd = _mm_mul_ps(n2, c);
dd = _mm_mul_ps(n3, _mm_mul_ps(dd,dd));
dd = fma_break(dd);
d = _mm_add_ps(d, dd);
//Constant feedback
c = _mm_add_ps(c, rnd_c);
c = _mm_add_ps(c, _mm_set1_ps(0.734375f));
__m128 r = _mm_add_ps(nn, dd);
r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
c = _mm_add_ps(c, r);
}
// 14*8 + 2 = 112
inline void round_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& c, __m128& r)
{
__m128 n = _mm_setzero_ps(), d = _mm_setzero_ps();
sub_round(n0, n1, n2, n3, rnd_c, n, d, c);
sub_round(n1, n2, n3, n0, rnd_c, n, d, c);
sub_round(n2, n3, n0, n1, rnd_c, n, d, c);
sub_round(n3, n0, n1, n2, rnd_c, n, d, c);
sub_round(n3, n2, n1, n0, rnd_c, n, d, c);
sub_round(n2, n1, n0, n3, rnd_c, n, d, c);
sub_round(n1, n0, n3, n2, rnd_c, n, d, c);
sub_round(n0, n3, n2, n1, rnd_c, n, d, c);
// Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0
d = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFF7FFFFF)), d);
d = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), d);
r =_mm_add_ps(r, _mm_div_ps(n,d));
}
// 112×4 = 448
template<bool add>
inline __m128i single_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, float cnt, __m128 rnd_c, __m128& sum)
{
__m128 c = _mm_set1_ps(cnt);
__m128 r = _mm_setzero_ps();
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
round_compute(n0, n1, n2, n3, rnd_c, c, r);
// do a quick fmod by setting exp to 2
r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
if(add)
sum = _mm_add_ps(sum, r);
else
sum = r;
r = _mm_mul_ps(r, _mm_set1_ps(536870880.0f)); // 35
return _mm_cvttps_epi32(r);
}
template<size_t rot>
inline void single_compute_wrap(__m128 n0, __m128 n1, __m128 n2, __m128 n3, float cnt, __m128 rnd_c, __m128& sum, __m128i& out)
{
__m128i r = single_compute<rot % 2 != 0>(n0, n1, n2, n3, cnt, rnd_c, sum);
if(rot != 0)
r = _mm_or_si128(_mm_slli_si128(r, 16 - rot), _mm_srli_si128(r, rot));
out = _mm_xor_si128(out, r);
}
template<uint32_t MASK>
inline __m128i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m128i*>(lpad + (idx & MASK) + n*16); }
template<size_t ITER, uint32_t MASK>
void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
{
uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8;
__m128i* idx0 = scratchpad_ptr<MASK>(lpad, s, 0);
__m128i* idx1 = scratchpad_ptr<MASK>(lpad, s, 1);
__m128i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
__m128i* idx3 = scratchpad_ptr<MASK>(lpad, s, 3);
__m128 sum0 = _mm_setzero_ps();
for(size_t i = 0; i < ITER; i++)
{
__m128 n0, n1, n2, n3;
__m128i v0, v1, v2, v3;
__m128 suma, sumb, sum1, sum2, sum3;
prep_dv(idx0, v0, n0);
prep_dv(idx1, v1, n1);
prep_dv(idx2, v2, n2);
prep_dv(idx3, v3, n3);
__m128 rc = sum0;
__m128i out, out2;
out = _mm_setzero_si128();
single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out);
single_compute_wrap<1>(n0, n2, n3, n1, 1.2812500f, rc, suma, out);
single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out);
single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out);
sum0 = _mm_add_ps(suma, sumb);
_mm_store_si128(idx0, _mm_xor_si128(v0, out));
out2 = out;
out = _mm_setzero_si128();
single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out);
single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out);
single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out);
single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out);
sum1 = _mm_add_ps(suma, sumb);
_mm_store_si128(idx1, _mm_xor_si128(v1, out));
out2 = _mm_xor_si128(out2, out);
out = _mm_setzero_si128();
single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out);
single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out);
single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out);
single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out);
sum2 = _mm_add_ps(suma, sumb);
_mm_store_si128(idx2, _mm_xor_si128(v2, out));
out2 = _mm_xor_si128(out2, out);
out = _mm_setzero_si128();
single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out);
single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out);
single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out);
single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out);
sum3 = _mm_add_ps(suma, sumb);
_mm_store_si128(idx3, _mm_xor_si128(v3, out));
out2 = _mm_xor_si128(out2, out);
sum0 = _mm_add_ps(sum0, sum1);
sum2 = _mm_add_ps(sum2, sum3);
sum0 = _mm_add_ps(sum0, sum2);
sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit
// vs range 0 - 64
n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f));
v0 = _mm_cvttps_epi32(n0);
v0 = _mm_xor_si128(v0, out2);
v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3));
v0 = _mm_xor_si128(v0, v1);
v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1));
v0 = _mm_xor_si128(v0, v1);
// vs is now between 0 and 1
sum0 = _mm_div_ps(sum0, _mm_set1_ps(64.0f));
uint32_t n = _mm_cvtsi128_si32(v0);
idx0 = scratchpad_ptr<MASK>(lpad, n, 0);
idx1 = scratchpad_ptr<MASK>(lpad, n, 1);
idx2 = scratchpad_ptr<MASK>(lpad, n, 2);
idx3 = scratchpad_ptr<MASK>(lpad, n, 3);
}
}
template void cn_gpu_inner_ssse3<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);

View File

@@ -0,0 +1,428 @@
#ifndef VARIANT4_RANDOM_MATH_H
#define VARIANT4_RANDOM_MATH_H
extern "C"
{
#include "c_blake256.h"
}
enum V4_Settings
{
// Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications
TOTAL_LATENCY = 15 * 3,
// Always generate at least 60 instructions
NUM_INSTRUCTIONS = 60,
// Available ALUs for MUL
// Modern CPUs typically have only 1 ALU which can do multiplications
ALU_COUNT_MUL = 1,
// Total available ALUs
// Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code
ALU_COUNT = 3,
};
enum V4_InstructionList
{
MUL, // a*b
ADD, // a+b + C, C is an unsigned 32-bit constant
SUB, // a-b
ROR, // rotate right "a" by "b & 31" bits
ROL, // rotate left "a" by "b & 31" bits
XOR, // a^b
RET, // finish execution
V4_INSTRUCTION_COUNT = RET,
};
// V4_InstructionDefinition is used to generate code from random data
// Every random sequence of bytes is a valid code
//
// There are 8 registers in total:
// - 4 variable registers
// - 4 constant registers initialized from loop variables
//
// This is why dst_index is 2 bits
enum V4_InstructionDefinition
{
V4_OPCODE_BITS = 3,
V4_DST_INDEX_BITS = 2,
V4_SRC_INDEX_BITS = 3,
};
struct V4_Instruction
{
uint8_t opcode;
uint8_t dst_index;
uint8_t src_index;
uint32_t C;
};
#ifndef FORCEINLINE
#ifdef __GNUC__
#define FORCEINLINE __attribute__((always_inline)) inline
#elif _MSC_VER
#define FORCEINLINE __forceinline
#else
#define FORCEINLINE inline
#endif
#endif
#ifndef UNREACHABLE_CODE
#ifdef __GNUC__
#define UNREACHABLE_CODE __builtin_unreachable()
#elif _MSC_VER
#define UNREACHABLE_CODE __assume(false)
#else
#define UNREACHABLE_CODE
#endif
#endif
// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU:
// every switch-case will point to the same destination on every iteration of Cryptonight main loop
//
// This is about as fast as it can get without using low-level machine code generation
template<typename v4_reg>
static void v4_random_math(const struct V4_Instruction* code, v4_reg* r)
{
enum
{
REG_BITS = sizeof(v4_reg) * 8,
};
#define V4_EXEC(i) \
{ \
const struct V4_Instruction* op = code + i; \
const v4_reg src = r[op->src_index]; \
v4_reg* dst = r + op->dst_index; \
switch (op->opcode) \
{ \
case MUL: \
*dst *= src; \
break; \
case ADD: \
*dst += src + op->C; \
break; \
case SUB: \
*dst -= src; \
break; \
case ROR: \
{ \
const uint32_t shift = src % REG_BITS; \
*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
} \
break; \
case ROL: \
{ \
const uint32_t shift = src % REG_BITS; \
*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
} \
break; \
case XOR: \
*dst ^= src; \
break; \
case RET: \
return; \
default: \
UNREACHABLE_CODE; \
break; \
} \
}
#define V4_EXEC_10(j) \
V4_EXEC(j + 0) \
V4_EXEC(j + 1) \
V4_EXEC(j + 2) \
V4_EXEC(j + 3) \
V4_EXEC(j + 4) \
V4_EXEC(j + 5) \
V4_EXEC(j + 6) \
V4_EXEC(j + 7) \
V4_EXEC(j + 8) \
V4_EXEC(j + 9)
// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
//
// 60 28495
// 61 106077
// 62 2455855
// 63 5114930
// 64 1020868
// 65 1109026
// 66 151756
// 67 8429
// 68 4477
// 69 87
// Unroll 70 instructions here
V4_EXEC_10(0); // instructions 0-9
V4_EXEC_10(10); // instructions 10-19
V4_EXEC_10(20); // instructions 20-29
V4_EXEC_10(30); // instructions 30-39
V4_EXEC_10(40); // instructions 40-49
V4_EXEC_10(50); // instructions 50-59
V4_EXEC_10(60); // instructions 60-69
#undef V4_EXEC_10
#undef V4_EXEC
}
// If we don't have enough data available, generate more
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
{
if (*data_index + bytes_needed > data_size)
{
hash_extra_blake(data, data_size, (char*) data);
*data_index = 0;
}
}
// Generates as many random math operations as possible with given latency and ALU restrictions
static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height)
{
// MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle
// These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake
//
// AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors
// Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors
// AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same
// Source: https://www.agner.org/optimize/instruction_tables.pdf
const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 };
// Instruction latencies for theoretical ASIC implementation
const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 };
// Available ALUs for each instruction
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
int8_t data[32];
memset(data, 0, sizeof(data));
uint64_t tmp = SWAP64LE(height);
memcpy(data, &tmp, sizeof(uint64_t));
// Set data_index past the last byte in data
// to trigger full data update with blake hash
// before we start using it
size_t data_index = sizeof(data);
int code_size;
do {
int latency[8];
int asic_latency[8];
// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
// byte 0: current value of the destination register
// byte 1: instruction opcode
// byte 2: current value of the source register
//
// Registers R4-R7 are constant and are treated as having the same value because when we do
// the same operation twice with two constant source registers, it can be optimized into a single operation
uint32_t inst_data[8] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
bool is_rotation[V4_INSTRUCTION_COUNT];
bool rotated[4];
int rotate_count = 0;
memset(latency, 0, sizeof(latency));
memset(asic_latency, 0, sizeof(asic_latency));
memset(alu_busy, 0, sizeof(alu_busy));
memset(is_rotation, 0, sizeof(is_rotation));
memset(rotated, 0, sizeof(rotated));
is_rotation[ROR] = true;
is_rotation[ROL] = true;
int num_retries = 0;
code_size = 0;
int total_iterations = 0;
// Generate random code to achieve minimal required latency for our abstract CPU
// Try to get this latency for all 4 registers
while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64))
{
// Fail-safe to guarantee loop termination
++total_iterations;
if (total_iterations > 256)
break;
check_data(&data_index, 1, data, sizeof(data));
const uint8_t c = ((uint8_t*)data)[data_index++];
// MUL = opcodes 0-2
// ADD = opcode 3
// SUB = opcode 4
// ROR/ROL = opcode 5, shift direction is selected randomly
// XOR = opcodes 6-7
uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1);
if (opcode == 5)
{
check_data(&data_index, 1, data, sizeof(data));
opcode = (data[data_index++] >= 0) ? ROR : ROL;
}
else if (opcode >= 6)
{
opcode = XOR;
}
else
{
opcode = (opcode <= 2) ? MUL : (opcode - 2);
}
uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1);
uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1);
const int a = dst_index;
int b = src_index;
// Don't do ADD/SUB/XOR with the same register
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
{
// a is always < 4, so we don't need to check bounds here
b = a + 4;
src_index = b;
}
// Don't do rotation with the same destination twice because it's equal to a single rotation
if (is_rotation[opcode] && rotated[a])
{
continue;
}
// Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized:
// 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations
// 2xXOR(a, b) = NOP
if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16)))
{
continue;
}
// Find which ALU is available (and when) for this instruction
int next_latency = (latency[a] > latency[b]) ? latency[a] : latency[b];
int alu_index = -1;
while (next_latency < TOTAL_LATENCY)
{
for (int i = op_ALUs[opcode] - 1; i >= 0; --i)
{
if (!alu_busy[next_latency][i])
{
// ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check
if ((opcode == ADD) && alu_busy[next_latency + 1][i])
{
continue;
}
// Rotation can only start when previous rotation is finished, so do an additional availability check
if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode]))
{
continue;
}
alu_index = i;
break;
}
}
if (alu_index >= 0)
{
break;
}
++next_latency;
}
// Don't generate instructions that leave some register unchanged for more than 7 cycles
if (next_latency > latency[a] + 7)
{
continue;
}
next_latency += op_latency[opcode];
if (next_latency <= TOTAL_LATENCY)
{
if (is_rotation[opcode])
{
++rotate_count;
}
// Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined
alu_busy[next_latency - op_latency[opcode]][alu_index] = true;
latency[a] = next_latency;
// ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple
asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode];
rotated[a] = is_rotation[opcode];
inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16);
code[code_size].opcode = opcode;
code[code_size].dst_index = dst_index;
code[code_size].src_index = src_index;
code[code_size].C = 0;
if (opcode == ADD)
{
// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true;
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
uint32_t t;
memcpy(&t, data + data_index, sizeof(uint32_t));
code[code_size].C = SWAP32LE(t);
data_index += sizeof(uint32_t);
}
++code_size;
if (code_size >= NUM_INSTRUCTIONS)
{
break;
}
}
else
{
++num_retries;
}
}
// ASIC has more execution resources and can extract as much parallelism from the code as possible
// We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC
// Get this latency for at least 1 of the 4 registers
const int prev_code_size = code_size;
while ((asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY))
{
int min_idx = 0;
int max_idx = 0;
for (int i = 1; i < 4; ++i)
{
if (asic_latency[i] < asic_latency[min_idx]) min_idx = i;
if (asic_latency[i] > asic_latency[max_idx]) max_idx = i;
}
const uint8_t pattern[3] = { ROR, MUL, MUL };
const uint8_t opcode = pattern[(code_size - prev_code_size) % 3];
latency[min_idx] = latency[max_idx] + op_latency[opcode];
asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode];
code[code_size].opcode = opcode;
code[code_size].dst_index = min_idx;
code[code_size].src_index = max_idx;
code[code_size].C = 0;
++code_size;
}
// There is ~99.8% chance that code_size >= NUM_INSTRUCTIONS here, so second iteration is required rarely
} while (code_size < NUM_INSTRUCTIONS);
// Add final instruction to stop the interpreter
code[code_size].opcode = RET;
code[code_size].dst_index = 0;
code[code_size].src_index = 0;
code[code_size].C = 0;
return code_size;
}
#endif

View File

@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __JOBRESULT_H__
#define __JOBRESULT_H__
#ifndef XMRIG_JOBRESULT_H
#define XMRIG_JOBRESULT_H
#include <memory.h>
@@ -36,17 +36,29 @@ class JobResult
{
public:
inline JobResult() : poolId(0), diff(0), nonce(0) {}
inline JobResult(int poolId, const xmrig::Id &jobId, uint32_t nonce, const uint8_t *result, uint32_t diff, const xmrig::Algorithm &algorithm) :
inline JobResult(int poolId, const xmrig::Id &jobId, const xmrig::Id &clientId, uint32_t nonce, const uint8_t *result, uint32_t diff, const xmrig::Algorithm &algorithm) :
poolId(poolId),
diff(diff),
nonce(nonce),
algorithm(algorithm),
clientId(clientId),
jobId(jobId)
{
memcpy(this->result, result, sizeof(this->result));
}
inline JobResult(const Job &job) : poolId(0), diff(0), nonce(0)
{
jobId = job.id();
clientId = job.clientId();
poolId = job.poolId();
diff = job.diff();
nonce = *job.nonce();
algorithm = job.algorithm();
}
inline uint64_t actualDiff() const
{
return Job::toDiff(reinterpret_cast<const uint64_t*>(result)[3]);
@@ -58,7 +70,8 @@ public:
uint32_t nonce;
uint8_t result[32];
xmrig::Algorithm algorithm;
xmrig::Id clientId;
xmrig::Id jobId;
};
#endif /* __JOBRESULT_H__ */
#endif /* XMRIG_JOBRESULT_H */

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -108,7 +109,7 @@ void Network::onActive(IStrategy *strategy, Client *client)
const char *fingerprint = client->tlsFingerprint();
if (fingerprint != nullptr) {
LOG_INFO("\x1B[1;30mfingerprint (SHA-256): \"%s\"", fingerprint);
LOG_INFO("%sfingerprint (SHA-256): \"%s\"", isColors() ? "\x1B[1;30m" : "", fingerprint);
}
}
@@ -154,12 +155,12 @@ void Network::onResultAccepted(IStrategy *strategy, Client *client, const Submit
m_state.add(result, error);
if (error) {
LOG_INFO(isColors() ? "\x1B[01;31mrejected\x1B[0m (%" PRId64 "/%" PRId64 ") diff \x1B[01;37m%u\x1B[0m \x1B[31m\"%s\"\x1B[0m \x1B[01;30m(%" PRIu64 " ms)"
LOG_INFO(isColors() ? "\x1B[1;31mrejected\x1B[0m (%" PRId64 "/%" PRId64 ") diff \x1B[1;37m%u\x1B[0m \x1B[31m\"%s\"\x1B[0m \x1B[1;30m(%" PRIu64 " ms)"
: "rejected (%" PRId64 "/%" PRId64 ") diff %u \"%s\" (%" PRIu64 " ms)",
m_state.accepted, m_state.rejected, result.diff, error, result.elapsed);
}
else {
LOG_INFO(isColors() ? "\x1B[01;32maccepted\x1B[0m (%" PRId64 "/%" PRId64 ") diff \x1B[01;37m%u\x1B[0m \x1B[01;30m(%" PRIu64 " ms)"
LOG_INFO(isColors() ? "\x1B[1;32maccepted\x1B[0m (%" PRId64 "/%" PRId64 ") diff \x1B[1;37m%u\x1B[0m \x1B[1;30m(%" PRIu64 " ms)"
: "accepted (%" PRId64 "/%" PRId64 ") diff %u (%" PRIu64 " ms)",
m_state.accepted, m_state.rejected, result.diff, result.elapsed);
}
@@ -174,9 +175,20 @@ bool Network::isColors() const
void Network::setJob(Client *client, const Job &job, bool donate)
{
LOG_INFO(isColors() ? MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d") " diff " WHITE_BOLD("%d") " algo " WHITE_BOLD("%s")
: "new job from %s:%d diff %d algo %s",
client->host(), client->port(), job.diff(), job.algorithm().shortName());
if (job.height()) {
LOG_INFO(isColors() ? MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d") " diff " WHITE_BOLD("%d") " algo " WHITE_BOLD("%s") " height " WHITE_BOLD("%" PRIu64)
: "new job from %s:%d diff %d algo %s height %" PRIu64,
client->host(), client->port(), job.diff(), job.algorithm().shortName(), job.height());
}
else {
LOG_INFO(isColors() ? MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d") " diff " WHITE_BOLD("%d") " algo " WHITE_BOLD("%s")
: "new job from %s:%d diff %d algo %s",
client->host(), client->port(), job.diff(), job.algorithm().shortName());
}
if (!donate && m_donate) {
m_donate->setAlgo(job.algorithm());
}
m_state.diff = job.diff();
Workers::setJob(job, donate);

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,8 +22,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NETWORK_H__
#define __NETWORK_H__
#ifndef XMRIG_NETWORK_H
#define XMRIG_NETWORK_H
#include <vector>
@@ -47,7 +48,7 @@ class Network : public IJobResultListener, public IStrategyListener
{
public:
Network(xmrig::Controller *controller);
~Network();
~Network() override;
void connect();
void stop();
@@ -76,4 +77,4 @@ private:
};
#endif /* __NETWORK_H__ */
#endif /* XMRIG_NETWORK_H */

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -43,7 +44,9 @@ DonateStrategy::DonateStrategy(int level, const char *user, xmrig::Algo algo, IS
m_donateTime(level * 60 * 1000),
m_idleTime((100 - level) * 60 * 1000),
m_strategy(nullptr),
m_listener(listener)
m_listener(listener),
m_now(0),
m_stop(0)
{
uint8_t hash[200];
char userId[65] = { 0 };
@@ -93,6 +96,12 @@ void DonateStrategy::connect()
}
void DonateStrategy::setAlgo(const xmrig::Algorithm &algo)
{
m_strategy->setAlgo(algo);
}
void DonateStrategy::stop()
{
uv_timer_stop(&m_timer);
@@ -102,7 +111,14 @@ void DonateStrategy::stop()
void DonateStrategy::tick(uint64_t now)
{
m_now = now;
m_strategy->tick(now);
if (m_stop && now > m_stop) {
m_strategy->stop();
m_stop = 0;
}
}
@@ -119,7 +135,9 @@ void DonateStrategy::onActive(IStrategy *strategy, Client *client)
void DonateStrategy::onJob(IStrategy *strategy, Client *client, const Job &job)
{
m_listener->onJob(this, client, job);
if (isActive()) {
m_listener->onJob(this, client, job);
}
}
@@ -142,7 +160,11 @@ void DonateStrategy::idle(uint64_t timeout)
void DonateStrategy::suspend()
{
m_strategy->stop();
# if defined(XMRIG_AMD_PROJECT) || defined(XMRIG_NVIDIA_PROJECT)
m_stop = m_now + 5000;
# else
m_stop = m_now + 500;
# endif
m_active = false;
m_listener->onPause(this);

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -44,7 +45,7 @@ class DonateStrategy : public IStrategy, public IStrategyListener
{
public:
DonateStrategy(int level, const char *user, xmrig::Algo algo, IStrategyListener *listener);
~DonateStrategy();
~DonateStrategy() override;
public:
inline bool isActive() const override { return m_active; }
@@ -52,6 +53,7 @@ public:
int64_t submit(const JobResult &result) override;
void connect() override;
void setAlgo(const xmrig::Algorithm &algo) override;
void stop() override;
void tick(uint64_t now) override;
@@ -68,11 +70,13 @@ private:
static void onTimer(uv_timer_t *handle);
bool m_active;
const int m_donateTime;
const int m_idleTime;
const uint64_t m_donateTime;
const uint64_t m_idleTime;
IStrategy *m_strategy;
IStrategyListener *m_listener;
std::vector<Pool> m_pools;
uint64_t m_now;
uint64_t m_stop;
uv_timer_t m_timer;
};

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -27,14 +28,14 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig CPU miner"
#define APP_VERSION "2.8.0-rc"
#define APP_VERSION "2.12.0"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2018 xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
#define APP_KIND "cpu"
#define APP_VER_MAJOR 2
#define APP_VER_MINOR 8
#define APP_VER_MINOR 12
#define APP_VER_PATCH 0
#ifdef _MSC_VER

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,6 +29,7 @@
#include "common/log/Log.h"
#include "common/net/Pool.h"
#include "crypto/Asm.h"
#include "Mem.h"
#include "rapidjson/document.h"
#include "workers/CpuThread.h"
@@ -53,23 +55,141 @@ xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiw
}
#ifndef XMRIG_NO_ASM
template<typename T, typename U>
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask)
{
const uint8_t* p = reinterpret_cast<const uint8_t*>(src);
// Workaround for Visual Studio placing trampoline in debug builds.
# if defined(_MSC_VER)
if (p[0] == 0xE9) {
p += *(int32_t*)(p + 1) + 5;
}
# endif
size_t size = 0;
while (*(uint32_t*)(p + size) != 0xDEADC0DE) {
++size;
}
size += sizeof(uint32_t);
memcpy((void*) dst, (const void*) src, size);
uint8_t* patched_data = reinterpret_cast<uint8_t*>(dst);
for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) {
switch (*(uint32_t*)(patched_data + i)) {
case xmrig::CRYPTONIGHT_ITER:
*(uint32_t*)(patched_data + i) = iterations;
break;
case xmrig::CRYPTONIGHT_MASK:
*(uint32_t*)(patched_data + i) = mask;
break;
}
}
}
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
void xmrig::CpuThread::patchAsmVariants()
{
const int allocation_size = 65536;
uint8_t *base = static_cast<uint8_t *>(Mem::allocateExecutableMemory(allocation_size));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000);
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000);
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
Mem::protectExecutableMemory(base, allocation_size);
Mem::flushInstructionCache(base, allocation_size);
}
#endif
bool xmrig::CpuThread::isSoftAES(AlgoVariant av)
{
return av == AV_SINGLE_SOFT || av == AV_DOUBLE_SOFT || av > AV_PENTA;
}
#ifndef XMRIG_NO_ASM
template<xmrig::Algo algo, xmrig::Variant variant>
static inline void add_asm_func(xmrig::CpuThread::cn_hash_fun(&asm_func_map)[xmrig::ALGO_MAX][xmrig::AV_MAX][xmrig::VARIANT_MAX][xmrig::ASM_MAX])
{
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_INTEL] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_INTEL>;
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_RYZEN] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_INTEL] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_INTEL>;
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_RYZEN] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
}
#endif
xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly)
{
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
# ifndef XMRIG_NO_ASM
constexpr const size_t count = VARIANT_MAX * 10 * 3 + 3;
# else
constexpr const size_t count = VARIANT_MAX * 10 * 3;
if (assembly == ASM_AUTO) {
assembly = Cpu::info()->assembly();
}
static cn_hash_fun asm_func_map[ALGO_MAX][AV_MAX][VARIANT_MAX][ASM_MAX] = {};
static bool asm_func_map_initialized = false;
if (!asm_func_map_initialized) {
add_asm_func<CRYPTONIGHT, VARIANT_2>(asm_func_map);
add_asm_func<CRYPTONIGHT, VARIANT_HALF>(asm_func_map);
add_asm_func<CRYPTONIGHT, VARIANT_WOW>(asm_func_map);
add_asm_func<CRYPTONIGHT_PICO, VARIANT_HALF>(asm_func_map);
asm_func_map_initialized = true;
}
cn_hash_fun fun = asm_func_map[algorithm][av][variant][assembly];
if (fun) {
return fun;
}
# endif
static const cn_hash_fun func_table[count] = {
constexpr const size_t count = VARIANT_MAX * 10 * ALGO_MAX;
static const cn_hash_fun func_table[] = {
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>,
@@ -151,6 +271,45 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_2>,
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_2>,
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_HALF>,
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_HALF>,
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_HALF>,
cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_HALF>,
cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_HALF>,
cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_HALF>,
cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_HALF>,
cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_HALF>,
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_HALF>,
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_HALF>,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
# ifndef XMRIG_NO_CN_GPU
cryptonight_single_hash_gpu<CRYPTONIGHT, false, VARIANT_GPU>,
nullptr,
cryptonight_single_hash_gpu<CRYPTONIGHT, true, VARIANT_GPU>,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
# else
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
# endif
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_WOW>,
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_WOW>,
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_WOW>,
cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_WOW>,
cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_WOW>,
cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_WOW>,
cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_WOW>,
cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_WOW>,
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_WOW>,
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_WOW>,
# ifndef XMRIG_NO_AEON
cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
@@ -181,16 +340,24 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
# else
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
# endif
# ifndef XMRIG_NO_SUMO
@@ -235,26 +402,73 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
# else
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
# endif
# ifndef XMRIG_NO_ASM
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>,
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>
# ifndef XMRIG_NO_CN_PICO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
cryptonight_single_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
cryptonight_double_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
cryptonight_single_hash<CRYPTONIGHT_PICO, true, VARIANT_TRTL>,
cryptonight_double_hash<CRYPTONIGHT_PICO, true, VARIANT_TRTL>,
cryptonight_triple_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
cryptonight_quad_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
cryptonight_penta_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
cryptonight_triple_hash<CRYPTONIGHT_PICO, true, VARIANT_TRTL>,
cryptonight_quad_hash<CRYPTONIGHT_PICO, true, VARIANT_TRTL>,
cryptonight_penta_hash<CRYPTONIGHT_PICO, true, VARIANT_TRTL>,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
# else
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
# endif
};
static_assert(count == sizeof(func_table) / sizeof(func_table[0]), "func_table size mismatch");
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
# ifndef NDEBUG
const size_t index = fnIndex(algorithm, av, variant, assembly);
cn_hash_fun func = func_table[index];
assert(index < sizeof(func_table) / sizeof(func_table[0]));
@@ -262,7 +476,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
return func;
# else
return func_table[fnIndex(algorithm, av, variant, assembly)];
return func_table[index];
# endif
}
@@ -424,33 +638,3 @@ rapidjson::Value xmrig::CpuThread::toConfig(rapidjson::Document &doc) const
return obj;
}
size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly)
{
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
# ifndef XMRIG_NO_ASM
if (assembly == ASM_AUTO) {
assembly = Cpu::info()->assembly();
}
if (assembly == ASM_NONE) {
return index;
}
constexpr const size_t offset = VARIANT_MAX * 10 * 3;
if (algorithm == CRYPTONIGHT && variant == VARIANT_2) {
if (av == AV_SINGLE) {
return offset + assembly - 2;
}
if (av == AV_DOUBLE) {
return offset + 2;
}
}
# endif
return index;
}

View File

@@ -5,7 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -59,7 +60,13 @@ public:
CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly);
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx);
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height);
typedef void (*cn_mainloop_fun)(cryptonight_ctx *ctx);
typedef void (*cn_mainloop_double_fun)(cryptonight_ctx *ctx1, cryptonight_ctx *ctx2);
# ifndef XMRIG_NO_ASM
static void patchAsmVariants();
# endif
static bool isSoftAES(AlgoVariant av);
static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly);
@@ -91,8 +98,6 @@ protected:
rapidjson::Value toConfig(rapidjson::Document &doc) const override;
private:
static size_t fnIndex(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly);
const Algo m_algorithm;
const AlgoVariant m_av;
const Assembly m_assembly;

View File

@@ -7,7 +7,7 @@
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -25,9 +25,11 @@
#include <thread>
#include <sstream>
#include "crypto/CryptoNight_test.h"
#include "common/log/Log.h"
#include "workers/CpuThread.h"
#include "workers/MultiWorker.h"
#include "workers/Workers.h"
@@ -54,13 +56,29 @@ bool MultiWorker<N>::selfTest()
using namespace xmrig;
if (m_thread->algorithm() == CRYPTONIGHT) {
return verify(VARIANT_0, test_output_v0) &&
verify(VARIANT_1, test_output_v1) &&
verify(VARIANT_2, test_output_v2) &&
verify(VARIANT_XTL, test_output_xtl) &&
verify(VARIANT_MSR, test_output_msr) &&
verify(VARIANT_XAO, test_output_xao) &&
verify(VARIANT_RTO, test_output_rto);
if (!verify2(VARIANT_WOW, test_input_WOW)) {
LOG_WARN("CryptonightR (Wownero) self-test failed");
return false;
}
const bool rc = verify(VARIANT_0, test_output_v0) &&
verify(VARIANT_1, test_output_v1) &&
verify(VARIANT_2, test_output_v2) &&
verify(VARIANT_XTL, test_output_xtl) &&
verify(VARIANT_MSR, test_output_msr) &&
verify(VARIANT_XAO, test_output_xao) &&
verify(VARIANT_RTO, test_output_rto) &&
verify(VARIANT_HALF, test_output_half);
# ifndef XMRIG_NO_CN_GPU
if (!rc || N > 1) {
return rc;
}
return verify(VARIANT_GPU, test_output_gpu);
# else
return rc;
# endif
}
# ifndef XMRIG_NO_AEON
@@ -78,6 +96,12 @@ bool MultiWorker<N>::selfTest()
}
# endif
# ifndef XMRIG_NO_CN_PICO
if (m_thread->algorithm() == CRYPTONIGHT_PICO) {
return verify(VARIANT_TRTL, test_output_pico_trtl);
}
# endif
return false;
}
@@ -104,11 +128,11 @@ void MultiWorker<N>::start()
storeStats();
}
m_thread->fn(m_state.job.algorithm().variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx);
m_thread->fn(m_state.job.algorithm().variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx, m_state.job.height());
for (size_t i = 0; i < N; ++i) {
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < m_state.job.target()) {
Workers::submit(JobResult(m_state.job.poolId(), m_state.job.id(), *nonce(i), m_hash + (i * 32), m_state.job.diff(), m_state.job.algorithm()));
Workers::submit(JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(), *nonce(i), m_hash + (i * 32), m_state.job.diff(), m_state.job.algorithm()));
}
*nonce(i) += 1;
@@ -145,11 +169,71 @@ bool MultiWorker<N>::verify(xmrig::Variant variant, const uint8_t *referenceValu
return false;
}
func(test_input, 76, m_hash, m_ctx);
func(test_input, 76, m_hash, m_ctx, 0);
return memcmp(m_hash, referenceValue, sizeof m_hash) == 0;
}
template<size_t N>
bool MultiWorker<N>::verify2(xmrig::Variant variant, const char *test_data)
{
xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
if (!func) {
return false;
}
std::stringstream s(test_data);
std::string expected_hex;
std::string input_hex;
uint64_t height;
while (!s.eof())
{
uint8_t referenceValue[N * 32];
uint8_t input[N * 256];
s >> expected_hex;
s >> input_hex;
s >> height;
if ((expected_hex.length() != 64) || (input_hex.length() > 512))
{
return false;
}
bool err = false;
for (int i = 0; i < 32; ++i)
{
referenceValue[i] = (hf_hex2bin(expected_hex[i * 2], err) << 4) + hf_hex2bin(expected_hex[i * 2 + 1], err);
}
const size_t input_len = input_hex.length() / 2;
for (size_t i = 0; i < input_len; ++i)
{
input[i] = (hf_hex2bin(input_hex[i * 2], err) << 4) + hf_hex2bin(input_hex[i * 2 + 1], err);
}
if (err)
{
return false;
}
for (size_t i = 1; i < N; ++i)
{
memcpy(input + i * input_len, input, input_len);
memcpy(referenceValue + i * 32, referenceValue, 32);
}
func(input, input_len, m_hash, m_ctx, height);
if (memcmp(m_hash, referenceValue, sizeof m_hash) != 0)
{
return false;
}
}
return true;
}
template<size_t N>
void MultiWorker<N>::consumeJob()
{

Some files were not shown because too many files have changed in this diff Show More