1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-06 23:52:38 -05:00

Compare commits

..

114 Commits

Author SHA1 Message Date
XMRig
d92c1a54de Fixed macOS build. 2019-03-05 01:07:01 +07:00
XMRig
aa474fa51b Fix compile warnings. 2019-03-05 00:49:04 +07:00
XMRig
7976059367 Add renaming ASM codes & update from upstream. 2019-03-05 00:41:01 +07:00
XMRig
c5cbd9d8fe cn/r ASM support for --av 1. 2019-03-04 19:25:59 +07:00
XMRig
ef2e8bed6e Use new style method to call ASM functions for cn/2 & added bulldozer ASM code. 2019-03-04 13:31:25 +07:00
XMRig
7574bfab60 Added self test for cn/r. 2019-03-04 11:52:38 +07:00
XMRig
27980f24f8 Plain C "cn/r" implementation. 2019-03-03 20:19:17 +07:00
XMRig
5e6a69e16f Prepare for cn/r. 2019-03-03 14:09:00 +07:00
XMRig
69513e7049 Merge branch 'classic' into classic-dev 2019-03-03 12:05:14 +07:00
XMRig
b834c50aba Merge branch 'classic-dev' into classic 2018-10-05 16:23:49 +03:00
xmrig
302ebe5a5b Update CHANGELOG.md 2018-10-05 16:22:16 +03:00
XMRig
b9096f2392 Disable donation. 2018-10-05 16:01:22 +03:00
XMRig
b02f4ff163 Autodetect ASM without libcpuid. 2018-10-05 15:58:33 +03:00
XMRig
11748fad78 Add ASM code. 2018-10-05 15:02:52 +03:00
XMRig
e0dc51edf9 Fixed build without cn-lite. 2018-10-04 22:12:33 +03:00
XMRig
779238fc85 Add support for new style algorithm names. 2018-10-04 22:06:08 +03:00
XMRig
a06a224c0a Implement --variant option. 2018-10-04 20:27:29 +03:00
XMRig
bf2eb1a685 Fix misaligned access. 2018-10-04 20:11:47 +03:00
XMRig
0bba8849f0 Fix Linux build. 2018-10-04 20:00:18 +03:00
XMRig
1e22a984af Add double hash cn/2. 2018-10-04 19:25:09 +03:00
XMRig
61b49137c7 Add single hash cn/2. 2018-10-04 18:03:00 +03:00
XMRig
93d072ff6e Massive refactoring, preparing for cn/2. 2018-10-04 15:52:12 +03:00
XMRig
f0b293f650 Add support for "nicehash" protocol extension. 2018-10-03 01:27:45 +03:00
XMRig
b93e7d9daa Workaround for xmrig-proxy bug. 2018-10-03 00:41:14 +03:00
XMRig
0b4b07fcd6 v0.9.0-dev 2018-10-03 00:39:45 +03:00
XMRig
af62621169 Fix CURL detection. 2018-10-02 23:58:53 +03:00
XMRig
ed7260449a v0.8.3 2018-03-11 21:24:55 +07:00
XMRig
33944595a2 Add Monero v7 support. 2018-03-11 21:23:14 +07:00
XMRig
9dc02fc7f3 Fix for -a cryptonight-light. 2017-06-06 03:34:49 +03:00
XMRig
6551818610 Update libjansson to 2.10. 2017-06-06 03:31:44 +03:00
Admin
7741c341c7 Huge pages support on OS X. 2017-05-27 10:34:42 +03:00
XMRig
8a70202a98 Fix. 2017-05-27 08:45:11 +03:00
Admin
bc2b7d1895 Initial OS X support. 2017-05-26 23:17:12 +03:00
xmrig
ebb0f81f2f Update CHANGELOG.md 2017-05-26 09:07:45 +03:00
XMRig
eb3e2b8868 Fix gcc7 support. 2017-05-25 09:32:39 +03:00
XMRig
583d892eb5 Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97 2017-05-24 18:50:24 +03:00
XMRig
b145f14ad8 Merge branch 'dev' 2017-05-20 23:53:16 +03:00
xmrig
ce19edf36c Update CHANGELOG.md 2017-05-20 23:48:53 +03:00
xmrig
108fd5690e Update README.md 2017-05-20 23:43:39 +03:00
XMRig
c19fe3cea7 Add "--nicehash" to help output. 2017-05-20 23:38:05 +03:00
XMRig
187c7680cc Show errors from pool. 2017-05-20 23:27:22 +03:00
XMRig
20061e1b8b Autodetect nicehash by url 2017-05-20 09:31:02 +03:00
XMRig
2baccab0f9 Initial test nicehash support. 2017-05-20 07:08:41 +03:00
XMRig
44782befea Fix 32 bit build. 2017-05-16 17:04:27 +03:00
XMRig
5b7a1bc6dc Merge branch 'dev' 2017-05-15 22:10:45 +03:00
XMRig
e67a95bd8b Version increment and update help. 2017-05-15 22:06:54 +03:00
xmrig
88dd218ad8 Update README.md 2017-05-15 19:57:20 +03:00
xmrig
ee9ba778f8 Update README.md 2017-05-15 18:32:27 +03:00
xmrig
6080f292e7 Update CHANGELOG.md 2017-05-13 22:39:40 +03:00
xmrig
cf8f81f5fa Update CHANGELOG.md 2017-05-13 22:32:29 +03:00
xmrig
aab48fde96 Update README.md 2017-05-13 20:31:27 +03:00
xmrig
bf25b4e5d4 Update README.md 2017-05-13 20:26:35 +03:00
XMRig
0c2bda9aa5 Remove default url. 2017-05-13 19:47:12 +03:00
XMRig
d71a15e8da Use --safe options to disable AES algo variations if CPU not support it. 2017-05-12 15:04:04 +03:00
XMRig
c4bccf410b * Implement --max-cpu-usage.
* Fix L2 cache size detect.
* Add test for get_optimal_threads_count.
2017-05-10 19:38:35 +03:00
XMRig
719601f92b Add test for cryptonight lite. 2017-05-10 15:31:29 +03:00
XMRig
ff7be00f6f Fix test. 2017-05-10 15:06:01 +03:00
XMRig
d3b0038bda Add optional CryptoNight-Lite support. 2017-05-10 12:58:52 +03:00
XMRig
3b46f5eb64 Remove BMI2 av. 2017-05-08 23:28:39 +03:00
XMRig
03dbb85c82 Update test values. 2017-05-08 23:06:00 +03:00
XMRig
a2574e1b1b Added message if huge pages was enabled, but reboot required. 2017-05-08 21:41:27 +03:00
XMRig
15b4244ea8 Added --max-cpu-usage and --safe stub. 2017-05-08 10:29:25 +03:00
XMRig
0dcf127c26 Version increment. 2017-05-06 09:44:50 +03:00
xmrig
9964952c92 Update CHANGELOG.md 2017-05-05 19:54:31 +03:00
XMRig
90648771c0 Fix 32bit build. 2017-05-05 19:51:53 +03:00
XMRig
985adcbc13 No more manual steps to enable huge pages on Windows. XMRig will do it automatically. 2017-05-05 15:49:38 +03:00
XMRig
16f3338e42 Fix crash when use Keepalived. 2017-05-05 10:48:56 +03:00
XMRig
2650545916 Code cleanup. 2017-05-04 14:18:14 +03:00
xmrig
c107547c6c Update CHANGELOG.md 2017-05-03 15:16:46 +03:00
XMRig
60f7f93408 Merge branch 'feature-libcpuid' 2017-05-03 15:07:09 +03:00
XMRig
dfbfde5b22 Fix Linux build. 2017-05-03 15:03:33 +03:00
XMRig
0c752ee018 Use libcpuid as internal dependence. 2017-05-03 14:36:42 +03:00
XMRig
f329410940 Use libcpuid for detect optimal threads count. 2017-05-03 13:48:08 +03:00
XMRig
0a6d70c499 Add optional libcpuid support to cmake. 2017-05-03 10:53:51 +03:00
XMRig
1678dc1d6d Implement low power mode (double hash). 2017-05-01 03:49:05 +03:00
XMRig
caf7cda1d5 Backport changes from xmrig-aeon. 2017-04-30 02:56:47 +03:00
XMRig
3de7983826 Fix for donate level. 2017-04-26 18:05:04 +03:00
XMRig
8dda8d293b Update README.md. 2017-04-25 03:35:03 +03:00
XMRig
e71e9486c6 Remove conflicting declaration for _mulx_u64. 2017-04-25 03:20:32 +03:00
XMRig
b35ecef06f Move common code to cryptonight_p.h 2017-04-24 13:23:49 +03:00
XMRig
454c78cf0a Fix const. 2017-04-23 23:56:47 +03:00
XMRig
c97693cd51 Merge branch 'master' of github.com:xmrig/xmrig 2017-04-22 18:08:44 +03:00
XMRig
d855ae2e36 Merge branch 'bug-128tx-exploit' 2017-04-22 18:08:15 +03:00
XMRig
42d2ab18ee Update tests. 2017-04-22 17:12:50 +03:00
XMRig
97a8d448c0 Pass blob size to cryptonight_hash_ctx. 2017-04-22 15:34:05 +03:00
XMRig
54cef68aa9 Optimize job_decode, support variable length blob and redume mutex lock
time.
2017-04-22 13:19:33 +03:00
xmrig
3492670839 Merge pull request #3 from esfomeado/patch-1
More detailed instructions to build on Windows
2017-04-21 18:39:04 +03:00
xmrig
c43c667fed Update CHANGELOG.md 2017-04-21 18:31:58 +03:00
XMRig
361394be21 Add automatic self test. 2017-04-21 17:29:03 +03:00
XMRig
8235ae0fa6 Add 32 bit support for software AES too. 2017-04-21 15:47:11 +03:00
XMRig
ac89023a79 Add support for 32 bit. 2017-04-21 15:20:08 +03:00
XMRig
f92b5ed9f6 Merge branch 'master' of github.com:xmrig/xmrig 2017-04-21 13:06:29 +03:00
XMRig
7ce21d458a Version increment. 2017-04-21 13:06:13 +03:00
xmrig
5513fab59b Update README.md 2017-04-21 12:34:59 +03:00
XMRig
5e6560cb07 Fix affinity for single thread mode. 2017-04-21 12:13:49 +03:00
Esfomeado
25d76626c1 More detailed instructions to build on Windows 2017-04-21 10:08:04 +01:00
XMRig
cad15069c8 Revert back BMI2 support. 2017-04-21 12:05:28 +03:00
XMRig
8ab4c1c8bd Add memory to cryptonight_ctx. 2017-04-21 11:56:11 +03:00
XMRig
f29d05bdde Simplify cryptonight_ctx. 2017-04-21 11:14:27 +03:00
XMRig
1474d3fe53 Rename algo variants again, should be final numbers. 2017-04-21 10:40:11 +03:00
XMRig
d2fd43ca03 Change algo variant numbers. 2017-04-21 09:20:19 +03:00
XMRig
95f48fd058 Add app.rc and app.ico for Windows. 2017-04-19 20:52:00 +03:00
XMRig
f8bda3a6b3 Add CHANGELOG.md. 2017-04-19 10:20:04 +03:00
XMRig
21c243ed8f Much better software AES implementation (--av 4). 2017-04-19 10:03:40 +03:00
XMRig
1013aa5004 Update av1/av6 2017-04-19 07:58:42 +03:00
XMRig
44875b0a94 Fix test. 2017-04-18 17:40:19 +03:00
XMRig
b1f1474438 Merge branch 'feature-xmr-stak-algo' 2017-04-18 16:04:50 +03:00
XMRig
4eb7e5bbfd Fix stak algo as --av 5, experimental algo now --av 6 2017-04-18 15:57:44 +03:00
XMRig
d874ede49e Fix. 2017-04-18 13:14:09 +03:00
XMRig
add10c829c No templates in C :( 2017-04-18 13:10:40 +03:00
XMRig
4acfb213b8 Add xmr-stak-cpu algo as experimental, use --av=5. 2017-04-18 12:06:46 +03:00
xmrig
78a4b9de0f Update README.md 2017-04-17 05:44:49 +03:00
xmrig
9fe2bbcd81 Update README.md 2017-04-16 16:37:12 +03:00
xmrig
adb778de8a Update README.md 2017-04-15 10:52:08 +03:00
105 changed files with 15920 additions and 4026 deletions

34
CHANGELOG.md Normal file
View File

@@ -0,0 +1,34 @@
# v0.9.0
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
- Added option `--asm`, possible values `--asm auto`, `--asm none`, `--asm intel` and `--asm ryzen`.
- Added support for new style long and short algorithm names, possible values: `cryptonight`, `cryptonight/0`, `cryptonight/1`, `cryptonight/2`, `cryptonight-lite`, `cryptonight-lite/0`, `cryptonight-lite/1` and short equvalents `cn/2` etc.
- Added `--variant`, example `--algo cn --variant 2`, by default miner automaticaly detect proper variant for Monero by block version.
- Added CryptoNight-Lite variant 1.
- Added xmrig-proxy autodetection, nicehash will be enabled automaticaly.
- Added workaround for xmrig-proxy [bug](https://github.com/xmrig/xmrig-proxy/commit/dfa1960fe3eeb13f80717b7dbfcc7c6e9f222d89).
# v0.8.2
- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture).
- Fixed gcc 7.1 support.
# v0.8.1
- Added nicehash support, detects automaticaly by pool URL, for example `cryptonight.eu.nicehash.com:3355` or manually via option `--nicehash`.
# v0.8.0
- Added double hash mode, also known as lower power mode. `--av=2` and `--av=4`.
- Added smart automatic CPU configuration. Default threads count now depends on size of the L3 cache of CPU.
- Added CryptoNight-Lite support for AEON `-a cryptonight-lite`.
- Added `--max-cpu-usage` option for auto CPU configuration mode.
- Added `--safe` option for adjust threads and algorithm variations to current CPU.
- No more manual steps to enable huge pages on Windows. XMRig will do it automatically.
- Removed BMI2 algorithm variation.
- Removed default pool URL.
# v0.6.0
- Added automatic cryptonight self test.
- New software AES algorithm variation. Will be automatically selected if cpu not support AES-NI.
- Added 32 bit builds.
- Documented [algorithm variations](https://github.com/xmrig/xmrig#algorithm-variations).
# v0.5.0
- Initial public release.

View File

@@ -1,19 +1,28 @@
cmake_minimum_required(VERSION 3.0)
project(xmrig C)
option(WITH_LIBCPUID "Use Libcpuid" ON)
option(WITH_AEON "CryptoNight-Lite support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON)
set(HEADERS
compat.h
algo/cryptonight/cryptonight.h
elist.h
xmrig.h
version.h
options.h
algo/cryptonight/cryptonight_aesni.h
algo/cryptonight/cryptonight_monero.h
algo/cryptonight/cryptonight_softaes.h
algo/cryptonight/cryptonight_test.h
algo/cryptonight/variant4_random_math.h
compat.h
cpu.h
persistent_memory.h
stratum.h
stats.h
util.h
donate.h
elist.h
options.h
persistent_memory.h
stats.h
stratum.h
util.h
version.h
xmrig.h
)
set(HEADERS_CRYPTO
@@ -21,9 +30,7 @@ set(HEADERS_CRYPTO
crypto/c_blake256.h
crypto/c_jh.h
crypto/c_skein.h
crypto/oaes_lib.h
crypto/oaes_config.h
crypto/aesb.h
crypto/soft_aes.h
)
set(HEADERS_COMPAT
@@ -38,10 +45,17 @@ set(HEADERS_UTILS
set(SOURCES
xmrig.c
algo/cryptonight/cryptonight_common.c
algo/cryptonight/cryptonight.c
algo/cryptonight/cryptonight_av1.c
algo/cryptonight/cryptonight_av2.c
algo/cryptonight/cryptonight_av3.c
algo/cryptonight/cryptonight_av4.c
algo/cryptonight/cryptonight_r_av1.c
algo/cryptonight/cryptonight_r_av2.c
algo/cryptonight/cryptonight_r_av3.c
algo/cryptonight/cryptonight_r_av4.c
util.c
options.c
cpu.c
stratum.c
stats.c
memory.c
@@ -53,8 +67,6 @@ set(SOURCES_CRYPTO
crypto/c_blake256.c
crypto/c_jh.c
crypto/c_skein.c
crypto/oaes_lib.c
crypto/aesb.c
)
set(SOURCES_UTILS
@@ -63,27 +75,34 @@ set(SOURCES_UTILS
)
if (WIN32)
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c compat/winansi.c)
set(EXTRA_LIBS ws2_32)
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c win/app.rc compat/winansi.c)
set(EXTRA_LIBS ws2_32 crypt32)
add_definitions(/D_WIN32_WINNT=0x600)
elseif (APPLE)
set(SOURCES_OS mac/cpu_mac.c mac/memory_mac.c mac/xmrig_mac.c)
else()
set(SOURCES_OS unix/cpu_unix.c unix/memory_unix.c unix/xmrig_unix.c)
set(EXTRA_LIBS pthread)
set(EXTRA_LIBS pthread rt m)
endif()
include_directories(.)
add_definitions(/DUSE_NATIVE_THREADS)
add_definitions(/D_GNU_SOURCE)
add_definitions(/DDEBUG_THREADS)
add_definitions(/DUNICODE)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE Release)
endif()
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -mbmi2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants")
else()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
endif()
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate")
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
@@ -94,34 +113,44 @@ endif()
include_directories(compat/jansson)
add_subdirectory(compat/jansson)
find_package(CURL REQUIRED)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
if (CURL_FOUND)
include_directories(${CURL_INCLUDE_DIRS})
add_definitions(/DCURL_STATICLIB)
link_directories(${CURL_LIBRARIES})
find_package(CURL REQUIRED)
include_directories(${CURL_INCLUDE_DIRS})
add_definitions(/DCURL_STATICLIB)
link_directories(${CURL_LIBRARIES})
if (WITH_LIBCPUID)
add_subdirectory(compat/libcpuid)
include_directories(compat/libcpuid)
set(CPUID_LIB cpuid)
set(SOURCES_CPUID cpu.c)
else()
add_definitions(/DXMRIG_NO_LIBCPUID)
set(SOURCES_CPUID cpu_stub.c)
endif()
include(cmake/asm.cmake)
if (WITH_AEON)
set(SOURCES_AEON
algo/cryptonight-lite/cryptonight_lite_av1.c
algo/cryptonight-lite/cryptonight_lite_av2.c
algo/cryptonight-lite/cryptonight_lite_av3.c
algo/cryptonight-lite/cryptonight_lite_av4.c
algo/cryptonight-lite/cryptonight_lite_aesni.h
algo/cryptonight-lite/cryptonight_lite_softaes.h
)
else()
add_definitions(/DXMRIG_NO_AEON)
endif()
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
add_subdirectory(algo/cryptonight/bmi2)
set(CRYPTONIGHT64
algo/cryptonight/cryptonight_av1_aesni.c
algo/cryptonight/cryptonight_av2_aesni_wolf.c
algo/cryptonight/cryptonight_av4_legacy.c
algo/cryptonight/cryptonight_av5_aesni_experimental.c
)
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${CRYPTONIGHT64})
target_link_libraries(xmrig jansson curl cryptonight_av3_aesni_bmi2 ${EXTRA_LIBS})
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
target_link_libraries(xmrig ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
else()
set(CRYPTONIGHT32
algo/cryptonight/cryptonight_av1_aesni32.c
algo/cryptonight/cryptonight_av4_legacy.c
)
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${CRYPTONIGHT32})
target_link_libraries(xmrig32 jansson -L${CURL_LIBRARIES} ${EXTRA_LIBS})
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
target_link_libraries(xmrig32 ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
endif()
source_group("HEADERS" FILES ${HEADERS})

View File

@@ -2,24 +2,29 @@
XMRig is high performance Monero (XMR) CPU miner, with the official full Windows support.
Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code.
<img src="https://i.imgur.com/GhmdK2f.png" width="480">
<img src="http://i.imgur.com/GdRDnAu.png" width="596" >
#### Table of contents
* [Features](#features)
* [Download](#download)
* [Usage](#usage)
* [Algorithm variations](#algorithm-variations)
* [Build](#build)
* [Common Issues](#common-issues)
* [Other information](#other-information)
* [Donations](#Donations)
* [Donations](#donations)
* [Contacts](#contacts)
## Features
* High performance, faster than others (290+ H/s on i7 6700).
* High performance (290+ H/s on i7 6700).
* Official Windows support.
* Small Windows executable, only 350 KB without dependencies.
* Small Windows executable, only 535 KB without dependencies.
* Support for backup (failover) mining server.
* keepalived support.
* Command line options compatible with cpuminer.
* CryptoNight-Lite support for AEON.
* Smart automatic [CPU configuration](https://github.com/xmrig/xmrig/wiki/Threads).
* Nicehash support
* It's open source software.
## Download
@@ -30,11 +35,12 @@ Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of
## Usage
### Basic example
```
xmrig.exe -o xmr-eu.dwarfpool.com:8005 -b xmr-usa.dwarfpool.com:8005 -u YOUR_WALLET -p x -k
xmrig.exe -o xmr-eu.dwarfpool.com:8005 -u YOUR_WALLET -p x -k
```
### Options
```
-a, --algo=ALGO cryptonight (default) or cryptonight-lite
-o, --url=URL URL of mining server
-b, --backup-url=URL URL of backup mining server
-O, --userpass=U:P username:password pair for mining server
@@ -50,10 +56,20 @@ xmrig.exe -o xmr-eu.dwarfpool.com:8005 -b xmr-usa.dwarfpool.com:8005 -u YOUR_WAL
--donate-level=N donate level, default 5% (5 minutes in 100 minutes)
-B, --background run the miner in the background
-c, --config=FILE load a JSON-format configuration file
--max-cpu-usage=N maximum cpu usage for automatic threads mode (default 75)
--safe safe adjust threads and av settings for current cpu
--nicehash enable nicehash support
-h, --help display this help and exit
-V, --version output version information and exit
```
## Algorithm variations
Since version 0.8.0.
* `--av=1` For CPUs with hardware AES.
* `--av=2` Lower power mode (double hash) of `1`.
* `--av=3` Software AES implementation.
* `--av=4` Lower power mode (double hash) of `3`.
## Build
### Ubuntu (Debian-based distros)
```
@@ -62,28 +78,40 @@ git clone https://github.com/xmrig/xmrig.git
cd xmrig
mkdir build
cd build
cmake ..
cmake .. -DCMAKE_BUILD_TYPE=Release
make
```
### Windows
It's complicated, you need [MSYS2](http://www.msys2.org/), custom libcurl build, and of course CMake too.
Necessary MSYS2 packages:
```
pacman -Sy
pacman -S mingw-w64-x86_64-gcc
pacman -S make
pacman -S mingw-w64-x86_64-cmake
pacman -S mingw-w64-x86_64-pkg-config
```
Configure options for libcurl:
```
./configure --disable-shared --enable-optimize --enable-threaded-resolver --disable-libcurl-option --disable-ares --disable-rt --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --disable-manual --disable-ipv6 --disable-sspi --disable-crypto-auth --disable-ntlm-wb --disable-tls-srp --disable-unix-sockets --without-zlib --without-winssl --without-ssl --without-libssh2 --without-nghttp2 --disable-cookies --without-ca-bundle
./configure --disable-shared --enable-optimize --enable-threaded-resolver --disable-libcurl-option --disable-ares --disable-rt --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --disable-manual --disable-ipv6 --disable-sspi --disable-crypto-auth --disable-ntlm-wb --disable-tls-srp --disable-unix-sockets --without-zlib --without-winssl --without-ssl --without-libssh2 --without-nghttp2 --disable-cookies --without-ca-bundle --without-librtmp
```
CMake options:
```
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\<path>\curl-7.53.1\include" -DCURL_LIBRARY="c:\<path>\curl-7.53.1\lib\.libs"
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\xmrig-deps\gcc\x64\include" -DCURL_LIBRARY="c:\xmrig-deps\gcc\x64\lib\libcurl.a"
```
### Optional features
`-DWITH_LIBCPUID=OFF` Disable libcpuid. Auto configuration of CPU after this will be very limited.
`-DWITH_AEON=OFF` Disable CryptoNight-Lite support.
## Common Issues
### HUGE PAGES unavailable
* Run XMRig as Administrator.
* Enable SeLockMemoryPrivilege. For Windows 7 pro, or Windows 8 and above see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx).
* Since version 0.8.0 XMRig automatically enable SeLockMemoryPrivilege for current user, but reboot or sign out still required. [Manual instruction](https://msdn.microsoft.com/en-gb/library/ms190730.aspx).
## Other information
* Now only support 64 bit operating systems (Windows/Linux).
* No HTTP support, only stratum protocol support.
* No TLS support.
* Default donation 5% (5 minutes in 100 minutes) can be reduced to 1% via command line option `--donate-level`.
@@ -105,3 +133,7 @@ Please note performance is highly dependent on system load. The numbers above ar
## Donations
* XMR: `48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD`
* BTC: `1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT`
## Contacts
* support@xmrig.com
* [reddit](https://www.reddit.com/user/XMRig/)

View File

@@ -0,0 +1,274 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_LITE_AESNI_H
#define XMRIG_CRYPTONIGHT_LITE_AESNI_H
#include <x86intrin.h>
#include <stdint.h>
#define aes_genkey_sub(imm8) \
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
xout1 = _mm_shuffle_epi32(xout1, 0xFF); \
*xout0 = sl_xor(*xout0); \
*xout0 = _mm_xor_si128(*xout0, xout1); \
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\
xout1 = _mm_shuffle_epi32(xout1, 0xAA); \
*xout2 = sl_xor(*xout2); \
*xout2 = _mm_xor_si128(*xout2, xout1); \
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x1)
}
static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x2)
}
static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x4)
}
static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x8)
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
*x3 = _mm_aesenc_si128(*x3, key);
*x4 = _mm_aesenc_si128(*x4, key);
*x5 = _mm_aesenc_si128(*x5, key);
*x6 = _mm_aesenc_si128(*x6, key);
*x7 = _mm_aesenc_si128(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub1(&xout0, &xout2);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub2(&xout0, &xout2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub4(&xout0, &xout2);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub8(&xout0, &xout2);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = EXTRACT64(tmp);
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
uint64_t vh = EXTRACT64(tmp);
uint8_t x = vh >> 24;
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh;
}
#endif /* XMRIG_CRYPTONIGHT_LITE_AESNI_H */

View File

@@ -0,0 +1,134 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "algo/cryptonight/cryptonight_monero.h"
#include "crypto/c_keccak.h"
#include "cryptonight_lite_aesni.h"
void cryptonight_lite_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_lite_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 32);
return;
}
keccak(input, size, ctx[0]->state, 200);
VARIANT1_INIT(0);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}

View File

@@ -0,0 +1,202 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "algo/cryptonight/cryptonight_monero.h"
#include "cryptonight_lite_aesni.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
}
void cryptonight_lite_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 64);
return;
}
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
}

View File

@@ -0,0 +1,134 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "algo/cryptonight/cryptonight_monero.h"
#include "cryptonight_lite_softaes.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_lite_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 32);
return;
}
keccak(input, size, ctx[0]->state, 200);
VARIANT1_INIT(0);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}

View File

@@ -0,0 +1,202 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "algo/cryptonight/cryptonight_monero.h"
#include "cryptonight_lite_softaes.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av4_v0(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
void cryptonight_lite_av4_v1(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 64);
return;
}
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
}

View File

@@ -0,0 +1,255 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
#define XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
#include <x86intrin.h>
#include <stdint.h>
#include "crypto/soft_aes.h"
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
{
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft_aeskeygenassist(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = soft_aesenc(*x0, key);
*x1 = soft_aesenc(*x1, key);
*x2 = soft_aesenc(*x2, key);
*x3 = soft_aesenc(*x3, key);
*x4 = soft_aesenc(*x4, key);
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x1);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x4);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x8);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; i < MEMORY_LITE / sizeof(__m128i); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = EXTRACT64(tmp);
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
uint64_t vh = EXTRACT64(tmp);
uint8_t x = vh >> 24;
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh;
}
#endif /* XMRIG_CRYPTONIGHT_LITE_SOFTAES_H */

View File

@@ -1,2 +0,0 @@
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mbmi2")
add_library(cryptonight_av3_aesni_bmi2 STATIC ../cryptonight_av3_aesni_bmi2.c)

View File

@@ -0,0 +1,407 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <mm_malloc.h>
#ifndef BUILD_TEST
# include "xmrig.h"
#endif
#include "cpu.h"
#include "crypto/c_blake256.h"
#include "crypto/c_groestl.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
#include "cryptonight_test.h"
#include "cryptonight.h"
#include "options.h"
#include "persistent_memory.h"
static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX] = {};
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av2_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av3_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av3(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av4(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
#ifndef XMRIG_NO_AEON
void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
#endif
#ifndef XMRIG_NO_ASM
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av1_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av1_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av2_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_r_av2_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
#endif
static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
{
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
if (func == NULL) {
return false;
}
func(test_input, 76, output, ctx);
return memcmp(output, referenceValue, opt_double_hash ? 64 : 32) == 0;
}
static inline bool verify2(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
{
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
if (func == NULL) {
return false;
}
if (opt_double_hash) {
uint8_t input[128];
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
const size_t size = cn_r_test_input[i].size;
memcpy(input, cn_r_test_input[i].data, size);
memcpy(input + size, cn_r_test_input[i].data, size);
ctx[0]->height = ctx[1]->height = cn_r_test_input[i].height;
func(input, size, output, ctx);
if (memcmp(output, referenceValue + i * 32, 32) != 0 || memcmp(output + 32, referenceValue + i * 32, 32) != 0) {
return false;
}
}
}
else {
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
ctx[0]->height = cn_r_test_input[i].height;
func(cn_r_test_input[i].data, cn_r_test_input[i].size, output, ctx);
if (memcmp(output, referenceValue + i * 32, 32) != 0) {
return false;
}
}
}
return true;
}
static bool self_test() {
struct cryptonight_ctx *ctx[2];
uint8_t output[64];
const size_t count = opt_double_hash ? 2 : 1;
const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
bool result = false;
for (size_t i = 0; i < count; ++i) {
ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx[i]->memory = _mm_malloc(size, 16);
init_cn_r(ctx[i]);
}
if (opt_algo == ALGO_CRYPTONIGHT) {
result = verify(VARIANT_0, output, ctx, test_output_v0) &&
verify(VARIANT_1, output, ctx, test_output_v1) &&
verify(VARIANT_2, output, ctx, test_output_v2) &&
verify2(VARIANT_4, output, ctx, test_output_r);
}
# ifndef XMRIG_NO_AEON
else {
result = verify(VARIANT_0, output, ctx, test_output_v0_lite) &&
verify(VARIANT_1, output, ctx, test_output_v1_lite);
}
# endif
for (size_t i = 0; i < count; ++i) {
_mm_free(ctx[i]->memory);
_mm_free(ctx[i]);
}
return result;
}
#ifndef XMRIG_NO_ASM
cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
{
if (assembly == ASM_AUTO) {
assembly = (enum Assembly) cpu_info.assembly;
}
if (assembly == ASM_NONE) {
return NULL;
}
return asm_func_map[av][variant][assembly];
}
#endif
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
{
assert(av > AV_AUTO && av < AV_MAX);
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
# ifndef XMRIG_NO_ASM
if (algorithm == ALGO_CRYPTONIGHT) {
cn_hash_fun fun = cryptonight_hash_asm_fn(av, variant, opt_assembly);
if (fun) {
return fun;
}
}
# endif
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
cryptonight_av1_v0,
cryptonight_av2_v0,
cryptonight_av3_v0,
cryptonight_av4_v0,
cryptonight_av1_v1,
cryptonight_av2_v1,
cryptonight_av3_v1,
cryptonight_av4_v1,
cryptonight_av1_v2,
cryptonight_av2_v2,
cryptonight_av3_v2,
cryptonight_av4_v2,
cryptonight_r_av1,
cryptonight_r_av2,
cryptonight_r_av3,
cryptonight_r_av4,
# ifndef XMRIG_NO_AEON
cryptonight_lite_av1_v0,
cryptonight_lite_av2_v0,
cryptonight_lite_av3_v0,
cryptonight_lite_av4_v0,
cryptonight_lite_av1_v1,
cryptonight_lite_av2_v1,
cryptonight_lite_av3_v1,
cryptonight_lite_av4_v1,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
# else
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
# endif
};
# ifndef NDEBUG
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
cn_hash_fun func = func_table[index];
assert(index < sizeof(func_table) / sizeof(func_table[0]));
assert(func != NULL);
return func;
# else
return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1];
# endif
}
bool cryptonight_init(int av)
{
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
# ifndef XMRIG_NO_ASM
asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel;
asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_intel;
asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer;
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm;
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm;
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm;
asm_func_map[AV_SINGLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av1_asm_intel;
asm_func_map[AV_SINGLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av1_asm_intel;
asm_func_map[AV_SINGLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av1_asm_bulldozer;
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av2_asm_intel;
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av2_asm_intel;
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av2_asm_bulldozer;
# endif
return self_test();
}
static inline void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash((uint8_t*)output, input, len);
}
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
groestl(input, len * 8, (uint8_t*)output);
}
static inline void do_jh_hash(const void* input, size_t len, char* output) {
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
}
static inline void do_skein_hash(const void* input, size_t len, char* output) {
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
}
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
static inline enum Variant cryptonight_variant(uint8_t version)
{
if (opt_variant != VARIANT_AUTO) {
return opt_variant;
}
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
return VARIANT_1;
}
if (version >= 10) {
return VARIANT_4;
}
if (version >= 8) {
return VARIANT_2;
}
return version == 7 ? VARIANT_1 : VARIANT_0;
}
#ifndef BUILD_TEST
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
enum Variant variant = cryptonight_variant(blob[0]);
do {
cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
(*hashes_done)++;
if (unlikely(hash[7] < target)) {
return 1;
}
(*nonceptr)++;
} while (likely(((*nonceptr) < max_nonce && !work_restart[thr_id].restart)));
return 0;
}
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
int rc = 0;
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
enum Variant variant = cryptonight_variant(blob[0]);
do {
cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
(*hashes_done) += 2;
if (unlikely(hash[7] < target)) {
return rc |= 1;
}
if (unlikely(hash[15] < target)) {
return rc |= 2;
}
if (rc) {
break;
}
(*nonceptr0)++;
(*nonceptr1)++;
} while (likely(((*nonceptr0) < max_nonce && !work_restart[thr_id].restart)));
return rc;
}
#endif

View File

@@ -4,8 +4,10 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,52 +23,59 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_H__
#define __CRYPTONIGHT_H__
#ifndef XMRIG_CRYPTONIGHT_H
#define XMRIG_CRYPTONIGHT_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#define MEMORY (1 << 21) /* 2 MiB */
#define MEMORY_M128I (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
#define ITER (1 << 20)
#define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
#pragma pack(push, 1)
union hash_state {
uint8_t b[200];
uint64_t w[25];
};
#pragma pack(pop)
#include "options.h"
#pragma pack(push, 1)
union cn_slow_hash_state {
union hash_state hs;
struct {
uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
};
#pragma pack(pop)
#define MEMORY 2097152 /* 2 MiB */
#define MEMORY_LITE 1048576 /* 1 MiB */
#if defined _MSC_VER || defined XMRIG_ARM
#define ABI_ATTRIBUTE
#else
#define ABI_ATTRIBUTE __attribute__((ms_abi))
#endif
struct cryptonight_ctx;
typedef void(*cn_mainloop_fun_ms_abi)(struct cryptonight_ctx*) ABI_ATTRIBUTE;
typedef void(*cn_mainloop_double_fun_ms_abi)(struct cryptonight_ctx*, struct cryptonight_ctx*) ABI_ATTRIBUTE;
struct cryptonight_ctx {
union cn_slow_hash_state state;
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16)));
uint64_t a[2] __attribute__((aligned(16)));
uint64_t b[2] __attribute__((aligned(16)));
uint64_t c[2] __attribute__((aligned(16)));
uint8_t state[224] __attribute__((aligned(16)));
uint8_t *memory __attribute__((aligned(16)));
uint8_t unused[40];
const uint32_t *saes_table;
cn_mainloop_fun_ms_abi generated_code;
cn_mainloop_double_fun_ms_abi generated_code_double;
uint64_t generated_code_height;
uint64_t generated_code_double_height;
uint64_t height;
};
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
extern void (* const extra_hashes[4])(const void *, size_t, char *);
void cryptonight_init(int variant);
void cryptonight_hash(void* output, const void* input, size_t input_len);
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict pdata, const uint32_t *restrict ptarget, uint32_t max_nonce, unsigned long *restrict hashes_done, const char *memory, struct cryptonight_ctx *persistentctx);
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant);
#endif /* __CRYPTONIGHT_H__ */
bool cryptonight_init(int av);
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
#endif /* XMRIG_CRYPTONIGHT_H */

View File

@@ -0,0 +1,274 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_AESNI_H
#define XMRIG_CRYPTONIGHT_AESNI_H
#include <x86intrin.h>
#include <stdint.h>
#define aes_genkey_sub(imm8) \
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
xout1 = _mm_shuffle_epi32(xout1, 0xFF); \
*xout0 = sl_xor(*xout0); \
*xout0 = _mm_xor_si128(*xout0, xout1); \
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\
xout1 = _mm_shuffle_epi32(xout1, 0xAA); \
*xout2 = sl_xor(*xout2); \
*xout2 = _mm_xor_si128(*xout2, xout1); \
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x1)
}
static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x2)
}
static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x4)
}
static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x8)
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
*x3 = _mm_aesenc_si128(*x3, key);
*x4 = _mm_aesenc_si128(*x4, key);
*x5 = _mm_aesenc_si128(*x5, key);
*x6 = _mm_aesenc_si128(*x6, key);
*x7 = _mm_aesenc_si128(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub1(&xout0, &xout2);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub2(&xout0, &xout2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub4(&xout0, &xout2);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub8(&xout0, &xout2);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = EXTRACT64(tmp);
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
uint64_t vh = EXTRACT64(tmp);
uint8_t x = vh >> 24;
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh;
}
#endif /* XMRIG_CRYPTONIGHT_AESNI_H */

View File

@@ -0,0 +1,261 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_aesni.h"
#include "cryptonight_monero.h"
void cryptonight_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 32);
return;
}
keccak(input, size, ctx[0]->state, 200);
VARIANT1_INIT(0);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
VARIANT2_INIT(0);
VARIANT2_SET_ROUNDING_MODE();
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
uint64_t idx0 = al0;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
cx = _mm_aesenc_si128(cx, ax0);
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = _umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, hi, lo);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
bx1 = bx0;
bx0 = cx;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
#ifndef XMRIG_NO_ASM
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx);
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_ivybridge_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_ryzen_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_bulldozer_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
keccakf((uint64_t*) ctx[1]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#endif

View File

@@ -1,216 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "crypto/c_keccak.h"
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
__m128i tmp4;
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
tmp4 = _mm_slli_si128(*tmp1, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
__m128i tmp2, tmp4;
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
tmp4 = _mm_slli_si128(*tmp3, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
__m128i tmp1, tmp2, tmp3, *keys;
keys = (__m128i *)keybuf;
tmp1 = _mm_load_si128((__m128i *)keybuf);
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[2] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[3] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[4] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[5] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[6] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[7] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[8] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[9] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[10] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[11] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[12] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[13] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[14] = tmp1;
}
void cryptonight_av1_aesni(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
uint8_t ExpandedKey[256];
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *) memory;
expkey = (__m128i *)ExpandedKey;
xmminput = (__m128i *)ctx->text;
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
{
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
}
for (i = 0; i < 2; i++)
{
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
}
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
uint64_t c[2] __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16)));
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
_mm_store_si128((__m128i *) c, c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x;
d[0] = d_ptr[0];
d[1] = d_ptr[1];
{
unsigned __int128 res = (unsigned __int128) c[0] * d[0];
d_ptr[0] = ctx->a[0] += res >> 64;
d_ptr[1] = ctx->a[1] += (uint64_t) res;
}
ctx->a[0] ^= d[0];
ctx->a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE) {
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
}

View File

@@ -1,239 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "crypto/c_keccak.h"
static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
__m128i tmp4;
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
tmp4 = _mm_slli_si128(*tmp1, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
__m128i tmp2, tmp4;
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
tmp4 = _mm_slli_si128(*tmp3, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
__m128i tmp1, tmp2, tmp3, *keys;
keys = (__m128i *)keybuf;
tmp1 = _mm_load_si128((__m128i *)keybuf);
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[2] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[3] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[4] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[5] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[6] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[7] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[8] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[9] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[10] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[11] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[12] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[13] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[14] = tmp1;
}
void cryptonight_av1_aesni32(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
uint8_t ExpandedKey[256];
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *) memory;
expkey = (__m128i *)ExpandedKey;
xmminput = (__m128i *)ctx->text;
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
{
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
}
for (i = 0; i < 2; i++)
{
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
}
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
uint64_t c[2] __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16)));
uint64_t hi;
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
_mm_store_si128((__m128i *) c, c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x;
d[0] = d_ptr[0];
d[1] = d_ptr[1];
d_ptr[1] = ctx->a[1] += mul128(c[0], d[0], &hi);
d_ptr[0] = ctx->a[0] += hi;
ctx->a[0] ^= d[0];
ctx->a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE) {
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
}

View File

@@ -0,0 +1,304 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_aesni.h"
#include "cryptonight_monero.h"
void cryptonight_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
void cryptonight_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 64);
return;
}
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
void cryptonight_av2_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT2_SET_ROUNDING_MODE();
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
uint64_t idx0 = al0;
uint64_t idx1 = al1;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
cx0 = _mm_aesenc_si128(cx0, ax0);
cx1 = _mm_aesenc_si128(cx1, ax1);
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
VARIANT2_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11);
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
idx0 = _mm_cvtsi128_si64(cx0);
idx1 = _mm_cvtsi128_si64(cx1);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = _umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, hi, lo);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = _umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE2(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, hi, lo);
al1 += hi;
ah1 += lo;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
al1 ^= cl;
ah1 ^= ch;
idx1 = al1;
bx01 = bx00;
bx11 = bx10;
bx00 = cx0;
bx10 = cx1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}

View File

@@ -1,237 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include "cryptonight.h"
#include "crypto/c_keccak.h"
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
__m128i tmp4;
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
tmp4 = _mm_slli_si128(*tmp1, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
__m128i tmp2, tmp4;
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
tmp4 = _mm_slli_si128(*tmp3, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
__m128i tmp1, tmp2, tmp3, *keys;
keys = (__m128i *)keybuf;
tmp1 = _mm_load_si128((__m128i *)keybuf);
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[2] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[3] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[4] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[5] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[6] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[7] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[8] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[9] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[10] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[11] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[12] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[13] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[14] = tmp1;
}
void cryptonight_av2_aesni_wolf(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
uint8_t ExpandedKey[256];
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *)memory;
expkey = (__m128i *)ExpandedKey;
xmminput = (__m128i *)ctx->text;
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
{
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
}
for (i = 0; i < 2; i++)
{
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
}
__m128i b_x = _mm_load_si128((__m128i *)ctx->b);
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
a[0] = ctx->a[0];
a[1] = ctx->a[1];
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
{
__m128i c_x = _mm_load_si128((__m128i *)&memory[a[0] & 0x1FFFF0]);
__m128i a_x = _mm_load_si128((__m128i *)a);
uint64_t c[2];
c_x = _mm_aesenc_si128(c_x, a_x);
_mm_store_si128((__m128i *)c, c_x);
__builtin_prefetch(&memory[c[0] & 0x1FFFF0], 0, 1);
b_x = _mm_xor_si128(b_x, c_x);
_mm_store_si128((__m128i *)&memory[a[0] & 0x1FFFF0], b_x);
uint64_t *nextblock = (uint64_t *)&memory[c[0] & 0x1FFFF0];
uint64_t b[2];
b[0] = nextblock[0];
b[1] = nextblock[1];
{
uint64_t hi, lo;
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
__asm__("mulq %3\n\t"
: "=d" (hi),
"=a" (lo)
: "%a" (c[0]),
"rm" (b[0])
: "cc" );
a[0] += hi;
a[1] += lo;
}
uint64_t *dst = (uint64_t *) &memory[c[0] & 0x1FFFF0];
dst[0] = a[0];
dst[1] = a[1];
a[0] ^= b[0];
a[1] ^= b[1];
b_x = c_x;
__builtin_prefetch(&memory[a[0] & 0x1FFFF0], 0, 3);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
{
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
}

View File

@@ -0,0 +1,193 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_monero.h"
#include "cryptonight_softaes.h"
void cryptonight_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 32);
return;
}
keccak(input, size, ctx[0]->state, 200);
VARIANT1_INIT(0);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_av3_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
VARIANT2_INIT(0);
VARIANT2_SET_ROUNDING_MODE();
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
uint64_t idx0 = al0;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
cx = soft_aesenc(cx, ax0);
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = _umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, hi, lo);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
bx1 = bx0;
bx0 = cx;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}

View File

@@ -1,214 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "crypto/c_keccak.h"
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
__m128i tmp4;
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
tmp4 = _mm_slli_si128(*tmp1, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
__m128i tmp2, tmp4;
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
tmp4 = _mm_slli_si128(*tmp3, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
__m128i tmp1, tmp2, tmp3, *keys;
keys = (__m128i *)keybuf;
tmp1 = _mm_load_si128((__m128i *)keybuf);
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[2] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[3] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[4] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[5] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[6] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[7] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[8] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[9] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[10] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[11] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[12] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[13] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[14] = tmp1;
}
void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
uint8_t ExpandedKey[256];
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *) memory;
expkey = (__m128i *)ExpandedKey;
xmminput = (__m128i *)ctx->text;
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
{
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
}
for (i = 0; i < 2; i++)
{
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
}
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
uint64_t c[2] __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16)));
uint64_t hi;
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
_mm_store_si128((__m128i *) c, c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x;
d[0] = d_ptr[0];
d[1] = d_ptr[1];
d_ptr[1] = ctx->a[1] += _mulx_u64(c[0], d[0], &hi);
d_ptr[0] = ctx->a[0] += hi;
ctx->a[0] ^= d[0];
ctx->a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE) {
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
for(j = 0; j < 10; j++)
{
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
}

View File

@@ -0,0 +1,304 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_monero.h"
#include "cryptonight_softaes.h"
void cryptonight_av4_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
void cryptonight_av4_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (size < 43) {
memset(output, 0, 64);
return;
}
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
void cryptonight_av4_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT2_SET_ROUNDING_MODE();
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
uint64_t idx0 = al0;
uint64_t idx1 = al1;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
cx0 = soft_aesenc(cx0, ax0);
cx1 = soft_aesenc(cx1, ax1);
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
VARIANT2_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11);
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
idx0 = _mm_cvtsi128_si64(cx0);
idx1 = _mm_cvtsi128_si64(cx1);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = _umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, hi, lo);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = _umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE2(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, hi, lo);
al1 += hi;
ah1 += lo;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
al1 ^= cl;
ah1 ^= ch;
idx1 = al1;
bx01 = bx00;
bx11 = bx10;
bx00 = cx0;
bx10 = cx1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}

View File

@@ -1,151 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "compat.h"
#include "crypto/c_keccak.h"
#include "crypto/aesb.h"
#include "crypto/oaes_lib.h"
static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
hi += ((uint64_t*) c)[0];
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
((uint64_t*) dst)[0] = hi;
((uint64_t*) dst)[1] = lo;
}
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
}
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
}
void cryptonight_av4_legacy(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx) {
oaes_ctx *aes_ctx = (oaes_ctx*) oaes_alloc();
size_t i, j;
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], aes_ctx->key->exp_data);
memcpy((void *) &memory[i], ctx->text, INIT_SIZE_BYTE);
}
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], (uint8_t*) ctx->a);
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], (uint8_t*) ctx->b);
for (i = 0; likely(i < ITER / 4); ++i) {
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
*/
/* Iteration 1 */
j = ctx->a[0] & 0x1FFFF0;
aesb_single_round((const uint8_t*) &memory[j], (uint8_t *) ctx->c, (const uint8_t *) ctx->a);
xor_blocks_dst((const uint8_t*) ctx->c, (const uint8_t*) ctx->b, (uint8_t*) &memory[j]);
/* Iteration 2 */
mul_sum_xor_dst((const uint8_t*) ctx->c, (uint8_t*) ctx->a, (uint8_t*) &memory[ctx->c[0] & 0x1FFFF0]);
/* Iteration 3 */
j = ctx->a[0] & 0x1FFFF0;
aesb_single_round(&memory[j], (uint8_t *) ctx->b, (uint8_t *) ctx->a);
xor_blocks_dst((const uint8_t*) ctx->b, (const uint8_t*) ctx->c, (uint8_t*) &memory[j]);
/* Iteration 4 */
mul_sum_xor_dst((const uint8_t*) ctx->b, (uint8_t*) ctx->a, (uint8_t*) &memory[ctx->b[0] & 0x1FFFF0]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &memory[i + 0 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &memory[i + 1 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &memory[i + 2 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &memory[i + 3 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &memory[i + 4 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &memory[i + 5 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &memory[i + 6 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &memory[i + 7 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
oaes_free((OAES_CTX **) &aes_ctx);
}

View File

@@ -1,248 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "crypto/c_keccak.h"
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
__m128i tmp4;
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
tmp4 = _mm_slli_si128(*tmp1, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
{
__m128i tmp2, tmp4;
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
tmp4 = _mm_slli_si128(*tmp3, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
}
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
static inline void ExpandAESKey256(char *keybuf)
{
__m128i tmp1, tmp2, tmp3, *keys;
keys = (__m128i *)keybuf;
tmp1 = _mm_load_si128((__m128i *)keybuf);
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[2] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[3] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[4] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[5] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[6] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[7] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[8] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[9] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[10] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[11] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[12] = tmp1;
ExpandAESKey256_sub2(&tmp1, &tmp3);
keys[13] = tmp3;
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
ExpandAESKey256_sub1(&tmp1, &tmp2);
keys[14] = tmp1;
}
void cryptonight_av5_aesni_experimental(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
uint8_t ExpandedKey[256];
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *) memory;
expkey = (__m128i *) ExpandedKey;
xmminput = (__m128i *)ctx->text;
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
_mm_prefetch(xmminput, _MM_HINT_T0 );
_mm_prefetch(xmminput + 4, _MM_HINT_T0 );
for (i = 0; i < 64; i += 16) {
_mm_prefetch(longoutput + i, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 4, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 8, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 12, _MM_HINT_T0);
}
_mm_prefetch(expkey, _MM_HINT_T0);
_mm_prefetch(expkey + 4, _MM_HINT_T0);
_mm_prefetch(expkey + 8, _MM_HINT_T0);
for (i = 0; __builtin_expect(i < MEMORY_M128I, 1); i += INIT_SIZE_M128I) {
__builtin_prefetch(longoutput + i + 64, 1, 0);
__builtin_prefetch(longoutput + i + 68, 1, 0);
for(j = 0; j < 10; j++) {
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
_mm_store_si128(&(longoutput[i ]), xmminput[0]);
_mm_store_si128(&(longoutput[i + 1 ]), xmminput[1]);
_mm_store_si128(&(longoutput[i + 2 ]), xmminput[2]);
_mm_store_si128(&(longoutput[i + 3 ]), xmminput[3]);
_mm_store_si128(&(longoutput[i + 4 ]), xmminput[4]);
_mm_store_si128(&(longoutput[i + 5 ]), xmminput[5]);
_mm_store_si128(&(longoutput[i + 6 ]), xmminput[6]);
_mm_store_si128(&(longoutput[i + 7 ]), xmminput[7]);
}
ctx->a[0] = ((uint64_t *) ctx->state.k)[0] ^ ((uint64_t *) ctx->state.k)[4];
ctx->b[0] = ((uint64_t *) ctx->state.k)[2] ^ ((uint64_t *) ctx->state.k)[6];
ctx->a[1] = ((uint64_t *) ctx->state.k)[1] ^ ((uint64_t *) ctx->state.k)[5];
ctx->b[1] = ((uint64_t *) ctx->state.k)[3] ^ ((uint64_t *) ctx->state.k)[7];
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
uint64_t c[2] __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16)));
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
_mm_store_si128((__m128i *) c, c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x;
d[0] = d_ptr[0];
d[1] = d_ptr[1];
{
unsigned __int128 res = (unsigned __int128) c[0] * d[0];
d_ptr[0] = ctx->a[0] += res >> 64;
d_ptr[1] = ctx->a[1] += (uint64_t) res;
}
ctx->a[0] ^= d[0];
ctx->a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
_mm_prefetch(xmminput, _MM_HINT_T0 );
_mm_prefetch(xmminput + 4, _MM_HINT_T0 );
for (i = 0; i < 64; i += 16) {
_mm_prefetch(longoutput + i, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 4, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 8, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 12, _MM_HINT_T0);
}
_mm_prefetch(expkey, _MM_HINT_T0);
_mm_prefetch(expkey + 4, _MM_HINT_T0);
_mm_prefetch(expkey + 8, _MM_HINT_T0);
for (i = 0; __builtin_expect(i < MEMORY_M128I, 1); i += INIT_SIZE_M128I) {
_mm_prefetch(longoutput + i + 64, _MM_HINT_T0);
_mm_prefetch(longoutput + i + 68, _MM_HINT_T0);
xmminput[0] = _mm_xor_si128(longoutput[i ], xmminput[0]);
xmminput[1] = _mm_xor_si128(longoutput[i + 1], xmminput[1]);
xmminput[2] = _mm_xor_si128(longoutput[i + 2], xmminput[2]);
xmminput[3] = _mm_xor_si128(longoutput[i + 3], xmminput[3]);
xmminput[4] = _mm_xor_si128(longoutput[i + 4], xmminput[4]);
xmminput[5] = _mm_xor_si128(longoutput[i + 5], xmminput[5]);
xmminput[6] = _mm_xor_si128(longoutput[i + 6], xmminput[6]);
xmminput[7] = _mm_xor_si128(longoutput[i + 7], xmminput[7]);
for(j = 0; j < 10; j++) {
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
}
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
}

View File

@@ -1,142 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#ifndef BUILD_TEST
# include "xmrig.h"
#endif
#include "crypto/c_groestl.h"
#include "crypto/c_blake256.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
#include "cryptonight.h"
#include "options.h"
#if defined(__x86_64__)
void cryptonight_av1_aesni(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av2_aesni_wolf(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av3_aesni_bmi2(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av5_aesni_experimental(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
#elif defined(__i386__)
void cryptonight_av1_aesni32(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
#endif
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void (*cryptonight_hash_ctx)(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx) = NULL;
void cryptonight_init(int variant)
{
switch (variant) {
#if defined(__x86_64__)
case XMR_VARIANT_AESNI:
cryptonight_hash_ctx = cryptonight_av1_aesni;
break;
case XMR_VARIANT_AESNI_WOLF:
cryptonight_hash_ctx = cryptonight_av2_aesni_wolf;
break;
case XMR_VARIANT_AESNI_BMI2:
cryptonight_hash_ctx = cryptonight_av3_aesni_bmi2;
break;
case XMR_VARIANT_EXPERIMENTAL:
cryptonight_hash_ctx = cryptonight_av5_aesni_experimental;
break;
#elif defined(__i386__)
case XMR_VARIANT_AESNI:
cryptonight_hash_ctx = cryptonight_av1_aesni32;
break;
#endif
case XMR_VARIANT_LEGACY:
cryptonight_hash_ctx = cryptonight_av4_legacy;
break;
default:
break;
}
}
static inline void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash((uint8_t*)output, input, len);
}
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
groestl(input, len * 8, (uint8_t*)output);
}
static inline void do_jh_hash(const void* input, size_t len, char* output) {
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
}
static inline void do_skein_hash(const void* input, size_t len, char* output) {
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
}
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
void cryptonight_hash(void* output, const void* input, size_t len) {
uint8_t *memory __attribute((aligned(16))) = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_hash_ctx(output, input, memory, ctx);
free(memory);
free(ctx);
}
#ifndef BUILD_TEST
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict pdata, const uint32_t *restrict ptarget, uint32_t max_nonce, unsigned long *restrict hashes_done, const char *restrict memory, struct cryptonight_ctx *persistentctx) {
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
uint32_t n = *nonceptr - 1;
const uint32_t first_nonce = n + 1;
do {
*nonceptr = ++n;
cryptonight_hash_ctx(hash, pdata, memory, persistentctx);
if (unlikely(hash[7] < ptarget[7])) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
*hashes_done = n - first_nonce + 1;
return 0;
}
#endif

View File

@@ -0,0 +1,150 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_MONERO_H
#define XMRIG_CRYPTONIGHT_MONERO_H
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <x86intrin.h>
static inline __m128i int_sqrt_v2(const uint64_t n0)
{
__m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52)));
x = _mm_sqrt_sd(_mm_setzero_pd(), x);
uint64_t r = (uint64_t)(_mm_cvtsi128_si64(_mm_castpd_si128(x)));
const uint64_t s = r >> 20;
r >>= 19;
uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
# if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64))
_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
# else
if (x2 < n0) ++r;
# endif
return _mm_cvtsi64_si128(r);
}
# define VARIANT1_INIT(part) \
uint64_t tweak1_2_##part = (*(const uint64_t*)(input + 35 + part * size) ^ \
*((const uint64_t*)(ctx[part]->state) + 24)); \
# define VARIANT2_INIT(part) \
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
#ifdef _MSC_VER
# define VARIANT2_SET_ROUNDING_MODE() { _control87(RC_DOWN, MCW_RC); }
#else
# define VARIANT2_SET_ROUNDING_MODE() { fesetround(FE_DOWNWARD); }
#endif
# define VARIANT2_INTEGER_MATH(part, cl, cx) \
{ \
const uint64_t sqrt_result = (uint64_t)(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
cl ^= (uint64_t)(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
const uint32_t d = (uint32_t)(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
const uint64_t division_result = (uint32_t)(cx_1 / d) + ((cx_1 % d) << 32); \
division_result_xmm_##part = _mm_cvtsi64_si128((int64_t)(division_result)); \
sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
}
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
{ \
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
}
# define VARIANT4_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
{ \
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
_c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \
}
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
{ \
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
}
#ifndef NOINLINE
#ifdef __GNUC__
#define NOINLINE __attribute__ ((noinline))
#elif _MSC_VER
#define NOINLINE __declspec(noinline)
#else
#define NOINLINE
#endif
#endif
#include "variant4_random_math.h"
#define VARIANT4_RANDOM_MATH_INIT(part) \
uint32_t r##part[9]; \
struct V4_Instruction code##part[256]; \
{ \
r##part[0] = (uint32_t)(h##part[12]); \
r##part[1] = (uint32_t)(h##part[12] >> 32); \
r##part[2] = (uint32_t)(h##part[13]); \
r##part[3] = (uint32_t)(h##part[13] >> 32); \
} \
v4_random_math_init(code##part, ctx[part]->height);
#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \
{ \
cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \
r##part[4] = (uint32_t)(al); \
r##part[5] = (uint32_t)(ah); \
r##part[6] = (uint32_t)(_mm_cvtsi128_si32(bx0)); \
r##part[7] = (uint32_t)(_mm_cvtsi128_si32(bx1)); \
r##part[8] = (uint32_t)(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
v4_random_math(code##part, r##part); \
}
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */

View File

@@ -0,0 +1,143 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_aesni.h"
#include "cryptonight_monero.h"
void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
VARIANT2_INIT(0);
VARIANT2_SET_ROUNDING_MODE();
VARIANT4_RANDOM_MATH_INIT(0);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
uint64_t idx0 = al0;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
cx = _mm_aesenc_si128(cx, ax0);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
lo = _umul128(idx0, cl, &hi);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
bx1 = bx0;
bx0 = cx;
}
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
#ifndef XMRIG_NO_ASM
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
void cryptonight_r_av1_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (ctx[0]->generated_code_height != ctx[0]->height) {
struct V4_Instruction code[256];
const int code_size = v4_random_math_init(code, ctx[0]->height);
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_INTEL);
ctx[0]->generated_code_height = ctx[0]->height;
}
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
ctx[0]->generated_code(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_r_av1_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (ctx[0]->generated_code_height != ctx[0]->height) {
struct V4_Instruction code[256];
const int code_size = v4_random_math_init(code, ctx[0]->height);
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_BULLDOZER);
ctx[0]->generated_code_height = ctx[0]->height;
}
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
ctx[0]->generated_code(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
#endif

View File

@@ -0,0 +1,202 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_aesni.h"
#include "cryptonight_monero.h"
void cryptonight_r_av2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT2_SET_ROUNDING_MODE();
VARIANT4_RANDOM_MATH_INIT(0);
VARIANT4_RANDOM_MATH_INIT(1);
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
uint64_t idx0 = al0;
uint64_t idx1 = al1;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
cx0 = _mm_aesenc_si128(cx0, ax0);
cx1 = _mm_aesenc_si128(cx1, ax1);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
idx0 = _mm_cvtsi128_si64(cx0);
idx1 = _mm_cvtsi128_si64(cx1);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
lo = _umul128(idx0, cl, &hi);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
lo = _umul128(idx1, cl, &hi);
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
al1 += hi;
ah1 += lo;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
al1 ^= cl;
ah1 ^= ch;
idx1 = al1;
bx01 = bx00;
bx11 = bx10;
bx00 = cx0;
bx10 = cx1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#ifndef XMRIG_NO_ASM
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
void cryptonight_r_av2_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (ctx[0]->generated_code_height != ctx[0]->height) {
struct V4_Instruction code[256];
const int code_size = v4_random_math_init(code, ctx[0]->height);
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_INTEL);
ctx[0]->generated_code_height = ctx[0]->height;
}
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
ctx[0]->generated_code_double(ctx[0], ctx[1]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
keccakf((uint64_t *) ctx[0]->state, 24);
keccakf((uint64_t *) ctx[1]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
void cryptonight_r_av2_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
if (ctx[0]->generated_code_height != ctx[0]->height) {
struct V4_Instruction code[256];
const int code_size = v4_random_math_init(code, ctx[0]->height);
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_BULLDOZER);
ctx[0]->generated_code_height = ctx[0]->height;
}
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
ctx[0]->generated_code_double(ctx[0], ctx[1]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
keccakf((uint64_t *) ctx[0]->state, 24);
keccakf((uint64_t *) ctx[1]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#endif

View File

@@ -0,0 +1,112 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_monero.h"
#include "cryptonight_softaes.h"
#ifndef XMRIG_NO_ASM
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
#endif
void cryptonight_r_av3(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
# ifndef XMRIG_NO_ASM
if (ctx[0]->generated_code_height != ctx[0]->height) {
struct V4_Instruction code[256];
const int code_size = v4_random_math_init(code, ctx[0]->height);
v4_soft_aes_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_NONE);
ctx[0]->generated_code_height = ctx[0]->height;
}
ctx[0]->saes_table = (const uint32_t*)saes_table;
ctx[0]->generated_code(ctx[0]);
# else
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
VARIANT2_INIT(0);
VARIANT2_SET_ROUNDING_MODE();
VARIANT4_RANDOM_MATH_INIT(0);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
uint64_t idx0 = al0;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
cx = soft_aesenc(cx, ax0);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
lo = _umul128(idx0, cl, &hi);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
bx1 = bx0;
bx0 = cx;
}
# endif
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t *) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}

View File

@@ -0,0 +1,143 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "crypto/c_keccak.h"
#include "cryptonight.h"
#include "cryptonight_monero.h"
#include "cryptonight_softaes.h"
void cryptonight_r_av4(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t* h1 = (uint64_t*) ctx[1]->state;
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT2_SET_ROUNDING_MODE();
VARIANT4_RANDOM_MATH_INIT(0);
VARIANT4_RANDOM_MATH_INIT(1);
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
uint64_t idx0 = al0;
uint64_t idx1 = al1;
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
cx0 = soft_aesenc(cx0, ax0);
cx1 = soft_aesenc(cx1, ax1);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
idx0 = _mm_cvtsi128_si64(cx0);
idx1 = _mm_cvtsi128_si64(cx1);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
lo = _umul128(idx0, cl, &hi);
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
lo = _umul128(idx1, cl, &hi);
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
al1 += hi;
ah1 += lo;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
al1 ^= cl;
ah1 ^= ch;
idx1 = al1;
bx01 = bx00;
bx11 = bx10;
bx00 = cx0;
bx10 = cx1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}

View File

@@ -0,0 +1,255 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_SOFTAES_H
#define XMRIG_CRYPTONIGHT_SOFTAES_H
#include <x86intrin.h>
#include <stdint.h>
#include "crypto/soft_aes.h"
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
{
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft_aeskeygenassist(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = soft_aesenc(*x0, key);
*x1 = soft_aesenc(*x1, key);
*x2 = soft_aesenc(*x2, key);
*x3 = soft_aesenc(*x3, key);
*x4 = soft_aesenc(*x4, key);
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x1);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x4);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x8);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = EXTRACT64(tmp);
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
uint64_t vh = EXTRACT64(tmp);
uint8_t x = vh >> 24;
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh;
}
#endif /* XMRIG_CRYPTONIGHT_SOFTAES_H */

View File

@@ -0,0 +1,129 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_TEST_H
#define XMRIG_CRYPTONIGHT_TEST_H
#include <stdint.h>
const static uint8_t test_input[152] = {
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02
};
const static uint8_t test_output_v0[64] = {
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F
};
// Cryptonight variant 1 (Monero v7)
const static uint8_t test_output_v1[64] = {
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22
};
// Cryptonight variant 2 (Monero v8)
const static uint8_t test_output_v2[64] = {
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78
};
struct cn_r_test_input_data
{
uint64_t height;
size_t size;
uint8_t data[64];
};
const static struct cn_r_test_input_data cn_r_test_input[] = {
{ 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } },
{ 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } },
{ 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } },
{ 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } },
{ 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } },
{ 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } },
{ 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } },
{ 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } },
{ 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } },
{ 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } },
};
// "cn/r"
const static uint8_t test_output_r[] = {
0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,
0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66,
0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab,
0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb,
0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02,
0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef,
0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd,
0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4,
0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3,
0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e,
};
#ifndef XMRIG_NO_AEON
const static uint8_t test_output_v0_lite[64] = {
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD
};
// AEON v7
const static uint8_t test_output_v1_lite[64] = {
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F
};
#endif
#endif /* XMRIG_CRYPTONIGHT_TEST_H */

View File

@@ -0,0 +1,449 @@
#ifndef VARIANT4_RANDOM_MATH_H
#define VARIANT4_RANDOM_MATH_H
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "crypto/c_blake256.h"
enum V4_Settings
{
// Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications
TOTAL_LATENCY = 15 * 3,
// Always generate at least 60 instructions
NUM_INSTRUCTIONS_MIN = 60,
// Never generate more than 70 instructions (final RET instruction doesn't count here)
NUM_INSTRUCTIONS_MAX = 70,
// Available ALUs for MUL
// Modern CPUs typically have only 1 ALU which can do multiplications
ALU_COUNT_MUL = 1,
// Total available ALUs
// Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code
ALU_COUNT = 3,
};
enum V4_InstructionList
{
MUL, // a*b
ADD, // a+b + C, C is an unsigned 32-bit constant
SUB, // a-b
ROR, // rotate right "a" by "b & 31" bits
ROL, // rotate left "a" by "b & 31" bits
XOR, // a^b
RET, // finish execution
V4_INSTRUCTION_COUNT = RET,
};
// V4_InstructionDefinition is used to generate code from random data
// Every random sequence of bytes is a valid code
//
// There are 9 registers in total:
// - 4 variable registers
// - 5 constant registers initialized from loop variables
// This is why dst_index is 2 bits
enum V4_InstructionDefinition
{
V4_OPCODE_BITS = 3,
V4_DST_INDEX_BITS = 2,
V4_SRC_INDEX_BITS = 3,
};
struct V4_Instruction
{
uint8_t opcode;
uint8_t dst_index;
uint8_t src_index;
uint32_t C;
};
#ifndef FORCEINLINE
#ifdef __GNUC__
#define FORCEINLINE __attribute__((always_inline)) inline
#elif _MSC_VER
#define FORCEINLINE __forceinline
#else
#define FORCEINLINE inline
#endif
#endif
#ifndef UNREACHABLE_CODE
#ifdef __GNUC__
#define UNREACHABLE_CODE __builtin_unreachable()
#elif _MSC_VER
#define UNREACHABLE_CODE __assume(false)
#else
#define UNREACHABLE_CODE
#endif
#endif
#define SWAP32LE(x) x
#define SWAP64LE(x) x
#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU:
// every switch-case will point to the same destination on every iteration of Cryptonight main loop
//
// This is about as fast as it can get without using low-level machine code generation
//template<typename v4_reg>
static void v4_random_math(const struct V4_Instruction* code, uint32_t r[9])
{
#define REG_BITS 32
#define V4_EXEC(i) \
{ \
const struct V4_Instruction* op = code + i; \
const uint32_t src = r[op->src_index]; \
uint32_t *dst = r + op->dst_index; \
switch (op->opcode) \
{ \
case MUL: \
*dst *= src; \
break; \
case ADD: \
*dst += src + op->C; \
break; \
case SUB: \
*dst -= src; \
break; \
case ROR: \
{ \
const uint32_t shift = src % REG_BITS; \
*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
} \
break; \
case ROL: \
{ \
const uint32_t shift = src % REG_BITS; \
*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
} \
break; \
case XOR: \
*dst ^= src; \
break; \
case RET: \
return; \
default: \
UNREACHABLE_CODE; \
break; \
} \
}
#define V4_EXEC_10(j) \
V4_EXEC(j + 0) \
V4_EXEC(j + 1) \
V4_EXEC(j + 2) \
V4_EXEC(j + 3) \
V4_EXEC(j + 4) \
V4_EXEC(j + 5) \
V4_EXEC(j + 6) \
V4_EXEC(j + 7) \
V4_EXEC(j + 8) \
V4_EXEC(j + 9)
// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
//
// 60 27960
// 61 105054
// 62 2452759
// 63 5115997
// 64 1022269
// 65 1109635
// 66 153145
// 67 8550
// 68 4529
// 69 102
// Unroll 70 instructions here
V4_EXEC_10(0); // instructions 0-9
V4_EXEC_10(10); // instructions 10-19
V4_EXEC_10(20); // instructions 20-29
V4_EXEC_10(30); // instructions 30-39
V4_EXEC_10(40); // instructions 40-49
V4_EXEC_10(50); // instructions 50-59
V4_EXEC_10(60); // instructions 60-69
#undef V4_EXEC_10
#undef V4_EXEC
#undef REG_BITS
}
// If we don't have enough data available, generate more
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
{
if (*data_index + bytes_needed > data_size)
{
hash_extra_blake(data, data_size, (char*) data);
*data_index = 0;
}
}
// Generates as many random math operations as possible with given latency and ALU restrictions
// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions
static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height)
{
// MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle
// These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake
//
// AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors
// Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors
// AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same
// Source: https://www.agner.org/optimize/instruction_tables.pdf
const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 };
// Instruction latencies for theoretical ASIC implementation
const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 };
// Available ALUs for each instruction
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
int8_t data[32];
memset(data, 0, sizeof(data));
uint64_t tmp = SWAP64LE(height);
memcpy(data, &tmp, sizeof(uint64_t));
data[20] = -38;
// Set data_index past the last byte in data
// to trigger full data update with blake hash
// before we start using it
size_t data_index = sizeof(data);
int code_size;
// There is a small chance (1.8%) that register R8 won't be used in the generated program
// So we keep track of it and try again if it's not used
bool r8_used;
do {
int latency[9];
int asic_latency[9];
// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
// byte 0: current value of the destination register
// byte 1: instruction opcode
// byte 2: current value of the source register
//
// Registers R4-R8 are constant and are treated as having the same value because when we do
// the same operation twice with two constant source registers, it can be optimized into a single operation
uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
bool is_rotation[V4_INSTRUCTION_COUNT];
bool rotated[4];
int rotate_count = 0;
memset(latency, 0, sizeof(latency));
memset(asic_latency, 0, sizeof(asic_latency));
memset(alu_busy, 0, sizeof(alu_busy));
memset(is_rotation, 0, sizeof(is_rotation));
memset(rotated, 0, sizeof(rotated));
is_rotation[ROR] = true;
is_rotation[ROL] = true;
int num_retries = 0;
code_size = 0;
int total_iterations = 0;
r8_used = false;
// Generate random code to achieve minimal required latency for our abstract CPU
// Try to get this latency for all 4 registers
while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64))
{
// Fail-safe to guarantee loop termination
++total_iterations;
if (total_iterations > 256)
break;
check_data(&data_index, 1, data, sizeof(data));
const uint8_t c = ((uint8_t*)data)[data_index++];
// MUL = opcodes 0-2
// ADD = opcode 3
// SUB = opcode 4
// ROR/ROL = opcode 5, shift direction is selected randomly
// XOR = opcodes 6-7
uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1);
if (opcode == 5)
{
check_data(&data_index, 1, data, sizeof(data));
opcode = (data[data_index++] >= 0) ? ROR : ROL;
}
else if (opcode >= 6)
{
opcode = XOR;
}
else
{
opcode = (opcode <= 2) ? MUL : (opcode - 2);
}
uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1);
uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1);
const int a = dst_index;
int b = src_index;
// Don't do ADD/SUB/XOR with the same register
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
{
// a is always < 4, so we don't need to check bounds here
b = 8;
src_index = b;
}
// Don't do rotation with the same destination twice because it's equal to a single rotation
if (is_rotation[opcode] && rotated[a])
{
continue;
}
// Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized:
// 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations
// 2xXOR(a, b) = NOP
if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16)))
{
continue;
}
// Find which ALU is available (and when) for this instruction
int next_latency = (latency[a] > latency[b]) ? latency[a] : latency[b];
int alu_index = -1;
while (next_latency < TOTAL_LATENCY)
{
for (int i = op_ALUs[opcode] - 1; i >= 0; --i)
{
if (!alu_busy[next_latency][i])
{
// ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check
if ((opcode == ADD) && alu_busy[next_latency + 1][i])
{
continue;
}
// Rotation can only start when previous rotation is finished, so do an additional availability check
if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode]))
{
continue;
}
alu_index = i;
break;
}
}
if (alu_index >= 0)
{
break;
}
++next_latency;
}
// Don't generate instructions that leave some register unchanged for more than 7 cycles
if (next_latency > latency[a] + 7)
{
continue;
}
next_latency += op_latency[opcode];
if (next_latency <= TOTAL_LATENCY)
{
if (is_rotation[opcode])
{
++rotate_count;
}
// Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined
alu_busy[next_latency - op_latency[opcode]][alu_index] = true;
latency[a] = next_latency;
// ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple
asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode];
rotated[a] = is_rotation[opcode];
inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16);
code[code_size].opcode = opcode;
code[code_size].dst_index = dst_index;
code[code_size].src_index = src_index;
code[code_size].C = 0;
if (src_index == 8)
{
r8_used = true;
}
if (opcode == ADD)
{
// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true;
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
uint32_t t;
memcpy(&t, data + data_index, sizeof(uint32_t));
code[code_size].C = SWAP32LE(t);
data_index += sizeof(uint32_t);
}
++code_size;
if (code_size >= NUM_INSTRUCTIONS_MIN)
{
break;
}
}
else
{
++num_retries;
}
}
// ASIC has more execution resources and can extract as much parallelism from the code as possible
// We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC
// Get this latency for at least 1 of the 4 registers
const int prev_code_size = code_size;
while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY))
{
int min_idx = 0;
int max_idx = 0;
for (int i = 1; i < 4; ++i)
{
if (asic_latency[i] < asic_latency[min_idx]) min_idx = i;
if (asic_latency[i] > asic_latency[max_idx]) max_idx = i;
}
const uint8_t pattern[3] = { ROR, MUL, MUL };
const uint8_t opcode = pattern[(code_size - prev_code_size) % 3];
latency[min_idx] = latency[max_idx] + op_latency[opcode];
asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode];
code[code_size].opcode = opcode;
code[code_size].dst_index = min_idx;
code[code_size].src_index = max_idx;
code[code_size].C = 0;
++code_size;
}
// There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time
// It never does more than 4 iterations for all block heights < 10,000,000
} while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));
// It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here
// Add final instruction to stop the interpreter
code[code_size].opcode = RET;
code[code_size].dst_index = 0;
code[code_size].src_index = 0;
code[code_size].C = 0;
return code_size;
}
#endif

27
cmake/asm.cmake Normal file
View File

@@ -0,0 +1,27 @@
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set(XMRIG_ASM_LIBRARY "xmrig-asm")
enable_language(ASM)
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
set(XMRIG_ASM_FILES
"crypto/asm/win64/cn_main_loop.S"
"crypto/asm/CryptonightR_template.S"
)
else()
set(XMRIG_ASM_FILES
"crypto/asm/cn_main_loop.S"
"crypto/asm/CryptonightR_template.S"
)
endif()
set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY C)
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
set(XMRIG_ASM_SOURCES "crypto/CryptonightR_gen.c")
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
else()
set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "")
add_definitions(/DXMRIG_NO_ASM)
endif()

View File

@@ -6,6 +6,8 @@ add_definitions(-DHAVE_CONFIG_H)
# Add the lib sources.
file(GLOB JANSSON_SRC *.c)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
set(JANSSON_HDR_PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/hashtable.h
${CMAKE_CURRENT_SOURCE_DIR}/jansson_private.h

View File

@@ -9,13 +9,17 @@
#define _GNU_SOURCE
#endif
#include "jansson_private.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "jansson.h"
#include "jansson_private.h"
#include "strbuffer.h"
#include "utf.h"
@@ -25,11 +29,28 @@
#define FLAGS_TO_INDENT(f) ((f) & 0x1F)
#define FLAGS_TO_PRECISION(f) (((f) >> 11) & 0x1F)
struct buffer {
const size_t size;
size_t used;
char *data;
};
static int dump_to_strbuffer(const char *buffer, size_t size, void *data)
{
return strbuffer_append_bytes((strbuffer_t *)data, buffer, size);
}
static int dump_to_buffer(const char *buffer, size_t size, void *data)
{
struct buffer *buf = (struct buffer *)data;
if(buf->used + size <= buf->size)
memcpy(&buf->data[buf->used], buffer, size);
buf->used += size;
return 0;
}
static int dump_to_file(const char *buffer, size_t size, void *data)
{
FILE *dest = (FILE *)data;
@@ -38,6 +59,16 @@ static int dump_to_file(const char *buffer, size_t size, void *data)
return 0;
}
static int dump_to_fd(const char *buffer, size_t size, void *data)
{
int *dest = (int *)data;
#ifdef HAVE_UNISTD_H
if(write(*dest, buffer, size) == (ssize_t)size)
return 0;
#endif
return -1;
}
/* 32 spaces (the maximum indentation size) */
static const char whitespace[] = " ";
@@ -168,6 +199,10 @@ static int compare_keys(const void *key1, const void *key2)
static int do_dump(const json_t *json, size_t flags, int depth,
json_dump_callback_t dump, void *data)
{
int embed = flags & JSON_EMBED;
flags &= ~JSON_EMBED;
if(!json)
return -1;
@@ -227,11 +262,11 @@ static int do_dump(const json_t *json, size_t flags, int depth,
n = json_array_size(json);
if(dump("[", 1, data))
if(!embed && dump("[", 1, data))
goto array_error;
if(n == 0) {
array->visited = 0;
return dump("]", 1, data);
return embed ? 0 : dump("]", 1, data);
}
if(dump_indent(flags, depth + 1, 0, dump, data))
goto array_error;
@@ -255,7 +290,7 @@ static int do_dump(const json_t *json, size_t flags, int depth,
}
array->visited = 0;
return dump("]", 1, data);
return embed ? 0 : dump("]", 1, data);
array_error:
array->visited = 0;
@@ -286,11 +321,11 @@ static int do_dump(const json_t *json, size_t flags, int depth,
iter = json_object_iter((json_t *)json);
if(dump("{", 1, data))
if(!embed && dump("{", 1, data))
goto object_error;
if(!iter) {
object->visited = 0;
return dump("}", 1, data);
return embed ? 0 : dump("}", 1, data);
}
if(dump_indent(flags, depth + 1, 0, dump, data))
goto object_error;
@@ -386,7 +421,7 @@ static int do_dump(const json_t *json, size_t flags, int depth,
}
object->visited = 0;
return dump("}", 1, data);
return embed ? 0 : dump("}", 1, data);
object_error:
object->visited = 0;
@@ -416,11 +451,26 @@ char *json_dumps(const json_t *json, size_t flags)
return result;
}
size_t json_dumpb(const json_t *json, char *buffer, size_t size, size_t flags)
{
struct buffer buf = { size, 0, buffer };
if(json_dump_callback(json, dump_to_buffer, (void *)&buf, flags))
return 0;
return buf.used;
}
int json_dumpf(const json_t *json, FILE *output, size_t flags)
{
return json_dump_callback(json, dump_to_file, (void *)output, flags);
}
int json_dumpfd(const json_t *json, int output, size_t flags)
{
return json_dump_callback(json, dump_to_fd, (void *)&output, flags);
}
int json_dump_file(const json_t *json, const char *path, size_t flags)
{
int result;

View File

@@ -168,12 +168,12 @@ static uint32_t generate_seed() {
int done = 0;
#if !defined(_WIN32) && defined(USE_URANDOM)
if (!done && seed_from_urandom(&seed) == 0)
if (seed_from_urandom(&seed) == 0)
done = 1;
#endif
#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI)
if (!done && seed_from_windows_cryptoapi(&seed) == 0)
if (seed_from_windows_cryptoapi(&seed) == 0)
done = 1;
#endif

View File

@@ -21,11 +21,11 @@ extern "C" {
/* version */
#define JANSSON_MAJOR_VERSION 2
#define JANSSON_MINOR_VERSION 9
#define JANSSON_MINOR_VERSION 10
#define JANSSON_MICRO_VERSION 0
/* Micro version is omitted if it's 0 */
#define JANSSON_VERSION "2.9"
#define JANSSON_VERSION "2.10"
/* Version as a 3-byte hex number, e.g. 0x010201 == 1.2.1. Use this
for numeric comparisons, e.g. #if JANSSON_VERSION_HEX >= ... */
@@ -273,6 +273,7 @@ typedef size_t (*json_load_callback_t)(void *buffer, size_t buflen, void *data);
json_t *json_loads(const char *input, size_t flags, json_error_t *error);
json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error);
json_t *json_loadf(FILE *input, size_t flags, json_error_t *error);
json_t *json_loadfd(int input, size_t flags, json_error_t *error);
json_t *json_load_file(const char *path, size_t flags, json_error_t *error);
json_t *json_load_callback(json_load_callback_t callback, void *data, size_t flags, json_error_t *error);
@@ -288,11 +289,14 @@ json_t *json_load_callback(json_load_callback_t callback, void *data, size_t fla
#define JSON_ENCODE_ANY 0x200
#define JSON_ESCAPE_SLASH 0x400
#define JSON_REAL_PRECISION(n) (((n) & 0x1F) << 11)
#define JSON_EMBED 0x10000
typedef int (*json_dump_callback_t)(const char *buffer, size_t size, void *data);
char *json_dumps(const json_t *json, size_t flags);
size_t json_dumpb(const json_t *json, char *buffer, size_t size, size_t flags);
int json_dumpf(const json_t *json, FILE *output, size_t flags);
int json_dumpfd(const json_t *json, int output, size_t flags);
int json_dump_file(const json_t *json, const char *path, size_t flags);
int json_dump_callback(const json_t *json, json_dump_callback_t callback, void *data, size_t flags);

View File

@@ -8,6 +8,7 @@
#ifndef JANSSON_PRIVATE_H
#define JANSSON_PRIVATE_H
#include "jansson_private_config.h"
#include <stddef.h>
#include "jansson.h"
#include "hashtable.h"

View File

@@ -9,15 +9,19 @@
#define _GNU_SOURCE
#endif
#include "jansson_private.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "jansson.h"
#include "jansson_private.h"
#include "strbuffer.h"
#include "utf.h"
@@ -340,7 +344,7 @@ static void lex_scan_string(lex_t *lex, json_error_t *error)
/* control character */
lex_unget_unsave(lex, c);
if(c == '\n')
error_set(error, lex, "unexpected newline", c);
error_set(error, lex, "unexpected newline");
else
error_set(error, lex, "control character 0x%x", c);
goto out;
@@ -914,7 +918,7 @@ static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error)
typedef struct
{
const char *data;
int pos;
size_t pos;
} string_data_t;
static int string_get(void *data)
@@ -1028,6 +1032,45 @@ json_t *json_loadf(FILE *input, size_t flags, json_error_t *error)
return result;
}
static int fd_get_func(int *fd)
{
uint8_t c;
#ifdef HAVE_UNISTD_H
if (read(*fd, &c, 1) == 1)
return c;
#endif
return EOF;
}
json_t *json_loadfd(int input, size_t flags, json_error_t *error)
{
lex_t lex;
const char *source;
json_t *result;
#ifdef HAVE_UNISTD_H
if(input == STDIN_FILENO)
source = "<stdin>";
else
#endif
source = "<stream>";
jsonp_error_init(error, source);
if (input < 0) {
error_set(error, NULL, "wrong arguments");
return NULL;
}
if(lex_init(&lex, (get_func)fd_get_func, flags, &input))
return NULL;
result = parse_json(&lex, flags, error);
lex_close(&lex);
return result;
}
json_t *json_load_file(const char *path, size_t flags, json_error_t *error)
{
json_t *result;

View File

@@ -0,0 +1,30 @@
cmake_minimum_required (VERSION 2.8)
project (cpuid C)
add_definitions(/DVERSION="0.4.0")
set(HEADERS
libcpuid.h
libcpuid_types.h
libcpuid_constants.h
libcpuid_internal.h
amd_code_t.h
intel_code_t.h
recog_amd.h
recog_intel.h
asm-bits.h
libcpuid_util.h
)
set(SOURCES
cpuid_main.c
asm-bits.c
recog_amd.c
recog_intel.c
libcpuid_util.c
)
add_library(cpuid STATIC
${HEADERS}
${SOURCES}
)

View File

@@ -0,0 +1,39 @@
/*
* Copyright 2016 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This file contains a list of internal codes we use in detection. It is
* of no external use and isn't a complete list of AMD products.
*/
CODE2(OPTERON_800, 1000),
CODE(PHENOM),
CODE(PHENOM2),
CODE(FUSION_C),
CODE(FUSION_E),
CODE(FUSION_EA),
CODE(FUSION_Z),
CODE(FUSION_A),

825
compat/libcpuid/asm-bits.c Normal file
View File

@@ -0,0 +1,825 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "libcpuid.h"
#include "asm-bits.h"
int cpuid_exists_by_eflags(void)
{
#if defined(PLATFORM_X64)
return 1; /* CPUID is always present on the x86_64 */
#elif defined(PLATFORM_X86)
# if defined(COMPILER_GCC)
int result;
__asm __volatile(
" pushfl\n"
" pop %%eax\n"
" mov %%eax, %%ecx\n"
" xor $0x200000, %%eax\n"
" push %%eax\n"
" popfl\n"
" pushfl\n"
" pop %%eax\n"
" xor %%ecx, %%eax\n"
" mov %%eax, %0\n"
" push %%ecx\n"
" popfl\n"
: "=m"(result)
: :"eax", "ecx", "memory");
return (result != 0);
# elif defined(COMPILER_MICROSOFT)
int result;
__asm {
pushfd
pop eax
mov ecx, eax
xor eax, 0x200000
push eax
popfd
pushfd
pop eax
xor eax, ecx
mov result, eax
push ecx
popfd
};
return (result != 0);
# else
return 0;
# endif /* COMPILER_MICROSOFT */
#else
return 0;
#endif /* PLATFORM_X86 */
}
#ifdef INLINE_ASM_SUPPORTED
/*
* with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
* are implemented in separate .asm files. Otherwise, use inline assembly
*/
void exec_cpuid(uint32_t *regs)
{
#ifdef COMPILER_GCC
# ifdef PLATFORM_X64
__asm __volatile(
" mov %0, %%rdi\n"
" push %%rbx\n"
" push %%rcx\n"
" push %%rdx\n"
" mov (%%rdi), %%eax\n"
" mov 4(%%rdi), %%ebx\n"
" mov 8(%%rdi), %%ecx\n"
" mov 12(%%rdi), %%edx\n"
" cpuid\n"
" movl %%eax, (%%rdi)\n"
" movl %%ebx, 4(%%rdi)\n"
" movl %%ecx, 8(%%rdi)\n"
" movl %%edx, 12(%%rdi)\n"
" pop %%rdx\n"
" pop %%rcx\n"
" pop %%rbx\n"
:
:"m"(regs)
:"memory", "eax", "rdi"
);
# else
__asm __volatile(
" mov %0, %%edi\n"
" push %%ebx\n"
" push %%ecx\n"
" push %%edx\n"
" mov (%%edi), %%eax\n"
" mov 4(%%edi), %%ebx\n"
" mov 8(%%edi), %%ecx\n"
" mov 12(%%edi), %%edx\n"
" cpuid\n"
" mov %%eax, (%%edi)\n"
" mov %%ebx, 4(%%edi)\n"
" mov %%ecx, 8(%%edi)\n"
" mov %%edx, 12(%%edi)\n"
" pop %%edx\n"
" pop %%ecx\n"
" pop %%ebx\n"
:
:"m"(regs)
:"memory", "eax", "edi"
);
# endif /* COMPILER_GCC */
#else
# ifdef COMPILER_MICROSOFT
__asm {
push ebx
push ecx
push edx
push edi
mov edi, regs
mov eax, [edi]
mov ebx, [edi+4]
mov ecx, [edi+8]
mov edx, [edi+12]
cpuid
mov [edi], eax
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], edx
pop edi
pop edx
pop ecx
pop ebx
}
# else
# error "Unsupported compiler"
# endif /* COMPILER_MICROSOFT */
#endif
}
#endif /* INLINE_ASSEMBLY_SUPPORTED */
#ifdef INLINE_ASM_SUPPORTED
void cpu_rdtsc(uint64_t* result)
{
uint32_t low_part, hi_part;
#ifdef COMPILER_GCC
__asm __volatile (
" rdtsc\n"
" mov %%eax, %0\n"
" mov %%edx, %1\n"
:"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx"
);
#else
# ifdef COMPILER_MICROSOFT
__asm {
rdtsc
mov low_part, eax
mov hi_part, edx
};
# else
# error "Unsupported compiler"
# endif /* COMPILER_MICROSOFT */
#endif /* COMPILER_GCC */
*result = (uint64_t)low_part + (((uint64_t) hi_part) << 32);
}
#endif /* INLINE_ASM_SUPPORTED */
#ifdef INLINE_ASM_SUPPORTED
void busy_sse_loop(int cycles)
{
#ifdef COMPILER_GCC
#ifndef __APPLE__
# define XALIGN ".balign 16\n"
#else
# define XALIGN ".align 4\n"
#endif
__asm __volatile (
" xorps %%xmm0, %%xmm0\n"
" xorps %%xmm1, %%xmm1\n"
" xorps %%xmm2, %%xmm2\n"
" xorps %%xmm3, %%xmm3\n"
" xorps %%xmm4, %%xmm4\n"
" xorps %%xmm5, %%xmm5\n"
" xorps %%xmm6, %%xmm6\n"
" xorps %%xmm7, %%xmm7\n"
XALIGN
/* ".bsLoop:\n" */
"1:\n"
// 0:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 1:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 2:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 3:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 4:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 5:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 6:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 7:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 8:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
// 9:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//10:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//11:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//12:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//13:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//14:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//15:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//16:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//17:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//18:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//19:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//20:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//21:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//22:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//23:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//24:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//25:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//26:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//27:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//28:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//29:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//30:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
//31:
" addps %%xmm1, %%xmm0\n"
" addps %%xmm2, %%xmm1\n"
" addps %%xmm3, %%xmm2\n"
" addps %%xmm4, %%xmm3\n"
" addps %%xmm5, %%xmm4\n"
" addps %%xmm6, %%xmm5\n"
" addps %%xmm7, %%xmm6\n"
" addps %%xmm0, %%xmm7\n"
" dec %%eax\n"
/* "jnz .bsLoop\n" */
" jnz 1b\n"
::"a"(cycles)
);
#else
# ifdef COMPILER_MICROSOFT
__asm {
mov eax, cycles
xorps xmm0, xmm0
xorps xmm1, xmm1
xorps xmm2, xmm2
xorps xmm3, xmm3
xorps xmm4, xmm4
xorps xmm5, xmm5
xorps xmm6, xmm6
xorps xmm7, xmm7
//--
align 16
bsLoop:
// 0:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 1:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 2:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 3:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 4:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 5:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 6:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 7:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 8:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 9:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 10:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 11:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 12:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 13:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 14:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 15:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 16:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 17:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 18:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 19:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 20:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 21:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 22:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 23:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 24:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 25:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 26:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 27:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 28:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 29:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 30:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
// 31:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
//----------------------
dec eax
jnz bsLoop
}
# else
# error "Unsupported compiler"
# endif /* COMPILER_MICROSOFT */
#endif /* COMPILER_GCC */
}
#endif /* INLINE_ASSEMBLY_SUPPORTED */

View File

@@ -0,0 +1,53 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ASM_BITS_H__
#define __ASM_BITS_H__
#include "libcpuid.h"
/* Determine Compiler: */
#if defined(_MSC_VER)
# define COMPILER_MICROSOFT
#elif defined(__GNUC__)
# define COMPILER_GCC
#endif
/* Determine Platform */
#if defined(__x86_64__) || defined(_M_AMD64)
# define PLATFORM_X64
#elif defined(__i386__) || defined(_M_IX86)
# define PLATFORM_X86
#endif
/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */
#if (defined(COMPILER_GCC) && defined(PLATFORM_X64)) || defined(PLATFORM_X86)
# define INLINE_ASM_SUPPORTED
#endif
int cpuid_exists_by_eflags(void);
void exec_cpuid(uint32_t *regs);
void busy_sse_loop(int cycles);
#endif /* __ASM_BITS_H__ */

View File

@@ -0,0 +1,438 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "libcpuid.h"
#include "libcpuid_internal.h"
#include "recog_intel.h"
#include "recog_amd.h"
#include "asm-bits.h"
#include "libcpuid_util.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Implementation: */
static int _libcpiud_errno = ERR_OK;
int set_error(cpu_error_t err)
{
_libcpiud_errno = (int) err;
return (int) err;
}
static void raw_data_t_constructor(struct cpu_raw_data_t* raw)
{
memset(raw, 0, sizeof(struct cpu_raw_data_t));
}
static void cpu_id_t_constructor(struct cpu_id_t* id)
{
memset(id, 0, sizeof(struct cpu_id_t));
id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = id->l4_cache = -1;
id->l1_assoc = id->l2_assoc = id->l3_assoc = id->l4_assoc = -1;
id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = id->l4_cacheline = -1;
id->sse_size = -1;
}
static int parse_token(const char* expected_token, const char *token,
const char *value, uint32_t array[][4], int limit, int *recognized)
{
char format[32];
int veax, vebx, vecx, vedx;
int index;
if (*recognized) return 1; /* already recognized */
if (strncmp(token, expected_token, strlen(expected_token))) return 1; /* not what we search for */
sprintf(format, "%s[%%d]", expected_token);
*recognized = 1;
if (1 == sscanf(token, format, &index) && index >=0 && index < limit) {
if (4 == sscanf(value, "%x%x%x%x", &veax, &vebx, &vecx, &vedx)) {
array[index][0] = veax;
array[index][1] = vebx;
array[index][2] = vecx;
array[index][3] = vedx;
return 1;
}
}
return 0;
}
/* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */
#ifdef __APPLE__
#include <unistd.h>
#include <mach/clock_types.h>
#include <mach/clock.h>
#include <mach/mach.h>
static int get_total_cpus(void)
{
kern_return_t kr;
host_basic_info_data_t basic_info;
host_info_t info = (host_info_t)&basic_info;
host_flavor_t flavor = HOST_BASIC_INFO;
mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
kr = host_info(mach_host_self(), flavor, info, &count);
if (kr != KERN_SUCCESS) return 1;
return basic_info.avail_cpus;
}
#define GET_TOTAL_CPUS_DEFINED
#endif
#ifdef _WIN32
#include <windows.h>
static int get_total_cpus(void)
{
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
return system_info.dwNumberOfProcessors;
}
#define GET_TOTAL_CPUS_DEFINED
#endif
#if defined linux || defined __linux__ || defined __sun
#include <sys/sysinfo.h>
#include <unistd.h>
static int get_total_cpus(void)
{
return sysconf(_SC_NPROCESSORS_ONLN);
}
#define GET_TOTAL_CPUS_DEFINED
#endif
#if defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __bsdi__ || defined __QNX__
#include <sys/types.h>
#include <sys/sysctl.h>
static int get_total_cpus(void)
{
int mib[2] = { CTL_HW, HW_NCPU };
int ncpus;
size_t len = sizeof(ncpus);
if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) != 0) return 1;
return ncpus;
}
#define GET_TOTAL_CPUS_DEFINED
#endif
#ifndef GET_TOTAL_CPUS_DEFINED
static int get_total_cpus(void)
{
static int warning_printed = 0;
if (!warning_printed) {
warning_printed = 1;
warnf("Your system is not supported by libcpuid -- don't know how to detect the\n");
warnf("total number of CPUs on your system. It will be reported as 1.\n");
printf("Please use cpu_id_t.logical_cpus field instead.\n");
}
return 1;
}
#endif /* GET_TOTAL_CPUS_DEFINED */
static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx1[] = {
{ 0, CPU_FEATURE_FPU },
{ 1, CPU_FEATURE_VME },
{ 2, CPU_FEATURE_DE },
{ 3, CPU_FEATURE_PSE },
{ 4, CPU_FEATURE_TSC },
{ 5, CPU_FEATURE_MSR },
{ 6, CPU_FEATURE_PAE },
{ 7, CPU_FEATURE_MCE },
{ 8, CPU_FEATURE_CX8 },
{ 9, CPU_FEATURE_APIC },
{ 11, CPU_FEATURE_SEP },
{ 12, CPU_FEATURE_MTRR },
{ 13, CPU_FEATURE_PGE },
{ 14, CPU_FEATURE_MCA },
{ 15, CPU_FEATURE_CMOV },
{ 16, CPU_FEATURE_PAT },
{ 17, CPU_FEATURE_PSE36 },
{ 19, CPU_FEATURE_CLFLUSH },
{ 23, CPU_FEATURE_MMX },
{ 24, CPU_FEATURE_FXSR },
{ 25, CPU_FEATURE_SSE },
{ 26, CPU_FEATURE_SSE2 },
{ 28, CPU_FEATURE_HT },
};
const struct feature_map_t matchtable_ecx1[] = {
{ 0, CPU_FEATURE_PNI },
{ 1, CPU_FEATURE_PCLMUL },
{ 3, CPU_FEATURE_MONITOR },
{ 9, CPU_FEATURE_SSSE3 },
{ 12, CPU_FEATURE_FMA3 },
{ 13, CPU_FEATURE_CX16 },
{ 19, CPU_FEATURE_SSE4_1 },
{ 20, CPU_FEATURE_SSE4_2 },
{ 22, CPU_FEATURE_MOVBE },
{ 23, CPU_FEATURE_POPCNT },
{ 25, CPU_FEATURE_AES },
{ 26, CPU_FEATURE_XSAVE },
{ 27, CPU_FEATURE_OSXSAVE },
{ 28, CPU_FEATURE_AVX },
{ 29, CPU_FEATURE_F16C },
{ 30, CPU_FEATURE_RDRAND },
};
const struct feature_map_t matchtable_ebx7[] = {
{ 3, CPU_FEATURE_BMI1 },
{ 5, CPU_FEATURE_AVX2 },
{ 8, CPU_FEATURE_BMI2 },
};
const struct feature_map_t matchtable_edx81[] = {
{ 11, CPU_FEATURE_SYSCALL },
{ 27, CPU_FEATURE_RDTSCP },
{ 29, CPU_FEATURE_LM },
};
const struct feature_map_t matchtable_ecx81[] = {
{ 0, CPU_FEATURE_LAHF_LM },
};
const struct feature_map_t matchtable_edx87[] = {
{ 8, CPU_FEATURE_CONSTANT_TSC },
};
if (raw->basic_cpuid[0][0] >= 1) {
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
}
if (raw->basic_cpuid[0][0] >= 7) {
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
}
if (raw->ext_cpuid[0][0] >= 0x80000001) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 0x80000007) {
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
}
if (data->flags[CPU_FEATURE_SSE]) {
/* apply guesswork to check if the SSE unit width is 128 bit */
switch (data->vendor) {
case VENDOR_AMD:
data->sse_size = (data->ext_family >= 16 && data->ext_family != 17) ? 128 : 64;
break;
case VENDOR_INTEL:
data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 128 : 64;
break;
default:
break;
}
/* leave the CPU_FEATURE_128BIT_SSE_AUTH 0; the advanced per-vendor detection routines
* will set it accordingly if they detect the needed bit */
}
}
static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str)
{
int i;
cpu_vendor_t vendor = VENDOR_UNKNOWN;
const struct { cpu_vendor_t vendor; char match[16]; }
matchtable[NUM_CPU_VENDORS] = {
/* source: http://www.sandpile.org/ia32/cpuid.htm */
{ VENDOR_INTEL , "GenuineIntel" },
{ VENDOR_AMD , "AuthenticAMD" },
{ VENDOR_CYRIX , "CyrixInstead" },
{ VENDOR_NEXGEN , "NexGenDriven" },
{ VENDOR_TRANSMETA , "GenuineTMx86" },
{ VENDOR_UMC , "UMC UMC UMC " },
{ VENDOR_CENTAUR , "CentaurHauls" },
{ VENDOR_RISE , "RiseRiseRise" },
{ VENDOR_SIS , "SiS SiS SiS " },
{ VENDOR_NSC , "Geode by NSC" },
};
memcpy(vendor_str + 0, &raw_vendor[1], 4);
memcpy(vendor_str + 4, &raw_vendor[3], 4);
memcpy(vendor_str + 8, &raw_vendor[2], 4);
vendor_str[12] = 0;
/* Determine vendor: */
for (i = 0; i < NUM_CPU_VENDORS; i++)
if (!strcmp(vendor_str, matchtable[i].match)) {
vendor = matchtable[i].vendor;
break;
}
return vendor;
}
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int i, j, basic, xmodel, xfamily, ext;
char brandstr[64] = {0};
data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
if (data->vendor == VENDOR_UNKNOWN)
return set_error(ERR_CPU_UNKN);
basic = raw->basic_cpuid[0][0];
if (basic >= 1) {
data->family = (raw->basic_cpuid[1][0] >> 8) & 0xf;
data->model = (raw->basic_cpuid[1][0] >> 4) & 0xf;
data->stepping = raw->basic_cpuid[1][0] & 0xf;
xmodel = (raw->basic_cpuid[1][0] >> 16) & 0xf;
xfamily = (raw->basic_cpuid[1][0] >> 20) & 0xff;
if (data->vendor == VENDOR_AMD && data->family < 0xf)
data->ext_family = data->family;
else
data->ext_family = data->family + xfamily;
data->ext_model = data->model + (xmodel << 4);
}
ext = raw->ext_cpuid[0][0] - 0x8000000;
/* obtain the brand string, if present: */
if (ext >= 4) {
for (i = 0; i < 3; i++)
for (j = 0; j < 4; j++)
memcpy(brandstr + i * 16 + j * 4,
&raw->ext_cpuid[2 + i][j], 4);
brandstr[48] = 0;
i = 0;
while (brandstr[i] == ' ') i++;
strncpy(data->brand_str, brandstr + i, sizeof(data->brand_str));
data->brand_str[48] = 0;
}
load_features_common(raw, data);
data->total_logical_cpus = get_total_cpus();
return set_error(ERR_OK);
}
static void make_list_from_string(const char* csv, struct cpu_list_t* list)
{
int i, n, l, last;
l = (int) strlen(csv);
n = 0;
for (i = 0; i < l; i++) if (csv[i] == ',') n++;
n++;
list->num_entries = n;
list->names = (char**) malloc(sizeof(char*) * n);
last = -1;
n = 0;
for (i = 0; i <= l; i++) if (i == l || csv[i] == ',') {
list->names[n] = (char*) malloc(i - last);
memcpy(list->names[n], &csv[last + 1], i - last - 1);
list->names[n][i - last - 1] = '\0';
n++;
last = i;
}
}
/* Interface: */
int cpuid_get_total_cpus(void)
{
return get_total_cpus();
}
int cpuid_present(void)
{
return cpuid_exists_by_eflags();
}
void cpu_exec_cpuid(uint32_t eax, uint32_t* regs)
{
regs[0] = eax;
regs[1] = regs[2] = regs[3] = 0;
exec_cpuid(regs);
}
void cpu_exec_cpuid_ext(uint32_t* regs)
{
exec_cpuid(regs);
}
int cpuid_get_raw_data(struct cpu_raw_data_t* data)
{
unsigned i;
if (!cpuid_present())
return set_error(ERR_NO_CPUID);
for (i = 0; i < 32; i++)
cpu_exec_cpuid(i, data->basic_cpuid[i]);
for (i = 0; i < 32; i++)
cpu_exec_cpuid(0x80000000 + i, data->ext_cpuid[i]);
for (i = 0; i < MAX_INTELFN4_LEVEL; i++) {
memset(data->intel_fn4[i], 0, sizeof(data->intel_fn4[i]));
data->intel_fn4[i][0] = 4;
data->intel_fn4[i][2] = i;
cpu_exec_cpuid_ext(data->intel_fn4[i]);
}
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
memset(data->intel_fn11[i], 0, sizeof(data->intel_fn11[i]));
data->intel_fn11[i][0] = 11;
data->intel_fn11[i][2] = i;
cpu_exec_cpuid_ext(data->intel_fn11[i]);
}
for (i = 0; i < MAX_INTELFN12H_LEVEL; i++) {
memset(data->intel_fn12h[i], 0, sizeof(data->intel_fn12h[i]));
data->intel_fn12h[i][0] = 0x12;
data->intel_fn12h[i][2] = i;
cpu_exec_cpuid_ext(data->intel_fn12h[i]);
}
for (i = 0; i < MAX_INTELFN14H_LEVEL; i++) {
memset(data->intel_fn14h[i], 0, sizeof(data->intel_fn14h[i]));
data->intel_fn14h[i][0] = 0x14;
data->intel_fn14h[i][2] = i;
cpu_exec_cpuid_ext(data->intel_fn14h[i]);
}
return set_error(ERR_OK);
}
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
int r;
struct cpu_raw_data_t myraw;
if (!raw) {
if ((r = cpuid_get_raw_data(&myraw)) < 0)
return set_error(r);
raw = &myraw;
}
cpu_id_t_constructor(data);
if ((r = cpuid_basic_identify(raw, data)) < 0)
return set_error(r);
switch (data->vendor) {
case VENDOR_INTEL:
r = cpuid_identify_intel(raw, data, internal);
break;
case VENDOR_AMD:
r = cpuid_identify_amd(raw, data, internal);
break;
default:
break;
}
return set_error(r);
}
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
struct internal_id_info_t throwaway;
return cpu_ident_internal(raw, data, &throwaway);
}
const char* cpuid_lib_version(void)
{
return VERSION;
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2016 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This file contains a list of internal codes we use in detection. It is
* of no external use and isn't a complete list of intel products.
*/
CODE2(PENTIUM, 2000),
CODE(IRWIN),
CODE(POTOMAC),
CODE(GAINESTOWN),
CODE(WESTMERE),
CODE(PENTIUM_M),
CODE(NOT_CELERON),
CODE(CORE_SOLO),
CODE(MOBILE_CORE_SOLO),
CODE(CORE_DUO),
CODE(MOBILE_CORE_DUO),
CODE(WOLFDALE),
CODE(MEROM),
CODE(PENRYN),
CODE(QUAD_CORE),
CODE(DUAL_CORE_HT),
CODE(QUAD_CORE_HT),
CODE(MORE_THAN_QUADCORE),
CODE(PENTIUM_D),
CODE(SILVERTHORNE),
CODE(DIAMONDVILLE),
CODE(PINEVIEW),
CODE(CEDARVIEW),

1150
compat/libcpuid/libcpuid.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,47 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @File libcpuid_constants.h
* @Author Veselin Georgiev
* @Brief Some limits and constants for libcpuid
*/
#ifndef __LIBCPUID_CONSTANTS_H__
#define __LIBCPUID_CONSTANTS_H__
#define VENDOR_STR_MAX 16
#define BRAND_STR_MAX 64
#define CPU_FLAGS_MAX 128
#define MAX_CPUID_LEVEL 32
#define MAX_EXT_CPUID_LEVEL 32
#define MAX_INTELFN4_LEVEL 8
#define MAX_INTELFN11_LEVEL 4
#define MAX_INTELFN12H_LEVEL 4
#define MAX_INTELFN14H_LEVEL 4
#define CPU_HINTS_MAX 16
#define SGX_FLAGS_MAX 14
#endif /* __LIBCPUID_CONSTANTS_H__ */

View File

@@ -0,0 +1,64 @@
/*
* Copyright 2016 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIBCPUID_INTERNAL_H__
#define __LIBCPUID_INTERNAL_H__
/*
* This file contains internal undocumented declarations and function prototypes
* for the workings of the internal library infrastructure.
*/
enum _common_codes_t {
NA = 0,
NC, /* No code */
};
#define CODE(x) x
#define CODE2(x, y) x = y
enum _amd_code_t {
#include "amd_code_t.h"
};
typedef enum _amd_code_t amd_code_t;
enum _intel_code_t {
#include "intel_code_t.h"
};
typedef enum _intel_code_t intel_code_t;
#undef CODE
#undef CODE2
struct internal_id_info_t {
union {
amd_code_t amd;
intel_code_t intel;
} code;
uint64_t bits;
int score; // detection (matchtable) score
};
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data,
struct internal_id_info_t* internal);
#endif /* __LIBCPUID_INTERNAL_H__ */

View File

@@ -0,0 +1,37 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @File libcpuid_types.h
* @Author Veselin Georgiev
* @Brief Type specifications for libcpuid.
*/
#ifndef __LIBCPUID_TYPES_H__
#define __LIBCPUID_TYPES_H__
#include <stdint.h>
#endif /* __LIBCPUID_TYPES_H__ */

View File

@@ -0,0 +1,218 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
int _current_verboselevel;
void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data)
{
int i;
for (i = 0; i < count; i++)
if (reg & (1u << matchtable[i].bit))
data->flags[matchtable[i].feature] = 1;
}
static void default_warn(const char *msg)
{
fprintf(stderr, "%s", msg);
}
libcpuid_warn_fn_t _warn_fun = default_warn;
#if defined(_MSC_VER)
# define vsnprintf _vsnprintf
#endif
void warnf(const char* format, ...)
{
char buff[1024];
va_list va;
if (!_warn_fun) return;
va_start(va, format);
vsnprintf(buff, sizeof(buff), format, va);
va_end(va);
_warn_fun(buff);
}
void debugf(int verboselevel, const char* format, ...)
{
char buff[1024];
va_list va;
if (verboselevel > _current_verboselevel) return;
va_start(va, format);
vsnprintf(buff, sizeof(buff), format, va);
va_end(va);
_warn_fun(buff);
}
static int popcount64(uint64_t mask)
{
int num_set_bits = 0;
while (mask) {
mask &= mask - 1;
num_set_bits++;
}
return num_set_bits;
}
static int score(const struct match_entry_t* entry, const struct cpu_id_t* data,
int brand_code, uint64_t bits, int model_code)
{
int res = 0;
if (entry->family == data->family ) res += 2;
if (entry->model == data->model ) res += 2;
if (entry->stepping == data->stepping ) res += 2;
if (entry->ext_family == data->ext_family) res += 2;
if (entry->ext_model == data->ext_model ) res += 2;
if (entry->ncores == data->num_cores ) res += 2;
if (entry->l2cache == data->l2_cache ) res += 1;
if (entry->l3cache == data->l3_cache ) res += 1;
if (entry->brand_code == brand_code ) res += 2;
if (entry->model_code == model_code ) res += 2;
res += popcount64(entry->model_bits & bits) * 2;
return res;
}
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
struct cpu_id_t* data, int brand_code, uint64_t bits,
int model_code)
{
int bestscore = -1;
int bestindex = 0;
int i, t;
debugf(3, "Matching cpu f:%d, m:%d, s:%d, xf:%d, xm:%d, ncore:%d, l2:%d, bcode:%d, bits:%llu, code:%d\n",
data->family, data->model, data->stepping, data->ext_family,
data->ext_model, data->num_cores, data->l2_cache, brand_code, (unsigned long long) bits, model_code);
for (i = 0; i < count; i++) {
t = score(&matchtable[i], data, brand_code, bits, model_code);
debugf(3, "Entry %d, `%s', score %d\n", i, matchtable[i].name, t);
if (t > bestscore) {
debugf(2, "Entry `%s' selected - best score so far (%d)\n", matchtable[i].name, t);
bestscore = t;
bestindex = i;
}
}
strcpy(data->cpu_codename, matchtable[bestindex].name);
return bestscore;
}
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
struct cpu_list_t* list)
{
int i, j, n, good;
n = 0;
list->names = (char**) malloc(sizeof(char*) * count);
for (i = 0; i < count; i++) {
if (strstr(matchtable[i].name, "Unknown")) continue;
good = 1;
for (j = n - 1; j >= 0; j--)
if (!strcmp(list->names[j], matchtable[i].name)) {
good = 0;
break;
}
if (!good) continue;
#if defined(_MSC_VER)
list->names[n++] = _strdup(matchtable[i].name);
#else
list->names[n++] = strdup(matchtable[i].name);
#endif
}
list->num_entries = n;
}
static int xmatch_entry(char c, const char* p)
{
int i, j;
if (c == 0) return -1;
if (c == p[0]) return 1;
if (p[0] == '.') return 1;
if (p[0] == '#' && isdigit(c)) return 1;
if (p[0] == '[') {
j = 1;
while (p[j] && p[j] != ']') j++;
if (!p[j]) return -1;
for (i = 1; i < j; i++)
if (p[i] == c) return j + 1;
}
return -1;
}
int match_pattern(const char* s, const char* p)
{
int i, j, dj, k, n, m;
n = (int) strlen(s);
m = (int) strlen(p);
for (i = 0; i < n; i++) {
if (xmatch_entry(s[i], p) != -1) {
j = 0;
k = 0;
while (j < m && ((dj = xmatch_entry(s[i + k], p + j)) != -1)) {
k++;
j += dj;
}
if (j == m) return i + 1;
}
}
return 0;
}
struct cpu_id_t* get_cached_cpuid(void)
{
static int initialized = 0;
static struct cpu_id_t id;
if (initialized) return &id;
if (cpu_identify(NULL, &id))
memset(&id, 0, sizeof(id));
initialized = 1;
return &id;
}
int match_all(uint64_t bits, uint64_t mask)
{
return (bits & mask) == mask;
}
void debug_print_lbits(int debuglevel, uint64_t mask)
{
int i, first = 0;
for (i = 0; i < 64; i++) if (mask & (((uint64_t) 1) << i)) {
if (first) first = 0;
else debugf(2, " + ");
debugf(2, "LBIT(%d)", i);
}
debugf(2, "\n");
}

View File

@@ -0,0 +1,100 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LIBCPUID_UTIL_H__
#define __LIBCPUID_UTIL_H__
#define COUNT_OF(array) (sizeof(array) / sizeof(array[0]))
#define LBIT(x) (((long long) 1) << x)
struct feature_map_t {
unsigned bit;
cpu_feature_t feature;
};
void match_features(const struct feature_map_t* matchtable, int count,
uint32_t reg, struct cpu_id_t* data);
struct match_entry_t {
int family, model, stepping, ext_family, ext_model;
int ncores, l2cache, l3cache, brand_code;
uint64_t model_bits;
int model_code;
char name[32];
};
// returns the match score:
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
struct cpu_id_t* data, int brand_code, uint64_t bits,
int model_code);
void warnf(const char* format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 1, 2)))
#endif
;
void debugf(int verboselevel, const char* format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
;
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
struct cpu_list_t* list);
/*
* Seek for a pattern in `haystack'.
* Pattern may be an fixed string, or contain the special metacharacters
* '.' - match any single character
* '#' - match any digit
* '[<chars>] - match any of the given chars (regex-like ranges are not
* supported)
* Return val: 0 if the pattern is not found. Nonzero if it is found (actually,
* x + 1 where x is the index where the match is found).
*/
int match_pattern(const char* haystack, const char* pattern);
/*
* Gets an initialized cpu_id_t. It is cached, so that internal libcpuid
* machinery doesn't need to issue cpu_identify more than once.
*/
struct cpu_id_t* get_cached_cpuid(void);
/* returns true if all bits of mask are present in `bits'. */
int match_all(uint64_t bits, uint64_t mask);
/* print what bits a mask consists of */
void debug_print_lbits(int debuglevel, uint64_t mask);
/*
* Sets the current errno
*/
int set_error(cpu_error_t err);
extern libcpuid_warn_fn_t _warn_fun;
extern int _current_verboselevel;
#endif /* __LIBCPUID_UTIL_H__ */

549
compat/libcpuid/recog_amd.c Normal file
View File

@@ -0,0 +1,549 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
#include "libcpuid_internal.h"
#include "recog_amd.h"
const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = {
#define CODE(x) { x, #x }
#define CODE2(x, y) CODE(x)
#include "amd_code_t.h"
#undef CODE
};
struct amd_code_and_bits_t {
int code;
uint64_t bits;
};
enum _amd_bits_t {
ATHLON_ = LBIT( 0 ),
_XP_ = LBIT( 1 ),
_M_ = LBIT( 2 ),
_MP_ = LBIT( 3 ),
MOBILE_ = LBIT( 4 ),
DURON_ = LBIT( 5 ),
SEMPRON_ = LBIT( 6 ),
OPTERON_ = LBIT( 7 ),
TURION_ = LBIT( 8 ),
_LV_ = LBIT( 9 ),
_64_ = LBIT( 10 ),
_X2 = LBIT( 11 ),
_X3 = LBIT( 12 ),
_X4 = LBIT( 13 ),
_X6 = LBIT( 14 ),
_FX = LBIT( 15 ),
};
typedef enum _amd_bits_t amd_bits_t;
enum _amd_model_codes_t {
// Only for Ryzen CPUs:
_1400,
_1500,
_1600,
};
const struct match_entry_t cpudb_amd[] = {
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD CPU" },
/* 486 and the likes */
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 486" },
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2" },
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2WB" },
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4" },
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4WB" },
/* Pentia clones */
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 586" },
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
/* The K6 */
{ 5, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2" },
{ 5, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-III" },
{ 5, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
{ 5, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
{ 5, 12, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
{ 5, 13, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2+" },
/* Athlon et al. */
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
{ 6, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Duron (Spitfire)" },
{ 6, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (ThunderBird)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon (Palomino)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Palomino)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Palomino)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon XP" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Morgan)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon XP (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Applebred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (T-Bred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (T-Bred)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP (Barton)" },
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Barton)" },
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron (Barton)" },
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thorton)" },
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thorton)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Barton)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (Barton)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (Barton)" },
/* K8 Architecture */
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown K8" },
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Unknown K9" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown A64" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
{ 15, 3, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (512K)" },
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (1024K)" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_FX , 0, "Athlon FX" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX" },
{ 15, 3, -1, 15, 35, 2, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX X2 (Toledo)" },
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (512K)" },
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (1024K)" },
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (512K)" },
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (1024K)" },
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (512K)" },
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (1024K)" },
{ 15, -1, -1, 15, -1, 1, 128, -1, NC, SEMPRON_ , 0, "A64 Sempron (128K)" },
{ 15, -1, -1, 15, -1, 1, 256, -1, NC, SEMPRON_ , 0, "A64 Sempron (256K)" },
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, SEMPRON_ , 0, "A64 Sempron (512K)" },
{ 15, -1, -1, 15, 0x4f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
{ 15, -1, -1, 15, 0x5f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
{ 15, -1, -1, 15, 0x2f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
{ 15, -1, -1, 15, 0x2c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
{ 15, -1, -1, 15, 0x1f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Winchester/512K)" },
{ 15, -1, -1, 15, 0x0c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Newcastle/512K)" },
{ 15, -1, -1, 15, 0x27, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
{ 15, -1, -1, 15, 0x37, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
{ 15, -1, -1, 15, 0x04, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/512K)" },
{ 15, -1, -1, 15, 0x5f, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/1024K)" },
{ 15, -1, -1, 15, 0x27, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/1024K)" },
{ 15, -1, -1, 15, 0x04, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/1024K)" },
{ 15, -1, -1, 15, 0x4b, 2, 256, -1, NC, SEMPRON_ , 0, "Athlon 64 X2 (Windsor/256K)" },
{ 15, -1, -1, 15, 0x23, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/512K)" },
{ 15, -1, -1, 15, 0x4b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
{ 15, -1, -1, 15, 0x43, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Brisbane/512K)" },
{ 15, -1, -1, 15, 0x2b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Manchester/512K)"},
{ 15, -1, -1, 15, 0x23, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/1024K)" },
{ 15, -1, -1, 15, 0x43, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/1024K)" },
{ 15, -1, -1, 15, 0x08, 1, 128, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/128K)"},
{ 15, -1, -1, 15, 0x08, 1, 256, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/256K)"},
{ 15, -1, -1, 15, 0x0c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Paris)" },
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/128K)"},
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/256K)"},
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/128K)"},
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/256K)"},
{ 15, -1, -1, 15, 0x2f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
{ 15, -1, -1, 15, 0x2f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
{ 15, -1, -1, 15, 0x4f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
{ 15, -1, -1, 15, 0x4f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
{ 15, -1, -1, 15, 0x5f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
{ 15, -1, -1, 15, 0x5f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
{ 15, -1, -1, 15, 0x6b, 2, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/256K)"},
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/512K)"},
{ 15, -1, -1, 15, 0x7f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/256K)" },
{ 15, -1, -1, 15, 0x7f, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/512K)" },
{ 15, -1, -1, 15, 0x4c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/256K)"},
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/512K)"},
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, SEMPRON_ , 0, "Sempron Dual Core" },
{ 15, -1, -1, 15, 0x24, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/512K)" },
{ 15, -1, -1, 15, 0x24, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/1024K)" },
{ 15, -1, -1, 15, 0x48, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Taylor)" },
{ 15, -1, -1, 15, 0x48, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Trinidad)" },
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Richmond)" },
{ 15, -1, -1, 15, 0x68, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/256K)" },
{ 15, -1, -1, 15, 0x68, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/512K)" },
{ 15, -1, -1, 17, 3, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/512K)" },
{ 15, -1, -1, 17, 3, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/1024K)" },
/* K10 Architecture (2007) */
{ 15, -1, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Unknown AMD Phenom" },
{ 15, 2, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Phenom" },
{ 15, 2, -1, 16, -1, 3, -1, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman)" },
{ 15, 2, -1, 16, -1, 4, -1, -1, PHENOM, 0 , 0, "Phenom X4 (Agena)" },
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/256K)" },
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/512K)" },
{ 15, 2, -1, 16, -1, 4, 128, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/128K)" },
{ 15, 2, -1, 16, -1, 4, 256, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/256K)" },
{ 15, 2, -1, 16, -1, 4, 512, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/512K)" },
{ 15, 2, -1, 16, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon X2 (Kuma)" },
/* Phenom II derivates: */
{ 15, 4, -1, 16, -1, 4, -1, -1, NC, 0 , 0, "Phenom (Deneb-based)" },
{ 15, 4, -1, 16, -1, 1, 1024, -1, NC, SEMPRON_ , 0, "Sempron (Sargas)" },
{ 15, 4, -1, 16, -1, 2, 512, -1, PHENOM2, 0 , 0, "Phenom II X2 (Callisto)" },
{ 15, 4, -1, 16, -1, 3, 512, -1, PHENOM2, 0 , 0, "Phenom II X3 (Heka)" },
{ 15, 4, -1, 16, -1, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4" },
{ 15, 4, -1, 16, 4, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
{ 15, 5, -1, 16, 5, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
{ 15, 4, -1, 16, 10, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Zosma)" },
{ 15, 4, -1, 16, 10, 6, 512, -1, PHENOM2, 0 , 0, "Phenom II X6 (Thuban)" },
/* Athlon II derivates: */
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_X2 , 0, "Athlon II (Champlain)" },
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
{ 15, 6, -1, 16, 6, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
{ 15, 5, -1, 16, 5, 3, 512, -1, NC, ATHLON_|_64_|_X3 , 0, "Athlon II X3 (Rana)" },
{ 15, 5, -1, 16, 5, 4, 512, -1, NC, ATHLON_|_64_|_X4 , 0, "Athlon II X4 (Propus)" },
/* Llano APUs (2011): */
{ 15, 1, -1, 18, 1, 2, -1, -1, FUSION_EA, 0 , 0, "Llano X2" },
{ 15, 1, -1, 18, 1, 3, -1, -1, FUSION_EA, 0 , 0, "Llano X3" },
{ 15, 1, -1, 18, 1, 4, -1, -1, FUSION_EA, 0 , 0, "Llano X4" },
/* Family 14h: Bobcat Architecture (2011) */
{ 15, 2, -1, 20, -1, 1, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario" },
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario (Dual-core)" },
{ 15, 1, -1, 20, -1, 1, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate" },
{ 15, 1, -1, 20, -1, 2, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate (Dual-core)" },
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_Z, 0 , 0, "Brazos Desna (Dual-core)" },
/* Family 15h: Bulldozer Architecture (2011) */
{ 15, -1, -1, 21, 0, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
{ 15, -1, -1, 21, 1, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
{ 15, -1, -1, 21, 1, 6, -1, -1, NC, 0 , 0, "Bulldozer X3" },
{ 15, -1, -1, 21, 1, 8, -1, -1, NC, 0 , 0, "Bulldozer X4" },
/* 2nd-gen, Piledriver core (2012): */
{ 15, -1, -1, 21, 2, 4, -1, -1, NC, 0 , 0, "Vishera X2" },
{ 15, -1, -1, 21, 2, 6, -1, -1, NC, 0 , 0, "Vishera X3" },
{ 15, -1, -1, 21, 2, 8, -1, -1, NC, 0 , 0, "Vishera X4" },
{ 15, 0, -1, 21, 16, 2, -1, -1, FUSION_A, 0 , 0, "Trinity X2" },
{ 15, 0, -1, 21, 16, 4, -1, -1, FUSION_A, 0 , 0, "Trinity X4" },
{ 15, 3, -1, 21, 19, 2, -1, -1, FUSION_A, 0 , 0, "Richland X2" },
{ 15, 3, -1, 21, 19, 4, -1, -1, FUSION_A, 0 , 0, "Richland X4" },
/* 3rd-gen, Steamroller core (2014): */
{ 15, 0, -1, 21, 48, 2, -1, -1, FUSION_A, 0 , 0, "Kaveri X2" },
{ 15, 0, -1, 21, 48, 4, -1, -1, FUSION_A, 0 , 0, "Kaveri X4" },
{ 15, 8, -1, 21, 56, 4, -1, -1, FUSION_A, 0 , 0, "Godavari X4" },
/* 4th-gen, Excavator core (2015): */
{ 15, 1, -1, 21, 96, 2, -1, -1, FUSION_A, 0 , 0, "Carrizo X2" },
{ 15, 1, -1, 21, 96, 4, -1, -1, FUSION_A, 0 , 0, "Carrizo X4" },
{ 15, 5, -1, 21, 101, 2, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X2" },
{ 15, 5, -1, 21, 101, 4, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X4" },
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_A, 0 , 0, "Stoney Ridge X2" },
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_E, 0 , 0, "Stoney Ridge X2" },
/* Family 16h: Jaguar Architecture (2013) */
{ 15, 0, -1, 22, 0, 2, -1, -1, FUSION_A, 0 , 0, "Kabini X2" },
{ 15, 0, -1, 22, 0, 4, -1, -1, FUSION_A, 0 , 0, "Kabini X4" },
/* 2nd-gen, Puma core (2013): */
{ 15, 0, -1, 22, 48, 2, -1, -1, FUSION_E, 0 , 0, "Mullins X2" },
{ 15, 0, -1, 22, 48, 4, -1, -1, FUSION_A, 0 , 0, "Mullins X4" },
/* Family 17h: Zen Architecture (2017) */
{ 15, -1, -1, 23, 1, 8, -1, -1, NC, 0 , 0, "Ryzen 7" },
{ 15, -1, -1, 23, 1, 6, -1, -1, NC, 0 , _1600, "Ryzen 5" },
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1500, "Ryzen 5" },
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1400, "Ryzen 5" },
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Ryzen 3" },
//{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Raven Ridge" }, //TBA
/* Newer Opterons: */
{ 15, 9, -1, 22, 9, 8, -1, -1, NC, OPTERON_ , 0, "Magny-Cours Opteron" },
};
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_NX },
{ 22, CPU_FEATURE_MMXEXT },
{ 25, CPU_FEATURE_FXSR_OPT },
{ 30, CPU_FEATURE_3DNOWEXT },
{ 31, CPU_FEATURE_3DNOW },
};
const struct feature_map_t matchtable_ecx81[] = {
{ 1, CPU_FEATURE_CMP_LEGACY },
{ 2, CPU_FEATURE_SVM },
{ 5, CPU_FEATURE_ABM },
{ 6, CPU_FEATURE_SSE4A },
{ 7, CPU_FEATURE_MISALIGNSSE },
{ 8, CPU_FEATURE_3DNOWPREFETCH },
{ 9, CPU_FEATURE_OSVW },
{ 10, CPU_FEATURE_IBS },
{ 11, CPU_FEATURE_XOP },
{ 12, CPU_FEATURE_SKINIT },
{ 13, CPU_FEATURE_WDT },
{ 16, CPU_FEATURE_FMA4 },
{ 21, CPU_FEATURE_TBM },
};
const struct feature_map_t matchtable_edx87[] = {
{ 0, CPU_FEATURE_TS },
{ 1, CPU_FEATURE_FID },
{ 2, CPU_FEATURE_VID },
{ 3, CPU_FEATURE_TTP },
{ 4, CPU_FEATURE_TM_AMD },
{ 5, CPU_FEATURE_STC },
{ 6, CPU_FEATURE_100MHZSTEPS },
{ 7, CPU_FEATURE_HWPSTATE },
/* id 8 is handled in common */
{ 9, CPU_FEATURE_CPB },
{ 10, CPU_FEATURE_APERFMPERF },
{ 11, CPU_FEATURE_PFI },
{ 12, CPU_FEATURE_PA },
};
if (raw->ext_cpuid[0][0] >= 0x80000001) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 0x80000007)
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
if (raw->ext_cpuid[0][0] >= 0x8000001a) {
/* We have the extended info about SSE unit size */
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
}
}
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int l3_result;
const int assoc_table[16] = {
0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255
};
unsigned n = raw->ext_cpuid[0][0];
if (n >= 0x80000005) {
data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff;
data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff;
data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff;
data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff;
}
if (n >= 0x80000006) {
data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff;
data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf];
data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff;
l3_result = (raw->ext_cpuid[6][3] >> 18);
if (l3_result > 0) {
l3_result = 512 * l3_result; /* AMD spec says it's a range,
but we take the lower bound */
data->l3_cache = l3_result;
data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf];
data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff;
} else {
data->l3_cache = 0;
}
}
}
static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int logical_cpus = -1, num_cores = -1;
if (raw->basic_cpuid[0][0] >= 1) {
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
if (raw->ext_cpuid[0][0] >= 8) {
num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff);
}
}
if (data->flags[CPU_FEATURE_HT]) {
if (num_cores > 1) {
if (data->ext_family >= 23)
num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8.
data->num_cores = num_cores;
data->num_logical_cpus = logical_cpus;
} else {
data->num_cores = 1;
data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2);
}
} else {
data->num_cores = data->num_logical_cpus = 1;
}
}
static int amd_has_turion_modelname(const char *bs)
{
/* We search for something like TL-60. Ahh, I miss regexes...*/
int i, l, k;
char code[3] = {0};
const char* codes[] = { "ML", "MT", "MK", "TK", "TL", "RM", "ZM", "" };
l = (int) strlen(bs);
for (i = 3; i < l - 2; i++) {
if (bs[i] == '-' &&
isupper(bs[i-1]) && isupper(bs[i-2]) && !isupper(bs[i-3]) &&
isdigit(bs[i+1]) && isdigit(bs[i+2]) && !isdigit(bs[i+3]))
{
code[0] = bs[i-2];
code[1] = bs[i-1];
for (k = 0; codes[k][0]; k++)
if (!strcmp(codes[k], code)) return 1;
}
}
return 0;
}
static struct amd_code_and_bits_t decode_amd_codename_part1(const char *bs)
{
amd_code_t code = NC;
uint64_t bits = 0;
struct amd_code_and_bits_t result;
if (strstr(bs, "Dual Core") ||
strstr(bs, "Dual-Core") ||
strstr(bs, " X2 "))
bits |= _X2;
if (strstr(bs, " X4 ")) bits |= _X4;
if (strstr(bs, " X3 ")) bits |= _X3;
if (strstr(bs, "Opteron")) bits |= OPTERON_;
if (strstr(bs, "Phenom")) {
code = (strstr(bs, "II")) ? PHENOM2 : PHENOM;
}
if (amd_has_turion_modelname(bs)) {
bits |= TURION_;
}
if (strstr(bs, "Athlon(tm)")) bits |= ATHLON_;
if (strstr(bs, "Sempron(tm)")) bits |= SEMPRON_;
if (strstr(bs, "Duron")) bits |= DURON_;
if (strstr(bs, " 64 ")) bits |= _64_;
if (strstr(bs, " FX")) bits |= _FX;
if (strstr(bs, " MP")) bits |= _MP_;
if (strstr(bs, "Athlon(tm) 64") || strstr(bs, "Athlon(tm) II X") || match_pattern(bs, "Athlon(tm) X#")) {
bits |= ATHLON_ | _64_;
}
if (strstr(bs, "Turion")) bits |= TURION_;
if (strstr(bs, "mobile") || strstr(bs, "Mobile")) {
bits |= MOBILE_;
}
if (strstr(bs, "XP")) bits |= _XP_;
if (strstr(bs, "XP-M")) bits |= _M_;
if (strstr(bs, "(LV)")) bits |= _LV_;
if (match_pattern(bs, "C-##")) code = FUSION_C;
if (match_pattern(bs, "E-###")) code = FUSION_E;
if (match_pattern(bs, "Z-##")) code = FUSION_Z;
if (match_pattern(bs, "E#-####") || match_pattern(bs, "A#-####")) code = FUSION_EA;
result.code = code;
result.bits = bits;
return result;
}
static int decode_amd_ryzen_model_code(const char* bs)
{
const struct {
int model_code;
const char* match_str;
} patterns[] = {
{ _1600, "1600" },
{ _1500, "1500" },
{ _1400, "1400" },
};
int i;
for (i = 0; i < COUNT_OF(patterns); i++)
if (strstr(bs, patterns[i].match_str))
return patterns[i].model_code;
//
return 0;
}
static void decode_amd_codename(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
struct amd_code_and_bits_t code_and_bits = decode_amd_codename_part1(data->brand_str);
int i = 0;
char* code_str = NULL;
int model_code;
for (i = 0; i < COUNT_OF(amd_code_str); i++) {
if (code_and_bits.code == amd_code_str[i].code) {
code_str = amd_code_str[i].str;
break;
}
}
if (/*code == ATHLON_64_X2*/ match_all(code_and_bits.bits, ATHLON_|_64_|_X2) && data->l2_cache < 512) {
code_and_bits.bits &= ~(ATHLON_ | _64_);
code_and_bits.bits |= SEMPRON_;
}
if (code_str)
debugf(2, "Detected AMD brand code: %d (%s)\n", code_and_bits.code, code_str);
else
debugf(2, "Detected AMD brand code: %d\n", code_and_bits.code);
if (code_and_bits.bits) {
debugf(2, "Detected AMD bits: ");
debug_print_lbits(2, code_and_bits.bits);
}
// is it Ryzen? if so, we need to detect discern between the four-core 1400/1500 (Ryzen 5) and the four-core Ryzen 3:
model_code = (data->ext_family == 23) ? decode_amd_ryzen_model_code(data->brand_str) : 0;
internal->code.amd = code_and_bits.code;
internal->bits = code_and_bits.bits;
internal->score = match_cpu_codename(cpudb_amd, COUNT_OF(cpudb_amd), data, code_and_bits.code,
code_and_bits.bits, model_code);
}
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
load_amd_features(raw, data);
decode_amd_cache_info(raw, data);
decode_amd_number_of_cores(raw, data);
decode_amd_codename(raw, data, internal);
return 0;
}
void cpuid_get_list_amd(struct cpu_list_t* list)
{
generic_get_cpu_list(cpudb_amd, COUNT_OF(cpudb_amd), list);
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __RECOG_AMD_H__
#define __RECOG_AMD_H__
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
void cpuid_get_list_amd(struct cpu_list_t* list);
#endif /* __RECOG_AMD_H__ */

View File

@@ -0,0 +1,935 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
#include "libcpuid_internal.h"
#include "recog_intel.h"
const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = {
#define CODE(x) { x, #x }
#define CODE2(x, y) CODE(x)
#include "intel_code_t.h"
#undef CODE
};
typedef struct {
int code;
uint64_t bits;
} intel_code_and_bits_t;
enum _intel_model_t {
UNKNOWN = -1,
_3000 = 100,
_3100,
_3200,
X3200,
_3300,
X3300,
_5100,
_5200,
_5300,
_5400,
_2xxx, /* Core i[357] 2xxx */
_3xxx, /* Core i[357] 3xxx */
};
typedef enum _intel_model_t intel_model_t;
enum _intel_bits_t {
PENTIUM_ = LBIT( 0 ),
CELERON_ = LBIT( 1 ),
MOBILE_ = LBIT( 2 ),
CORE_ = LBIT( 3 ),
_I_ = LBIT( 4 ),
_M_ = LBIT( 5 ),
_3 = LBIT( 6 ),
_5 = LBIT( 7 ),
_7 = LBIT( 8 ),
XEON_ = LBIT( 9 ),
_MP = LBIT( 10 ),
ATOM_ = LBIT( 11 ),
};
typedef enum _intel_bits_t intel_bits_t;
const struct match_entry_t cpudb_intel[] = {
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Intel CPU" },
/* i486 */
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown i486" },
{ 4, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-25/33" },
{ 4, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-50" },
{ 4, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX" },
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2" },
{ 4, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SL" },
{ 4, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX2" },
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2 WriteBack" },
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4" },
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4 WriteBack" },
/* All Pentia:
Pentium 1 */
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium" },
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium A-Step" },
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.8u)" },
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium OverDrive" },
{ 5, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium MMX (0.25u)" },
/* Pentium 2 / 3 / M / Conroe / whatsnext - all P6 based. */
{ 6, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown P6" },
{ 6, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Klamath)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Deschutes)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile Pentium II (Tonga)"},
{ 6, 6, -1, -1, -1, 1, -1, -1, NC,0 , 0, "Pentium II (Dixon)" },
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Klamath)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Drake)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Dixon)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Covington)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Mendocino)" },
/* -------------------------------------------------- */
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Katmai)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Tualatin)" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tanner)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tualatin)" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Katmai)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Tualatin)" },
/* Netburst based (Pentium 4 and later)
classic P4s */
{ 15, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium 4" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "Unknown P-4 Celeron" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Unknown Xeon" },
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Northwood)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Cedar Mill)" },
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Northwood)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Cedar Mill)" },
/* server CPUs */
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Prestonia)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Gallatin)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, IRWIN, XEON_ , 0, "Xeon (Irwindale)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Cranford)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, POTOMAC, XEON_ , 0, "Xeon (Potomac)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Dempsey)" },
/* Pentium Ds */
{ 15, 4, 4, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (SmithField)" },
{ 15, 4, 7, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (Presler)" },
/* Celeron and Celeron Ds */
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Willamette)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Northwood)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Cedar Mill)" },
/* -------------------------------------------------- */
/* Intel Core microarchitecture - P6-based */
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium M" },
{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Unknown Pentium M" },
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Banias)" },
{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Banias)" },
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
{ 6, 13, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Dothan)" },
{ 6, 13, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Dothan)" },
{ 6, 13, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
{ 6, 12, -1, -1, -1, -1, -1, -1, NC, ATOM_ , 0, "Unknown Atom" },
{ 6, 12, -1, -1, -1, -1, -1, -1, DIAMONDVILLE,ATOM_, 0, "Atom (Diamondville)" },
{ 6, 12, -1, -1, -1, -1, -1, -1, SILVERTHORNE,ATOM_, 0, "Atom (Silverthorne)" },
{ 6, 12, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
{ 6, 6, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
{ 6, 12, -1, -1, -1, -1, -1, -1, PINEVIEW, ATOM_ , 0, "Atom (Pineview)" },
/* -------------------------------------------------- */
{ 6, 14, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Yonah" },
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO, 0 , 0, "Yonah (Core Duo)" },
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, MOBILE_, 0, "Yonah (Core Solo)" },
{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO , MOBILE_, 0, "Yonah (Core Duo)" },
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
{ 6, 15, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Core 2" },
{ 6, 15, -1, -1, -1, 2, 4096, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo)" },
{ 6, 15, -1, -1, -1, 2, 1024, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 1024K" },
{ 6, 15, -1, -1, -1, 2, 512, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 512K" },
{ 6, 15, -1, -1, -1, 4, -1, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
{ 6, 15, -1, -1, -1, 4, 4096, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
{ 6, 15, -1, -1, -1, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
{ 6, 15, -1, -1, -1, 2, 2048, -1, CORE_DUO, 0 , 0, "Allendale (Core 2 Duo)" },
{ 6, 15, -1, -1, -1, 2, -1, -1, MOBILE_CORE_DUO, 0, 0, "Merom (Core 2 Duo)" },
{ 6, 15, -1, -1, -1, 2, 2048, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 2048K" },
{ 6, 15, -1, -1, -1, 2, 4096, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 4096K" },
{ 6, 15, -1, -1, 15, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
{ 6, 6, -1, -1, 22, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
{ 6, 15, -1, -1, 15, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
{ 6, 6, -1, -1, 22, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
{ 6, 6, -1, -1, 22, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
{ 6, 7, -1, -1, 23, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
{ 6, 6, -1, -1, 22, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
{ 6, 7, -1, -1, 23, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_SOLO , 0, 0, "Unknown Core 45nm" },
{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_DUO , 0, 0, "Unknown Core 45nm" },
{ 6, 7, -1, -1, 23, 2, 1024, -1, WOLFDALE , 0, 0, "Celeron Wolfdale 1M" },
{ 6, 7, -1, -1, 23, 2, 2048, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 2M" },
{ 6, 7, -1, -1, 23, 2, 3072, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 3M" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 6M" },
{ 6, 7, -1, -1, 23, 1, -1, -1, MOBILE_CORE_DUO , 0, 0, "Penryn (Core 2 Duo)" },
{ 6, 7, -1, -1, 23, 2, 1024, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo)" },
{ 6, 7, -1, -1, 23, 2, 3072, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 3M" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 6M" },
{ 6, 7, -1, -1, 23, 4, 2048, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 2M"},
{ 6, 7, -1, -1, 23, 4, 3072, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 3M"},
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 6M"},
/* Core microarchitecture-based Xeons: */
{ 6, 14, -1, -1, 14, 1, -1, -1, NC, XEON_ , 0, "Xeon LV" },
{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _5100, "Xeon (Woodcrest)" },
{ 6, 15, -1, -1, 15, 2, 2048, -1, NC, XEON_ , _3000, "Xeon (Conroe/2M)" },
{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _3000, "Xeon (Conroe/4M)" },
{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , X3200, "Xeon (Kentsfield)" },
{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , _5300, "Xeon (Clovertown)" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _3100, "Xeon (Wolfdale)" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _5200, "Xeon (Wolfdale DP)" },
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , _5400, "Xeon (Harpertown)" },
{ 6, 7, -1, -1, 23, 4, 3072, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/3M)" },
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/6M)" },
/* Nehalem CPUs (45nm): */
{ 6, 10, -1, -1, 26, 4, -1, -1, GAINESTOWN, XEON_ , 0, "Gainestown (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, 4096, GAINESTOWN, XEON_ , 0, "Gainestown 4M (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, 8192, GAINESTOWN, XEON_ , 0, "Gainestown 8M (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, -1, NC, XEON_|_7 , 0, "Bloomfield (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Bloomfield (Core i7)" },
{ 6, 10, -1, -1, 30, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Lynnfield (Core i7)" },
{ 6, 5, -1, -1, 37, 4, -1, 8192, NC, CORE_|_I_|_5 , 0, "Lynnfield (Core i5)" },
/* Westmere CPUs (32nm): */
{ 6, 5, -1, -1, 37, 2, -1, -1, NC, 0 , 0, "Unknown Core i3/i5" },
{ 6, 12, -1, -1, 44, -1, -1, -1, WESTMERE, XEON_ , 0, "Westmere (Xeon)" },
{ 6, 12, -1, -1, 44, -1, -1, 12288, WESTMERE, XEON_ , 0, "Gulftown (Xeon)" },
{ 6, 12, -1, -1, 44, 4, -1, 12288, NC, CORE_|_I_|_7 , 0, "Gulftown (Core i7)" },
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_5 , 0, "Clarkdale (Core i5)" },
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_3 , 0, "Clarkdale (Core i3)" },
{ 6, 5, -1, -1, 37, 2, -1, -1, NC, PENTIUM_ , 0, "Arrandale" },
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_7 , 0, "Arrandale (Core i7)" },
{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_5 , 0, "Arrandale (Core i5)" },
{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_3 , 0, "Arrandale (Core i3)" },
/* Sandy Bridge CPUs (32nm): */
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, 0 , 0, "Unknown Sandy Bridge" },
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge (Xeon)" },
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Sandy Bridge (Core i5)" },
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge (Core i3)" },
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, PENTIUM_ , 0, "Sandy Bridge (Pentium)" },
{ 6, 10, -1, -1, 42, 1, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
{ 6, 13, -1, -1, 45, -1, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge-E" },
{ 6, 13, -1, -1, 45, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge-E (Xeon)" },
/* Ivy Bridge CPUs (22nm): */
{ 6, 10, -1, -1, 58, -1, -1, -1, NC, XEON_ , 0, "Ivy Bridge (Xeon)" },
{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Ivy Bridge (Core i7)" },
{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Ivy Bridge (Core i5)" },
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Ivy Bridge (Core i3)" },
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, PENTIUM_ , 0, "Ivy Bridge (Pentium)" },
{ 6, 10, -1, -1, 58, 1, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
{ 6, 14, -1, -1, 62, -1, -1, -1, NC, 0 , 0, "Ivy Bridge-E" },
/* Haswell CPUs (22nm): */
{ 6, 12, -1, -1, 60, -1, -1, -1, NC, XEON_ , 0, "Haswell (Xeon)" },
{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
{ 6, 6, -1, -1, 70, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, PENTIUM_ , 0, "Haswell (Pentium)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
{ 6, 12, -1, -1, 60, 1, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
{ 6, 15, -1, -1, 63, -1, -1, -1, NC, 0 , 0, "Haswell-E" },
/* Broadwell CPUs (14nm): */
{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell (Core i7)" },
{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell (Core i5)" },
{ 6, 13, -1, -1, 61, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-U (Core i5)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-U (Core i3)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, PENTIUM_ , 0, "Broadwell-U (Pentium)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CELERON_ , 0, "Broadwell-U (Celeron)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NA, 0 , 0, "Broadwell-U (Core M)" },
{ 6, 15, -1, -1, 79, -1, -1, -1, NC, XEON_ , 0, "Broadwell-E (Xeon)" },
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-E (Core i3)" },
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
/* Skylake CPUs (14nm): */
{ 6, 14, -1, -1, 94, -1, -1, -1, NC, XEON_ , 0, "Skylake (Xeon)" },
{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Skylake (Core i7)" },
{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Skylake (Core i5)" },
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Skylake (Core i3)" },
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_7 , 0, "Skylake (Core m7)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_5 , 0, "Skylake (Core m5)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Skylake (Core m3)" },
/* Kaby Lake CPUs (14nm): */
{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Kaby Lake (Core i7)" },
{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Kaby Lake (Core i5)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Kaby Lake (Core i3)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, PENTIUM_ , 0, "Kaby Lake (Pentium)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CELERON_ , 0, "Kaby Lake (Celeron)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Kaby Lake (Core m3)" },
/* Itaniums */
{ 7, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Itanium" },
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Itanium 2" },
};
static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx1[] = {
{ 18, CPU_FEATURE_PN },
{ 21, CPU_FEATURE_DTS },
{ 22, CPU_FEATURE_ACPI },
{ 27, CPU_FEATURE_SS },
{ 29, CPU_FEATURE_TM },
{ 30, CPU_FEATURE_IA64 },
{ 31, CPU_FEATURE_PBE },
};
const struct feature_map_t matchtable_ecx1[] = {
{ 2, CPU_FEATURE_DTS64 },
{ 4, CPU_FEATURE_DS_CPL },
{ 5, CPU_FEATURE_VMX },
{ 6, CPU_FEATURE_SMX },
{ 7, CPU_FEATURE_EST },
{ 8, CPU_FEATURE_TM2 },
{ 10, CPU_FEATURE_CID },
{ 14, CPU_FEATURE_XTPR },
{ 15, CPU_FEATURE_PDCM },
{ 18, CPU_FEATURE_DCA },
{ 21, CPU_FEATURE_X2APIC },
};
const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_XD },
};
const struct feature_map_t matchtable_ebx7[] = {
{ 2, CPU_FEATURE_SGX },
{ 4, CPU_FEATURE_HLE },
{ 11, CPU_FEATURE_RTM },
{ 16, CPU_FEATURE_AVX512F },
{ 17, CPU_FEATURE_AVX512DQ },
{ 18, CPU_FEATURE_RDSEED },
{ 19, CPU_FEATURE_ADX },
{ 26, CPU_FEATURE_AVX512PF },
{ 27, CPU_FEATURE_AVX512ER },
{ 28, CPU_FEATURE_AVX512CD },
{ 29, CPU_FEATURE_SHA_NI },
{ 30, CPU_FEATURE_AVX512BW },
{ 31, CPU_FEATURE_AVX512VL },
};
if (raw->basic_cpuid[0][0] >= 1) {
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 1) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
}
// detect TSX/AVX512:
if (raw->basic_cpuid[0][0] >= 7) {
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
}
}
enum _cache_type_t {
L1I,
L1D,
L2,
L3,
L4
};
typedef enum _cache_type_t cache_type_t;
static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data)
{
if (!on) return;
switch (cache) {
case L1I:
data->l1_instruction_cache = size;
break;
case L1D:
data->l1_data_cache = size;
data->l1_assoc = assoc;
data->l1_cacheline = linesize;
break;
case L2:
data->l2_cache = size;
data->l2_assoc = assoc;
data->l2_cacheline = linesize;
break;
case L3:
data->l3_cache = size;
data->l3_assoc = assoc;
data->l3_cacheline = linesize;
break;
case L4:
data->l4_cache = size;
data->l4_assoc = assoc;
data->l4_cacheline = linesize;
break;
default:
break;
}
}
static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
uint8_t f[256] = {0};
int reg, off;
uint32_t x;
for (reg = 0; reg < 4; reg++) {
x = raw->basic_cpuid[2][reg];
if (x & 0x80000000) continue;
for (off = 0; off < 4; off++) {
f[x & 0xff] = 1;
x >>= 8;
}
}
check_case(f[0x06], L1I, 8, 4, 32, data);
check_case(f[0x08], L1I, 16, 4, 32, data);
check_case(f[0x0A], L1D, 8, 2, 32, data);
check_case(f[0x0C], L1D, 16, 4, 32, data);
check_case(f[0x22], L3, 512, 4, 64, data);
check_case(f[0x23], L3, 1024, 8, 64, data);
check_case(f[0x25], L3, 2048, 8, 64, data);
check_case(f[0x29], L3, 4096, 8, 64, data);
check_case(f[0x2C], L1D, 32, 8, 64, data);
check_case(f[0x30], L1I, 32, 8, 64, data);
check_case(f[0x39], L2, 128, 4, 64, data);
check_case(f[0x3A], L2, 192, 6, 64, data);
check_case(f[0x3B], L2, 128, 2, 64, data);
check_case(f[0x3C], L2, 256, 4, 64, data);
check_case(f[0x3D], L2, 384, 6, 64, data);
check_case(f[0x3E], L2, 512, 4, 64, data);
check_case(f[0x41], L2, 128, 4, 32, data);
check_case(f[0x42], L2, 256, 4, 32, data);
check_case(f[0x43], L2, 512, 4, 32, data);
check_case(f[0x44], L2, 1024, 4, 32, data);
check_case(f[0x45], L2, 2048, 4, 32, data);
check_case(f[0x46], L3, 4096, 4, 64, data);
check_case(f[0x47], L3, 8192, 8, 64, data);
check_case(f[0x4A], L3, 6144, 12, 64, data);
check_case(f[0x4B], L3, 8192, 16, 64, data);
check_case(f[0x4C], L3, 12288, 12, 64, data);
check_case(f[0x4D], L3, 16384, 16, 64, data);
check_case(f[0x4E], L2, 6144, 24, 64, data);
check_case(f[0x60], L1D, 16, 8, 64, data);
check_case(f[0x66], L1D, 8, 4, 64, data);
check_case(f[0x67], L1D, 16, 4, 64, data);
check_case(f[0x68], L1D, 32, 4, 64, data);
/* The following four entries are trace cache. Intel does not
* specify a cache-line size, so we use -1 instead
*/
check_case(f[0x70], L1I, 12, 8, -1, data);
check_case(f[0x71], L1I, 16, 8, -1, data);
check_case(f[0x72], L1I, 32, 8, -1, data);
check_case(f[0x73], L1I, 64, 8, -1, data);
check_case(f[0x78], L2, 1024, 4, 64, data);
check_case(f[0x79], L2, 128, 8, 64, data);
check_case(f[0x7A], L2, 256, 8, 64, data);
check_case(f[0x7B], L2, 512, 8, 64, data);
check_case(f[0x7C], L2, 1024, 8, 64, data);
check_case(f[0x7D], L2, 2048, 8, 64, data);
check_case(f[0x7F], L2, 512, 2, 64, data);
check_case(f[0x82], L2, 256, 8, 32, data);
check_case(f[0x83], L2, 512, 8, 32, data);
check_case(f[0x84], L2, 1024, 8, 32, data);
check_case(f[0x85], L2, 2048, 8, 32, data);
check_case(f[0x86], L2, 512, 4, 64, data);
check_case(f[0x87], L2, 1024, 8, 64, data);
if (f[0x49]) {
/* This flag is overloaded with two meanings. On Xeon MP
* (family 0xf, model 0x6) this means L3 cache. On all other
* CPUs (notably Conroe et al), this is L2 cache. In both cases
* it means 4MB, 16-way associative, 64-byte line size.
*/
if (data->family == 0xf && data->model == 0x6) {
data->l3_cache = 4096;
data->l3_assoc = 16;
data->l3_cacheline = 64;
} else {
data->l2_cache = 4096;
data->l2_assoc = 16;
data->l2_cacheline = 64;
}
}
if (f[0x40]) {
/* Again, a special flag. It means:
* 1) If no L2 is specified, then CPU is w/o L2 (0 KB)
* 2) If L2 is specified by other flags, then, CPU is w/o L3.
*/
if (data->l2_cache == -1) {
data->l2_cache = 0;
} else {
data->l3_cache = 0;
}
}
}
static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int ecx;
int ways, partitions, linesize, sets, size, level, typenumber;
cache_type_t type;
for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) {
typenumber = raw->intel_fn4[ecx][0] & 0x1f;
if (typenumber == 0) break;
level = (raw->intel_fn4[ecx][0] >> 5) & 0x7;
if (level == 1 && typenumber == 1)
type = L1D;
else if (level == 1 && typenumber == 2)
type = L1I;
else if (level == 2 && typenumber == 3)
type = L2;
else if (level == 3 && typenumber == 3)
type = L3;
else if (level == 4 && typenumber == 3)
type = L4;
else {
warnf("deterministic_cache: unknown level/typenumber combo (%d/%d), cannot\n", level, typenumber);
warnf("deterministic_cache: recognize cache type\n");
continue;
}
ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1;
partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1;
linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1;
sets = raw->intel_fn4[ecx][2] + 1;
size = ways * partitions * linesize * sets / 1024;
check_case(1, type, size, ways, linesize, data);
}
}
static int decode_intel_extended_topology(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int i, level_type, num_smt = -1, num_core = -1;
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8;
switch (level_type) {
case 0x01:
num_smt = raw->intel_fn11[i][1] & 0xffff;
break;
case 0x02:
num_core = raw->intel_fn11[i][1] & 0xffff;
break;
default:
break;
}
}
if (num_smt == -1 || num_core == -1) return 0;
data->num_logical_cpus = num_core;
data->num_cores = num_core / num_smt;
// make sure num_cores is at least 1. In VMs, the CPUID instruction
// is rigged and may give nonsensical results, but we should at least
// avoid outputs like data->num_cores == 0.
if (data->num_cores <= 0) data->num_cores = 1;
return 1;
}
static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int logical_cpus = -1, num_cores = -1;
if (raw->basic_cpuid[0][0] >= 11) {
if (decode_intel_extended_topology(raw, data)) return;
}
if (raw->basic_cpuid[0][0] >= 1) {
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
if (raw->basic_cpuid[0][0] >= 4) {
num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f);
}
}
if (data->flags[CPU_FEATURE_HT]) {
if (num_cores > 1) {
data->num_cores = num_cores;
data->num_logical_cpus = logical_cpus;
} else {
data->num_cores = 1;
data->num_logical_cpus = (logical_cpus >= 1 ? logical_cpus : 1);
if (data->num_logical_cpus == 1)
data->flags[CPU_FEATURE_HT] = 0;
}
} else {
data->num_cores = data->num_logical_cpus = 1;
}
}
static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
{
intel_code_t code = (intel_code_t) NC;
intel_code_and_bits_t result;
uint64_t bits = 0;
int i = 0;
const char* bs = data->brand_str;
const char* s;
const struct { intel_code_t c; const char *search; } matchtable[] = {
{ PENTIUM_M, "Pentium(R) M" },
{ CORE_SOLO, "Pentium(R) Dual CPU" },
{ CORE_SOLO, "Pentium(R) Dual-Core" },
{ PENTIUM_D, "Pentium(R) D" },
{ CORE_SOLO, "Genuine Intel(R) CPU" },
{ CORE_SOLO, "Intel(R) Core(TM)" },
{ DIAMONDVILLE, "CPU [N ][23]## " },
{ SILVERTHORNE, "CPU Z" },
{ PINEVIEW, "CPU [ND][45]## " },
{ CEDARVIEW, "CPU [ND]#### " },
};
const struct { uint64_t bit; const char* search; } bit_matchtable[] = {
{ XEON_, "Xeon" },
{ _MP, " MP" },
{ ATOM_, "Atom(TM) CPU" },
{ MOBILE_, "Mobile" },
{ CELERON_, "Celeron" },
{ PENTIUM_, "Pentium" },
};
for (i = 0; i < COUNT_OF(bit_matchtable); i++) {
if (match_pattern(bs, bit_matchtable[i].search))
bits |= bit_matchtable[i].bit;
}
if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) {
bits |= CORE_;
i--;
switch (bs[i + 9]) {
case 'i': bits |= _I_; break;
case 'm': bits |= _M_; break;
}
switch (bs[i + 10]) {
case '3': bits |= _3; break;
case '5': bits |= _5; break;
case '7': bits |= _7; break;
}
}
for (i = 0; i < COUNT_OF(matchtable); i++)
if (match_pattern(bs, matchtable[i].search)) {
code = matchtable[i].c;
break;
}
debugf(2, "intel matchtable result is %d\n", code);
if (bits & XEON_) {
if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##"))
bits |= _7;
else if (match_pattern(bs, "[ELXW]55##"))
code = GAINESTOWN;
else if (match_pattern(bs, "[ELXW]56##"))
code = WESTMERE;
else if (data->l3_cache > 0 && data->family == 16)
/* restrict by family, since later Xeons also have L3 ... */
code = IRWIN;
}
if (match_all(bits, XEON_ + _MP) && data->l3_cache > 0)
code = POTOMAC;
if (code == CORE_SOLO) {
s = strstr(bs, "CPU");
if (s) {
s += 3;
while (*s == ' ') s++;
if (*s == 'T')
bits |= MOBILE_;
}
}
if (code == CORE_SOLO) {
switch (data->num_cores) {
case 1: break;
case 2:
{
code = CORE_DUO;
if (data->num_logical_cpus > 2)
code = DUAL_CORE_HT;
break;
}
case 4:
{
code = QUAD_CORE;
if (data->num_logical_cpus > 4)
code = QUAD_CORE_HT;
break;
}
default:
code = MORE_THAN_QUADCORE; break;
}
}
if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) {
if (data->ext_model < 23) {
code = MEROM;
} else {
code = PENRYN;
}
}
if (data->ext_model == 23 &&
(code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) {
code = WOLFDALE;
}
result.code = code;
result.bits = bits;
return result;
}
static intel_model_t get_model_code(struct cpu_id_t* data)
{
int i = 0;
int l = (int) strlen(data->brand_str);
const char *bs = data->brand_str;
int mod_flags = 0, model_no = 0, ndigs = 0;
/* If the CPU is a Core ix, then just return the model number generation: */
if ((i = match_pattern(bs, "Core(TM) i[357]")) != 0) {
i += 11;
if (i + 4 >= l) return UNKNOWN;
if (bs[i] == '2') return _2xxx;
if (bs[i] == '3') return _3xxx;
return UNKNOWN;
}
/* For Core2-based Xeons: */
while (i < l - 3) {
if (bs[i] == 'C' && bs[i+1] == 'P' && bs[i+2] == 'U')
break;
i++;
}
if (i >= l - 3) return UNKNOWN;
i += 3;
while (i < l - 4 && bs[i] == ' ') i++;
if (i >= l - 4) return UNKNOWN;
while (i < l - 4 && !isdigit(bs[i])) {
if (bs[i] >= 'A' && bs[i] <= 'Z')
mod_flags |= (1 << (bs[i] - 'A'));
i++;
}
if (i >= l - 4) return UNKNOWN;
while (isdigit(bs[i])) {
ndigs++;
model_no = model_no * 10 + (int) (bs[i] - '0');
i++;
}
if (ndigs != 4) return UNKNOWN;
#define HAVE(ch, flags) ((flags & (1 << ((int)(ch-'A')))) != 0)
switch (model_no / 100) {
case 30: return _3000;
case 31: return _3100;
case 32:
{
return (HAVE('X', mod_flags)) ? X3200 : _3200;
}
case 33:
{
return (HAVE('X', mod_flags)) ? X3300 : _3300;
}
case 51: return _5100;
case 52: return _5200;
case 53: return _5300;
case 54: return _5400;
default:
return UNKNOWN;
}
#undef HAVE
}
static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
struct cpu_epc_t epc;
int i;
if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf
if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS
// decode sub-leaf 0:
if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1;
if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1;
if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2])
data->sgx.present = 1;
data->sgx.misc_select = raw->basic_cpuid[0x12][1];
data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff;
data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff;
// decode sub-leaf 1:
data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32);
data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32);
// decode higher-order subleafs, whenever present:
data->sgx.num_epc_sections = -1;
for (i = 0; i < 1000000; i++) {
epc = cpuid_get_epc(i, raw);
if (epc.length == 0) {
debugf(2, "SGX: epc section request for %d returned null, no more EPC sections.\n", i);
data->sgx.num_epc_sections = i;
break;
}
}
if (data->sgx.num_epc_sections == -1) {
debugf(1, "SGX: warning: seems to be infinitude of EPC sections.\n");
data->sgx.num_epc_sections = 1000000;
}
}
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw)
{
uint32_t regs[4];
struct cpu_epc_t retval = {0, 0};
if (raw && index < MAX_INTELFN12H_LEVEL - 2) {
// this was queried already, use the data:
memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs));
} else {
// query this ourselves:
regs[0] = 0x12;
regs[2] = 2 + index;
regs[1] = regs[3] = 0;
cpu_exec_cpuid_ext(regs);
}
// decode values:
if ((regs[0] & 0xf) == 0x1) {
retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
}
return retval;
}
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
intel_code_and_bits_t brand;
intel_model_t model_code;
int i;
char* brand_code_str = NULL;
load_intel_features(raw, data);
if (raw->basic_cpuid[0][0] >= 4) {
/* Deterministic way is preferred, being more generic */
decode_intel_deterministic_cache_info(raw, data);
} else if (raw->basic_cpuid[0][0] >= 2) {
decode_intel_oldstyle_cache_info(raw, data);
}
decode_intel_number_of_cores(raw, data);
brand = get_brand_code_and_bits(data);
model_code = get_model_code(data);
for (i = 0; i < COUNT_OF(intel_bcode_str); i++) {
if (brand.code == intel_bcode_str[i].code) {
brand_code_str = intel_bcode_str[i].str;
break;
}
}
if (brand_code_str)
debugf(2, "Detected Intel brand code: %d (%s)\n", brand.code, brand_code_str);
else
debugf(2, "Detected Intel brand code: %d\n", brand.code);
if (brand.bits) {
debugf(2, "Detected Intel bits: ");
debug_print_lbits(2, brand.bits);
}
debugf(2, "Detected Intel model code: %d\n", model_code);
internal->code.intel = brand.code;
internal->bits = brand.bits;
if (data->flags[CPU_FEATURE_SGX]) {
debugf(2, "SGX seems to be present, decoding...\n");
// if SGX is indicated by the CPU, verify its presence:
decode_intel_sgx_features(raw, data);
}
internal->score = match_cpu_codename(cpudb_intel, COUNT_OF(cpudb_intel), data,
brand.code, brand.bits, model_code);
return 0;
}
void cpuid_get_list_intel(struct cpu_list_t* list)
{
generic_get_cpu_list(cpudb_intel, COUNT_OF(cpudb_intel), list);
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __RECOG_INTEL_H__
#define __RECOG_INTEL_H__
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
void cpuid_get_list_intel(struct cpu_list_t* list);
#endif /*__RECOG_INTEL_H__*/

133
cpu.c
View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,77 +25,87 @@
#include <cpuid.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#ifndef BUILD_TEST
# include <libcpuid.h>
#endif
#include "cpu.h"
#include "options.h"
#define VENDOR_ID (0)
#define PROCESSOR_INFO (1)
#define CACHE_TLB_DESCRIPTOR (2)
#define EXTENDED_FEATURES (7)
#define PROCESSOR_BRAND_STRING_1 (0x80000002)
#define PROCESSOR_BRAND_STRING_2 (0x80000003)
#define PROCESSOR_BRAND_STRING_3 (0x80000004)
#define EAX_Reg (0)
#define EBX_Reg (1)
#define ECX_Reg (2)
#define EDX_Reg (3)
static inline void cpuid(int level, int output[4]) {
int a, b, c, d;
__cpuid_count(level, 0, a, b, c, d);
output[0] = a;
output[1] = b;
output[2] = c;
output[3] = d;
}
static void cpu_brand_string(char* s) {
int cpu_info[4] = { 0 };
cpuid(VENDOR_ID, cpu_info);
if (cpu_info[EAX_Reg] >= 4) {
for (int i = 0; i < 4; i++) {
cpuid(0x80000002 + i, cpu_info);
memcpy(s, cpu_info, sizeof(cpu_info));
s += 16;
}
}
}
static bool has_aes_ni()
{
int cpu_info[4] = { 0 };
cpuid(PROCESSOR_INFO, cpu_info);
return cpu_info[ECX_Reg] & bit_AES;
}
static bool has_bmi2() {
int cpu_info[4] = { 0 };
cpuid(EXTENDED_FEATURES, cpu_info);
return cpu_info[EBX_Reg] & bit_BMI2;
}
#ifndef BUILD_TEST
void cpu_init_common() {
cpu_brand_string(cpu_info.brand);
struct cpu_raw_data_t raw = { 0 };
struct cpu_id_t data = { 0 };
cpuid_get_raw_data(&raw);
cpu_identify(&raw, &data);
strncpy(cpu_info.brand, data.brand_str, sizeof(cpu_info.brand) - 1);
cpu_info.total_logical_cpus = data.total_logical_cpus;
cpu_info.sockets = data.total_logical_cpus / data.num_logical_cpus;
cpu_info.total_cores = data.num_cores * cpu_info.sockets;
cpu_info.l3_cache = data.l3_cache > 0 ? data.l3_cache * cpu_info.sockets : 0;
// Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97
if (data.vendor == VENDOR_AMD && data.l3_cache <= 0 && data.l2_assoc == 16 && data.ext_family >= 21) {
cpu_info.l2_cache = data.l2_cache * (cpu_info.total_cores / 2) * cpu_info.sockets;
}
else {
cpu_info.l2_cache = data.l2_cache > 0 ? data.l2_cache * cpu_info.total_cores * cpu_info.sockets : 0;
}
# ifdef __x86_64__
cpu_info.flags |= CPU_FLAG_X86_64;
# endif
if (has_aes_ni()) {
if (data.flags[CPU_FEATURE_AES]) {
cpu_info.flags |= CPU_FLAG_AES;
}
if (has_bmi2()) {
if (data.flags[CPU_FEATURE_BMI2]) {
cpu_info.flags |= CPU_FLAG_BMI2;
}
# ifndef XMRIG_NO_ASM
if (data.vendor == VENDOR_AMD) {
cpu_info.assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
}
else if (data.vendor == VENDOR_INTEL) {
cpu_info.assembly = ASM_INTEL;
}
# endif
}
#endif
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
if (cpu_info.total_logical_cpus == 1) {
return 1;
}
int cache = cpu_info.l3_cache ? cpu_info.l3_cache : cpu_info.l2_cache;
int count = 0;
const int size = (algo ? 1024 : 2048) * (double_hash ? 2 : 1);
if (cache) {
count = cache / size;
}
else {
count = cpu_info.total_logical_cpus / 2;
}
if (count > cpu_info.total_logical_cpus) {
count = cpu_info.total_logical_cpus;
}
if (((float) count / cpu_info.total_logical_cpus * 100) > max_cpu_usage) {
count = ceil((float) cpu_info.total_logical_cpus * (max_cpu_usage / 100.0));
}
return count < 1 ? 1 : count;
}

25
cpu.h
View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,13 +22,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CPU_H__
#define __CPU_H__
#ifndef XMRIG_CPU_H
#define XMRIG_CPU_H
#include <stdbool.h>
struct cpu_info {
int count;
int total_cores;
int total_logical_cpus;
int flags;
char brand[48];
int sockets;
int l2_cache;
int l3_cache;
char brand[64];
int assembly;
};
extern struct cpu_info cpu_info;
@@ -40,9 +48,8 @@ enum cpu_flags {
};
void cpu_init();
int get_optimal_threads_count();
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage);
int affine_to_cpu_mask(int id, unsigned long mask);
#endif /* __CPU_H__ */
#endif /* XMRIG_CPU_H */

129
cpu_stub.c Normal file
View File

@@ -0,0 +1,129 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cpuid.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include "cpu.h"
#include "options.h"
#define VENDOR_ID (0)
#define PROCESSOR_INFO (1)
#define CACHE_TLB_DESCRIPTOR (2)
#define EXTENDED_FEATURES (7)
#define PROCESSOR_BRAND_STRING_1 (0x80000002)
#define PROCESSOR_BRAND_STRING_2 (0x80000003)
#define PROCESSOR_BRAND_STRING_3 (0x80000004)
#define EAX_Reg (0)
#define EBX_Reg (1)
#define ECX_Reg (2)
#define EDX_Reg (3)
static inline void cpuid(int level, int output[4]) {
int a, b, c, d;
__cpuid_count(level, 0, a, b, c, d);
output[0] = a;
output[1] = b;
output[2] = c;
output[3] = d;
}
static void cpu_brand_string(char* s) {
int32_t cpu_info[4] = { 0 };
cpuid(VENDOR_ID, cpu_info);
if (cpu_info[EAX_Reg] >= 4) {
for (int i = 0; i < 4; i++) {
cpuid(0x80000002 + i, cpu_info);
memcpy(s, cpu_info, sizeof(cpu_info));
s += 16;
}
}
}
static bool has_aes_ni()
{
int32_t cpu_info[4] = { 0 };
cpuid(PROCESSOR_INFO, cpu_info);
return cpu_info[ECX_Reg] & bit_AES;
}
static bool has_bmi2() {
int32_t cpu_info[4] = { 0 };
cpuid(EXTENDED_FEATURES, cpu_info);
return cpu_info[EBX_Reg] & bit_BMI2;
}
void cpu_init_common() {
cpu_info.sockets = 1;
cpu_brand_string(cpu_info.brand);
# ifdef __x86_64__
cpu_info.flags |= CPU_FLAG_X86_64;
# endif
if (has_aes_ni()) {
cpu_info.flags |= CPU_FLAG_AES;
# ifndef XMRIG_NO_ASM
char vendor[13] = { 0 };
int32_t data[4] = { 0 };
cpuid(0, data);
memcpy(vendor + 0, &data[1], 4);
memcpy(vendor + 4, &data[3], 4);
memcpy(vendor + 8, &data[2], 4);
if (memcmp(vendor, "GenuineIntel", 12) == 0) {
cpu_info.assembly = ASM_INTEL;
}
else if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
cpu_info.assembly = ASM_RYZEN;
}
# endif
}
if (has_bmi2()) {
cpu_info.flags |= CPU_FLAG_BMI2;
}
}
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
int count = cpu_info.total_logical_cpus / 2;
return count < 1 ? 1 : count;
}

146
crypto/CryptonightR_gen.c Normal file
View File

@@ -0,0 +1,146 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include "algo/cryptonight/cryptonight_monero.h"
#include "crypto/asm/CryptonightR_template.h"
#include "persistent_memory.h"
static inline void add_code(uint8_t **p, void (*p1)(), void (*p2)())
{
const ptrdiff_t size = (const uint8_t*)(p2) - (const uint8_t*)(p1);
if (size > 0) {
memcpy(*p, (const void *) p1, size);
*p += size;
}
}
static inline void add_random_math(uint8_t **p, const struct V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, enum Assembly ASM)
{
uint32_t prev_rot_src = (uint32_t)(-1);
for (int i = 0;; ++i) {
const struct V4_Instruction inst = code[i];
if (inst.opcode == RET) {
break;
}
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
uint8_t dst_index = inst.dst_index;
uint8_t src_index = inst.src_index;
const uint32_t a = inst.dst_index;
const uint32_t b = inst.src_index;
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
switch (inst.opcode) {
case ROR:
case ROL:
if (b != prev_rot_src) {
prev_rot_src = b;
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
}
break;
}
if (a == prev_rot_src) {
prev_rot_src = (uint32_t)(-1);
}
void_func begin = instructions[c];
if ((ASM = ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
uint8_t* prefix = (uint8_t*) begin;
if (*prefix == 0x49) {
**p = 0x41;
*p += 1;
}
begin = (void_func)(prefix + 1);
}
add_code(p, begin, instructions[c + 1]);
if (inst.opcode == ADD) {
*(uint32_t*)(*p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
if (is_64_bit) {
prev_rot_src = (uint32_t)(-1);
}
}
}
}
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
{
uint8_t* p0 = machine_code;
uint8_t* p = p0;
add_code(&p, CryptonightR_template_part1, CryptonightR_template_part2);
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(&p, CryptonightR_template_part2, CryptonightR_template_part3);
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
add_code(&p, CryptonightR_template_part3, CryptonightR_template_end);
flush_instruction_cache(machine_code, p - p0);
}
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
{
uint8_t* p0 = (uint8_t*) machine_code;
uint8_t* p = p0;
add_code(&p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(&p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(&p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
add_code(&p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
flush_instruction_cache(machine_code, p - p0);
}
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
{
uint8_t* p0 = machine_code;
uint8_t* p = p0;
add_code(&p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
add_code(&p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
add_code(&p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
flush_instruction_cache(machine_code, p - p0);
}

View File

@@ -1,170 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
#include <stdint.h>
#include "aesb.h"
#if defined(__cplusplus)
extern "C"
{
#endif
#define TABLE_ALIGN 32
#define WPOLY 0x011b
#define N_COLS 4
#define AES_BLOCK_SIZE 16
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
#if defined(_MSC_VER)
#define ALIGN __declspec(align(TABLE_ALIGN))
#elif defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(16)))
#else
#define ALIGN
#endif
#define rf1(r,c) (r)
#define word_in(x,c) (*((uint32_t*)(x)+(c)))
#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
#define s(x,c) x[c]
#define si(y,x,c) (s(y,c) = word_in(x, c))
#define so(y,x,c) word_out(y, c, s(x,c))
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
#define round(y,x,k) \
y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
#define to_byte(x) ((x) & 0xff)
#define bval(x,n) to_byte((x) >> (8 * (n)))
#define fwd_var(x,r,c)\
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#define sb_data(w) {\
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
#define rc_data(w) {\
w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
w(0x1b), w(0x36) }
#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
#define h0(x) (x)
#define w0(p) bytes2word(p, 0, 0, 0)
#define w1(p) bytes2word(0, p, 0, 0)
#define w2(p) bytes2word(0, 0, p, 0)
#define w3(p) bytes2word(0, 0, 0, p)
#define u0(p) bytes2word(f2(p), p, p, f3(p))
#define u1(p) bytes2word(f3(p), f2(p), p, p)
#define u2(p) bytes2word(p, f3(p), f2(p), p)
#define u3(p) bytes2word(p, p, f3(p), f2(p))
#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
#define f3(x) (f2(x) ^ x)
#define f9(x) (f8(x) ^ x)
#define fb(x) (f8(x) ^ f2(x) ^ x)
#define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
#define t_dec(m,n) t_##m##n
#define t_set(m,n) t_##m##n
#define t_use(m,n) t_##m##n
#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
#define four_tables(x,tab,vf,rf,c) \
(tab[0][bval(vf(x,0,c),rf(0,c))] \
^ tab[1][bval(vf(x,1,c),rf(1,c))] \
^ tab[2][bval(vf(x,2,c),rf(2,c))] \
^ tab[3][bval(vf(x,3,c),rf(3,c))])
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
inline void aesb_single_round(const uint8_t *restrict in, uint8_t *out, const uint8_t *restrict expandedKey) {
round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey));
}
inline void aesb_pseudo_round_mut(uint8_t *restrict val, const uint8_t *restrict expandedKey) {
uint32_t b1[4];
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey));
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS);
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS);
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS);
}
#if defined(__cplusplus)
}
#endif

View File

@@ -1,10 +0,0 @@
#ifndef __AESB_H__
#define __AESB_H__
void aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
void aesb_pseudo_round_mut(uint8_t *val, const uint8_t *expandedKey);
#define fast_aesb_single_round aesb_single_round
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
#endif /* __AESB_H__ */

View File

@@ -0,0 +1,279 @@
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
ALIGN(64)
FN_PREFIX(CryptonightR_soft_aes_template_part1):
mov QWORD PTR [rsp+8], rcx
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 232
mov eax, [rcx+96]
mov ebx, [rcx+100]
mov esi, [rcx+104]
mov edx, [rcx+108]
mov [rsp+144], eax
mov [rsp+148], ebx
mov [rsp+152], esi
mov [rsp+156], edx
mov rax, QWORD PTR [rcx+48]
mov r10, rcx
xor rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+40]
xor r9, QWORD PTR [rcx+8]
movq xmm4, rax
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov r11, QWORD PTR [rcx+224]
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r10+72]
mov rax, QWORD PTR [r10+80]
movq xmm0, rdx
xor rax, QWORD PTR [r10+64]
movaps XMMWORD PTR [rsp+16], xmm6
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+48], xmm8
movaps XMMWORD PTR [rsp+64], xmm9
movaps XMMWORD PTR [rsp+80], xmm10
movaps XMMWORD PTR [rsp+96], xmm11
movaps XMMWORD PTR [rsp+112], xmm12
movaps XMMWORD PTR [rsp+128], xmm13
movq xmm5, rax
mov rax, r8
punpcklqdq xmm4, xmm0
and eax, 2097136
movq xmm10, QWORD PTR [r10+96]
movq xmm0, rcx
mov rcx, QWORD PTR [r10+104]
xorps xmm9, xmm9
mov QWORD PTR [rsp+328], rax
movq xmm12, r11
mov QWORD PTR [rsp+320], r9
punpcklqdq xmm5, xmm0
movq xmm13, rcx
mov r12d, 524288
ALIGN(64)
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
movd xmm11, r12d
mov r12, QWORD PTR [r10+272]
lea r13, QWORD PTR [rax+r11]
mov esi, DWORD PTR [r13]
movq xmm0, r9
mov r10d, DWORD PTR [r13+4]
movq xmm7, r8
mov ebp, DWORD PTR [r13+12]
mov r14d, DWORD PTR [r13+8]
mov rdx, QWORD PTR [rsp+328]
movzx ecx, sil
shr esi, 8
punpcklqdq xmm7, xmm0
mov r15d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
mov edi, DWORD PTR [r12+rcx*4]
movzx ecx, r14b
shr r14d, 8
mov ebx, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
shr ebp, 8
mov r9d, DWORD PTR [r12+rcx*4]
movzx ecx, r10b
shr r10d, 8
xor r15d, DWORD PTR [r12+rcx*4+1024]
movzx ecx, r14b
shr r14d, 8
mov eax, r14d
shr eax, 8
xor edi, DWORD PTR [r12+rcx*4+1024]
add eax, 256
movzx ecx, bpl
shr ebp, 8
xor ebx, DWORD PTR [r12+rcx*4+1024]
movzx ecx, sil
shr esi, 8
xor r9d, DWORD PTR [r12+rcx*4+1024]
add r12, 2048
movzx ecx, r10b
shr r10d, 8
add r10d, 256
mov r11d, DWORD PTR [r12+rax*4]
xor r11d, DWORD PTR [r12+rcx*4]
xor r11d, r9d
movzx ecx, sil
mov r10d, DWORD PTR [r12+r10*4]
shr esi, 8
add esi, 256
xor r10d, DWORD PTR [r12+rcx*4]
movzx ecx, bpl
xor r10d, ebx
shr ebp, 8
movd xmm1, r11d
add ebp, 256
movq r11, xmm12
mov r9d, DWORD PTR [r12+rcx*4]
xor r9d, DWORD PTR [r12+rsi*4]
mov eax, DWORD PTR [r12+rbp*4]
xor r9d, edi
movzx ecx, r14b
movd xmm0, r10d
movd xmm2, r9d
xor eax, DWORD PTR [r12+rcx*4]
mov rcx, rdx
xor eax, r15d
punpckldq xmm2, xmm1
xor rcx, 16
movd xmm6, eax
mov rax, rdx
punpckldq xmm6, xmm0
xor rax, 32
punpckldq xmm6, xmm2
xor rdx, 48
movdqu xmm2, XMMWORD PTR [rcx+r11]
pxor xmm6, xmm2
pxor xmm6, xmm7
paddq xmm2, xmm4
movdqu xmm1, XMMWORD PTR [rax+r11]
movdqu xmm0, XMMWORD PTR [rdx+r11]
pxor xmm6, xmm1
pxor xmm6, xmm0
paddq xmm0, xmm5
movdqu XMMWORD PTR [rcx+r11], xmm0
movdqu XMMWORD PTR [rax+r11], xmm2
movq rcx, xmm13
paddq xmm1, xmm7
movdqu XMMWORD PTR [rdx+r11], xmm1
movq rdi, xmm6
mov r10, rdi
and r10d, 2097136
movdqa xmm0, xmm6
pxor xmm0, xmm4
movdqu XMMWORD PTR [r13], xmm0
mov ebx, [rsp+144]
mov ebp, [rsp+152]
add ebx, [rsp+148]
add ebp, [rsp+156]
shl rbp, 32
or rbx, rbp
xor rbx, QWORD PTR [r10+r11]
lea r14, QWORD PTR [r10+r11]
mov rbp, QWORD PTR [r14+8]
mov [rsp+160], rbx
mov [rsp+168], rdi
mov [rsp+176], rbp
mov [rsp+184], r10
mov r10, rsp
mov ebx, [rsp+144]
mov esi, [rsp+148]
mov edi, [rsp+152]
mov ebp, [rsp+156]
movd esp, xmm7
movaps xmm0, xmm7
psrldq xmm0, 8
movd r15d, xmm0
movd eax, xmm4
movd edx, xmm5
movaps xmm0, xmm5
psrldq xmm0, 8
movd r9d, xmm0
FN_PREFIX(CryptonightR_soft_aes_template_part2):
mov rsp, r10
mov [rsp+144], ebx
mov [rsp+148], esi
mov [rsp+152], edi
mov [rsp+156], ebp
mov edi, edi
shl rbp, 32
or rbp, rdi
xor r8, rbp
mov ebx, ebx
shl rsi, 32
or rsi, rbx
xor QWORD PTR [rsp+320], rsi
mov rbx, [rsp+160]
mov rdi, [rsp+168]
mov rbp, [rsp+176]
mov r10, [rsp+184]
mov r9, r10
xor r9, 16
mov rcx, r10
xor rcx, 32
xor r10, 48
mov rax, rbx
mul rdi
movdqu xmm2, XMMWORD PTR [r9+r11]
movdqu xmm1, XMMWORD PTR [rcx+r11]
pxor xmm6, xmm2
pxor xmm6, xmm1
paddq xmm1, xmm7
add r8, rdx
movdqu xmm0, XMMWORD PTR [r10+r11]
pxor xmm6, xmm0
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [r9+r11], xmm0
movdqa xmm5, xmm4
mov r9, QWORD PTR [rsp+320]
movdqa xmm4, xmm6
add r9, rax
movdqu XMMWORD PTR [rcx+r11], xmm2
movdqu XMMWORD PTR [r10+r11], xmm1
mov r10, QWORD PTR [rsp+304]
movd r12d, xmm11
mov QWORD PTR [r14], r8
xor r8, rbx
mov rax, r8
mov QWORD PTR [r14+8], r9
and eax, 2097136
xor r9, rbp
mov QWORD PTR [rsp+320], r9
mov QWORD PTR [rsp+328], rax
sub r12d, 1
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
FN_PREFIX(CryptonightR_soft_aes_template_part3):
movaps xmm6, XMMWORD PTR [rsp+16]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+48]
movaps xmm9, XMMWORD PTR [rsp+64]
movaps xmm10, XMMWORD PTR [rsp+80]
movaps xmm11, XMMWORD PTR [rsp+96]
movaps xmm12, XMMWORD PTR [rsp+112]
movaps xmm13, XMMWORD PTR [rsp+128]
add rsp, 232
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
ret
FN_PREFIX(CryptonightR_soft_aes_template_end):

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,531 @@
PUBLIC FN_PREFIX(CryptonightR_template_part1)
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
PUBLIC FN_PREFIX(CryptonightR_template_part2)
PUBLIC FN_PREFIX(CryptonightR_template_part3)
PUBLIC FN_PREFIX(CryptonightR_template_end)
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
ALIGN(64)
FN_PREFIX(CryptonightR_template_part1):
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push r10
push r11
push r12
push r13
push r14
push r15
push rdi
sub rsp, 64
mov r12, rcx
mov r8, QWORD PTR [r12+32]
mov rdx, r12
xor r8, QWORD PTR [r12]
mov r15, QWORD PTR [r12+40]
mov r9, r8
xor r15, QWORD PTR [r12+8]
mov r11, QWORD PTR [r12+224]
mov r12, QWORD PTR [r12+56]
xor r12, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm0, r12
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
movaps XMMWORD PTR [rsp], xmm9
mov r12, QWORD PTR [rdx+88]
xor r12, QWORD PTR [rdx+72]
movq xmm6, rax
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm6, xmm0
and r9d, 2097136
movq xmm0, r12
movq xmm7, rax
punpcklqdq xmm7, xmm0
mov r10d, r9d
movq xmm9, rsp
mov rsp, r8
mov r8d, 524288
mov ebx, [rdx+96]
mov esi, [rdx+100]
mov edi, [rdx+104]
mov ebp, [rdx+108]
ALIGN(64)
FN_PREFIX(CryptonightR_template_mainloop):
movdqa xmm5, XMMWORD PTR [r9+r11]
movq xmm0, r15
movq xmm4, rsp
punpcklqdq xmm4, xmm0
lea rdx, QWORD PTR [r9+r11]
aesenc xmm5, xmm4
mov r13d, r9d
mov eax, r9d
xor r9d, 48
xor r13d, 16
xor eax, 32
movdqu xmm0, XMMWORD PTR [r9+r11]
movaps xmm3, xmm0
movdqu xmm2, XMMWORD PTR [r13+r11]
movdqu xmm1, XMMWORD PTR [rax+r11]
pxor xmm0, xmm2
pxor xmm5, xmm1
pxor xmm5, xmm0
movq r12, xmm5
movd r10d, xmm5
and r10d, 2097136
paddq xmm3, xmm7
paddq xmm2, xmm6
paddq xmm1, xmm4
movdqu XMMWORD PTR [r13+r11], xmm3
movdqu XMMWORD PTR [rax+r11], xmm2
movdqu XMMWORD PTR [r9+r11], xmm1
movdqa xmm0, xmm5
pxor xmm0, xmm6
movdqu XMMWORD PTR [rdx], xmm0
lea r13d, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or r13, rdx
movd eax, xmm6
movd edx, xmm7
pextrd r9d, xmm7, 2
xor r13, QWORD PTR [r10+r11]
mov r14, QWORD PTR [r10+r11+8]
FN_PREFIX(CryptonightR_template_part2):
lea rcx, [r10+r11]
mov eax, edi
mov edx, ebp
shl rdx, 32
or rax, rdx
xor rsp, rax
mov eax, ebx
mov edx, esi
shl rdx, 32
or rax, rdx
xor r15, rax
mov rax, r13
mul r12
add r15, rax
add rsp, rdx
mov r9d, r10d
mov r12d, r10d
xor r9d, 16
xor r12d, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [r12+r11]
movaps xmm3, xmm1
movdqa xmm2, XMMWORD PTR [r9+r11]
movdqa xmm0, XMMWORD PTR [r10+r11]
pxor xmm1, xmm2
pxor xmm5, xmm0
pxor xmm5, xmm1
paddq xmm3, xmm4
paddq xmm2, xmm6
paddq xmm0, xmm7
movdqu XMMWORD PTR [r9+r11], xmm0
movdqu XMMWORD PTR [r12+r11], xmm2
movdqu XMMWORD PTR [r10+r11], xmm3
movdqa xmm7, xmm6
mov QWORD PTR [rcx], rsp
xor rsp, r13
mov r9d, esp
mov QWORD PTR [rcx+8], r15
and r9d, 2097136
xor r15, r14
movdqa xmm6, xmm5
dec r8d
jnz FN_PREFIX(CryptonightR_template_mainloop)
FN_PREFIX(CryptonightR_template_part3):
movq rsp, xmm9
mov rbx, QWORD PTR [rsp+136]
mov rbp, QWORD PTR [rsp+144]
mov rsi, QWORD PTR [rsp+152]
movaps xmm6, XMMWORD PTR [rsp+48]
movaps xmm7, XMMWORD PTR [rsp+32]
movaps xmm8, XMMWORD PTR [rsp+16]
movaps xmm9, XMMWORD PTR [rsp]
add rsp, 64
pop rdi
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
ret 0
FN_PREFIX(CryptonightR_template_end):
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_part1):
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 320
mov r14, QWORD PTR [rcx+32]
mov r8, rcx
xor r14, QWORD PTR [rcx]
mov r12, QWORD PTR [rcx+40]
mov ebx, r14d
mov rsi, QWORD PTR [rcx+224]
and ebx, 2097136
xor r12, QWORD PTR [rcx+8]
mov rcx, QWORD PTR [rcx+56]
xor rcx, QWORD PTR [r8+24]
mov rax, QWORD PTR [r8+48]
xor rax, QWORD PTR [r8+16]
mov r15, QWORD PTR [rdx+32]
xor r15, QWORD PTR [rdx]
movq xmm0, rcx
mov rcx, QWORD PTR [r8+88]
xor rcx, QWORD PTR [r8+72]
mov r13, QWORD PTR [rdx+40]
mov rdi, QWORD PTR [rdx+224]
xor r13, QWORD PTR [rdx+8]
movaps XMMWORD PTR [rsp+160], xmm6
movaps XMMWORD PTR [rsp+176], xmm7
movaps XMMWORD PTR [rsp+192], xmm8
movaps XMMWORD PTR [rsp+208], xmm9
movaps XMMWORD PTR [rsp+224], xmm10
movaps XMMWORD PTR [rsp+240], xmm11
movaps XMMWORD PTR [rsp+256], xmm12
movaps XMMWORD PTR [rsp+272], xmm13
movaps XMMWORD PTR [rsp+288], xmm14
movaps XMMWORD PTR [rsp+304], xmm15
movq xmm7, rax
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
movaps xmm1, XMMWORD PTR [rdx+96]
movaps xmm2, XMMWORD PTR [r8+96]
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+16], xmm2
mov r8d, r15d
punpcklqdq xmm7, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
movq xmm9, rax
mov QWORD PTR [rsp+128], rsi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
punpcklqdq xmm9, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [rdx+88]
xor rcx, QWORD PTR [rdx+72]
movq xmm8, rax
mov QWORD PTR [rsp+136], rdi
mov rax, QWORD PTR [rdx+80]
xor rax, QWORD PTR [rdx+64]
punpcklqdq xmm8, xmm0
and r8d, 2097136
movq xmm0, rcx
mov r11d, 524288
movq xmm10, rax
punpcklqdq xmm10, xmm0
movq xmm14, QWORD PTR [rsp+128]
movq xmm15, QWORD PTR [rsp+136]
ALIGN(64)
FN_PREFIX(CryptonightR_template_double_mainloop):
movdqu xmm6, XMMWORD PTR [rbx+rsi]
movq xmm0, r12
mov ecx, ebx
movq xmm3, r14
punpcklqdq xmm3, xmm0
xor ebx, 16
aesenc xmm6, xmm3
movq xmm4, r15
movdqu xmm0, XMMWORD PTR [rbx+rsi]
pxor xmm6, xmm0
xor ebx, 48
paddq xmm0, xmm7
movdqu xmm1, XMMWORD PTR [rbx+rsi]
pxor xmm6, xmm1
movdqu XMMWORD PTR [rbx+rsi], xmm0
paddq xmm1, xmm3
xor ebx, 16
mov eax, ebx
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbx+rsi]
pxor xmm6, xmm0
movq rdx, xmm6
movdqu XMMWORD PTR [rbx+rsi], xmm1
paddq xmm0, xmm9
movdqu XMMWORD PTR [rax+rsi], xmm0
movdqa xmm0, xmm6
pxor xmm0, xmm7
movdqu XMMWORD PTR [rcx+rsi], xmm0
mov esi, edx
movdqu xmm5, XMMWORD PTR [r8+rdi]
and esi, 2097136
mov ecx, r8d
movq xmm0, r13
punpcklqdq xmm4, xmm0
xor r8d, 16
aesenc xmm5, xmm4
movdqu xmm0, XMMWORD PTR [r8+rdi]
pxor xmm5, xmm0
xor r8d, 48
paddq xmm0, xmm8
movdqu xmm1, XMMWORD PTR [r8+rdi]
pxor xmm5, xmm1
movdqu XMMWORD PTR [r8+rdi], xmm0
paddq xmm1, xmm4
xor r8d, 16
mov eax, r8d
xor rax, 32
movdqu xmm0, XMMWORD PTR [r8+rdi]
pxor xmm5, xmm0
movdqu XMMWORD PTR [r8+rdi], xmm1
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rdi], xmm0
movdqa xmm0, xmm5
pxor xmm0, xmm8
movdqu XMMWORD PTR [rcx+rdi], xmm0
movq rdi, xmm5
movq rcx, xmm14
mov ebp, edi
mov r8, QWORD PTR [rcx+rsi]
mov r10, QWORD PTR [rcx+rsi+8]
lea r9, QWORD PTR [rcx+rsi]
xor esi, 16
movq xmm0, rsp
movq xmm1, rsi
movq xmm2, rdi
movq xmm11, rbp
movq xmm12, r15
movq xmm13, rdx
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp+16]
mov esi, DWORD PTR [rsp+20]
mov edi, DWORD PTR [rsp+24]
mov ebp, DWORD PTR [rsp+28]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movd esp, xmm3
pextrd r15d, xmm3, 2
movd eax, xmm7
movd edx, xmm9
pextrd r9d, xmm9, 2
FN_PREFIX(CryptonightR_template_double_part2):
mov eax, edi
mov edx, ebp
shl rdx, 32
or rax, rdx
xor r14, rax
mov eax, ebx
mov edx, esi
shl rdx, 32
or rax, rdx
xor r12, rax
movq rsp, xmm0
mov DWORD PTR [rsp+16], ebx
mov DWORD PTR [rsp+20], esi
mov DWORD PTR [rsp+24], edi
mov DWORD PTR [rsp+28], ebp
movq rsi, xmm1
movq rdi, xmm2
movq rbp, xmm11
movq r15, xmm12
movq rdx, xmm13
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rbx, r8
mov rax, r8
mul rdx
and ebp, 2097136
mov r8, rax
movdqu xmm1, XMMWORD PTR [rcx+rsi]
pxor xmm6, xmm1
xor esi, 48
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rsi+rcx]
pxor xmm6, xmm2
paddq xmm2, xmm3
movdqu XMMWORD PTR [rsi+rcx], xmm1
xor esi, 16
mov eax, esi
mov rsi, rcx
movdqu xmm0, XMMWORD PTR [rax+rcx]
pxor xmm6, xmm0
movdqu XMMWORD PTR [rax+rcx], xmm2
paddq xmm0, xmm9
add r12, r8
xor rax, 32
add r14, rdx
movdqa xmm9, xmm7
movdqa xmm7, xmm6
movdqu XMMWORD PTR [rax+rcx], xmm0
mov QWORD PTR [r9+8], r12
xor r12, r10
mov QWORD PTR [r9], r14
movq rcx, xmm15
xor r14, rbx
mov r10d, ebp
mov ebx, r14d
xor ebp, 16
and ebx, 2097136
mov r8, QWORD PTR [r10+rcx]
mov r9, QWORD PTR [r10+rcx+8]
movq xmm0, rsp
movq xmm1, rbx
movq xmm2, rsi
movq xmm11, rdi
movq xmm12, rbp
movq xmm13, r15
mov [rsp+104], rcx
mov [rsp+112], r9
mov ebx, DWORD PTR [rsp]
mov esi, DWORD PTR [rsp+4]
mov edi, DWORD PTR [rsp+8]
mov ebp, DWORD PTR [rsp+12]
lea eax, [ebx+esi]
lea edx, [edi+ebp]
shl rdx, 32
or rax, rdx
xor r8, rax
movq xmm3, r8
movd esp, xmm4
pextrd r15d, xmm4, 2
movd eax, xmm8
movd edx, xmm10
pextrd r9d, xmm10, 2
FN_PREFIX(CryptonightR_template_double_part3):
movq r15, xmm13
mov eax, edi
mov edx, ebp
shl rdx, 32
or rax, rdx
xor r15, rax
mov eax, ebx
mov edx, esi
shl rdx, 32
or rax, rdx
xor r13, rax
movq rsp, xmm0
mov DWORD PTR [rsp], ebx
mov DWORD PTR [rsp+4], esi
mov DWORD PTR [rsp+8], edi
mov DWORD PTR [rsp+12], ebp
movq rbx, xmm1
movq rsi, xmm2
movq rdi, xmm11
movq rbp, xmm12
mov rcx, [rsp+104]
mov r9, [rsp+112]
mov rax, r8
mul rdi
mov rdi, rcx
mov r8, rax
movdqu xmm1, XMMWORD PTR [rbp+rcx]
pxor xmm5, xmm1
xor ebp, 48
paddq xmm1, xmm8
add r13, r8
movdqu xmm2, XMMWORD PTR [rbp+rcx]
pxor xmm5, xmm2
add r15, rdx
movdqu XMMWORD PTR [rbp+rcx], xmm1
paddq xmm2, xmm4
xor ebp, 16
mov eax, ebp
xor rax, 32
movdqu xmm0, XMMWORD PTR [rbp+rcx]
pxor xmm5, xmm0
movdqu XMMWORD PTR [rbp+rcx], xmm2
paddq xmm0, xmm10
movdqu XMMWORD PTR [rax+rcx], xmm0
movq rax, xmm3
movdqa xmm10, xmm8
mov QWORD PTR [r10+rcx], r15
movdqa xmm8, xmm5
xor r15, rax
mov QWORD PTR [r10+rcx+8], r13
mov r8d, r15d
xor r13, r9
and r8d, 2097136
dec r11d
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
FN_PREFIX(CryptonightR_template_double_part4):
mov rbx, QWORD PTR [rsp+400]
movaps xmm6, XMMWORD PTR [rsp+160]
movaps xmm7, XMMWORD PTR [rsp+176]
movaps xmm8, XMMWORD PTR [rsp+192]
movaps xmm9, XMMWORD PTR [rsp+208]
movaps xmm10, XMMWORD PTR [rsp+224]
movaps xmm11, XMMWORD PTR [rsp+240]
movaps xmm12, XMMWORD PTR [rsp+256]
movaps xmm13, XMMWORD PTR [rsp+272]
movaps xmm14, XMMWORD PTR [rsp+288]
movaps xmm15, XMMWORD PTR [rsp+304]
add rsp, 320
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
ret 0
FN_PREFIX(CryptonightR_template_double_end):

View File

@@ -0,0 +1,410 @@
mov rax, rsp
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 184
stmxcsr DWORD PTR [rsp+272]
mov DWORD PTR [rsp+276], 24448
ldmxcsr DWORD PTR [rsp+276]
mov r13, QWORD PTR [rcx+224]
mov r9, rdx
mov r10, QWORD PTR [rcx+32]
mov r8, rcx
xor r10, QWORD PTR [rcx]
mov r14d, 524288
mov r11, QWORD PTR [rcx+40]
xor r11, QWORD PTR [rcx+8]
mov rsi, QWORD PTR [rdx+224]
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov rdi, QWORD PTR [r9+32]
xor rdi, QWORD PTR [r9]
mov rbp, QWORD PTR [r9+40]
xor rbp, QWORD PTR [r9+8]
movq xmm0, rdx
movaps XMMWORD PTR [rax-88], xmm6
movaps XMMWORD PTR [rax-104], xmm7
movaps XMMWORD PTR [rax-120], xmm8
movaps XMMWORD PTR [rsp+112], xmm9
movaps XMMWORD PTR [rsp+96], xmm10
movaps XMMWORD PTR [rsp+80], xmm11
movaps XMMWORD PTR [rsp+64], xmm12
movaps XMMWORD PTR [rsp+48], xmm13
movaps XMMWORD PTR [rsp+32], xmm14
movaps XMMWORD PTR [rsp+16], xmm15
mov rdx, r10
movq xmm4, QWORD PTR [r8+96]
and edx, 2097136
mov rax, QWORD PTR [rcx+48]
xorps xmm13, xmm13
xor rax, QWORD PTR [rcx+16]
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r8+72]
movq xmm5, QWORD PTR [r8+104]
movq xmm7, rax
mov eax, 1
shl rax, 52
movq xmm14, rax
punpcklqdq xmm14, xmm14
mov eax, 1023
shl rax, 52
movq xmm12, rax
punpcklqdq xmm12, xmm12
mov rax, QWORD PTR [r8+80]
xor rax, QWORD PTR [r8+64]
punpcklqdq xmm7, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [r9+56]
xor rcx, QWORD PTR [r9+24]
movq xmm3, rax
mov rax, QWORD PTR [r9+48]
xor rax, QWORD PTR [r9+16]
punpcklqdq xmm3, xmm0
movq xmm0, rcx
mov QWORD PTR [rsp], r13
mov rcx, QWORD PTR [r9+88]
xor rcx, QWORD PTR [r9+72]
movq xmm6, rax
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
punpcklqdq xmm6, xmm0
movq xmm0, rcx
mov QWORD PTR [rsp+256], r10
mov rcx, rdi
mov QWORD PTR [rsp+264], r11
movq xmm8, rax
and ecx, 2097136
punpcklqdq xmm8, xmm0
movq xmm0, QWORD PTR [r9+96]
punpcklqdq xmm4, xmm0
movq xmm0, QWORD PTR [r9+104]
lea r8, QWORD PTR [rcx+rsi]
movdqu xmm11, XMMWORD PTR [r8]
punpcklqdq xmm5, xmm0
lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9]
ALIGN(64)
main_loop_double_sandybridge:
movdqu xmm9, xmm15
mov eax, edx
mov ebx, edx
xor eax, 16
xor ebx, 32
xor edx, 48
movq xmm0, r11
movq xmm2, r10
punpcklqdq xmm2, xmm0
aesenc xmm9, xmm2
movdqu xmm0, XMMWORD PTR [rax+r13]
movdqu xmm1, XMMWORD PTR [rbx+r13]
paddq xmm0, xmm7
paddq xmm1, xmm2
movdqu XMMWORD PTR [rbx+r13], xmm0
movdqu xmm0, XMMWORD PTR [rdx+r13]
movdqu XMMWORD PTR [rdx+r13], xmm1
paddq xmm0, xmm3
movdqu XMMWORD PTR [rax+r13], xmm0
movq r11, xmm9
mov edx, r11d
and edx, 2097136
movdqa xmm0, xmm9
pxor xmm0, xmm7
movdqu XMMWORD PTR [r9], xmm0
lea rbx, QWORD PTR [rdx+r13]
mov r10, QWORD PTR [rdx+r13]
movdqu xmm10, xmm11
movq xmm0, rbp
movq xmm11, rdi
punpcklqdq xmm11, xmm0
aesenc xmm10, xmm11
mov eax, ecx
mov r12d, ecx
xor eax, 16
xor r12d, 32
xor ecx, 48
movdqu xmm0, XMMWORD PTR [rax+rsi]
paddq xmm0, xmm6
movdqu xmm1, XMMWORD PTR [r12+rsi]
movdqu XMMWORD PTR [r12+rsi], xmm0
paddq xmm1, xmm11
movdqu xmm0, XMMWORD PTR [rcx+rsi]
movdqu XMMWORD PTR [rcx+rsi], xmm1
paddq xmm0, xmm8
movdqu XMMWORD PTR [rax+rsi], xmm0
movq rcx, xmm10
and ecx, 2097136
movdqa xmm0, xmm10
pxor xmm0, xmm6
movdqu XMMWORD PTR [r8], xmm0
mov r12, QWORD PTR [rcx+rsi]
mov r9, QWORD PTR [rbx+8]
xor edx, 16
mov r8d, edx
mov r15d, edx
movq rdx, xmm5
shl rdx, 32
movq rax, xmm4
xor rdx, rax
xor r10, rdx
mov rax, r10
mul r11
mov r11d, r8d
xor r11d, 48
movq xmm0, rdx
xor rdx, [r11+r13]
movq xmm1, rax
xor rax, [r11+r13+8]
punpcklqdq xmm0, xmm1
pxor xmm0, XMMWORD PTR [r8+r13]
xor r8d, 32
movdqu xmm1, XMMWORD PTR [r11+r13]
paddq xmm0, xmm7
paddq xmm1, xmm2
movdqu XMMWORD PTR [r11+r13], xmm0
movdqu xmm0, XMMWORD PTR [r8+r13]
movdqu XMMWORD PTR [r8+r13], xmm1
paddq xmm0, xmm3
movdqu XMMWORD PTR [r15+r13], xmm0
mov r11, QWORD PTR [rsp+256]
add r11, rdx
mov rdx, QWORD PTR [rsp+264]
add rdx, rax
mov QWORD PTR [rbx], r11
xor r11, r10
mov QWORD PTR [rbx+8], rdx
xor rdx, r9
mov QWORD PTR [rsp+256], r11
and r11d, 2097136
mov QWORD PTR [rsp+264], rdx
mov QWORD PTR [rsp+8], r11
lea r15, QWORD PTR [r11+r13]
movdqu xmm15, XMMWORD PTR [r11+r13]
lea r13, QWORD PTR [rsi+rcx]
movdqa xmm0, xmm5
psrldq xmm0, 8
movaps xmm2, xmm13
movq r10, xmm0
psllq xmm5, 1
shl r10, 32
movdqa xmm0, xmm9
psrldq xmm0, 8
movdqa xmm1, xmm10
movq r11, xmm0
psrldq xmm1, 8
movq r8, xmm1
psrldq xmm4, 8
movaps xmm0, xmm13
movq rax, xmm4
xor r10, rax
movaps xmm1, xmm13
xor r10, r12
lea rax, QWORD PTR [r11+1]
shr rax, 1
movdqa xmm3, xmm9
punpcklqdq xmm3, xmm10
paddq xmm5, xmm3
movq rdx, xmm5
psrldq xmm5, 8
cvtsi2sd xmm2, rax
or edx, -2147483647
lea rax, QWORD PTR [r8+1]
shr rax, 1
movq r9, xmm5
cvtsi2sd xmm0, rax
or r9d, -2147483647
cvtsi2sd xmm1, rdx
unpcklpd xmm2, xmm0
movaps xmm0, xmm13
cvtsi2sd xmm0, r9
unpcklpd xmm1, xmm0
divpd xmm2, xmm1
paddq xmm2, xmm14
cvttsd2si rax, xmm2
psrldq xmm2, 8
mov rbx, rax
imul rax, rdx
sub r11, rax
js div_fix_1_sandybridge
div_fix_1_ret_sandybridge:
cvttsd2si rdx, xmm2
mov rax, rdx
imul rax, r9
movd xmm2, r11d
movd xmm4, ebx
sub r8, rax
js div_fix_2_sandybridge
div_fix_2_ret_sandybridge:
movd xmm1, r8d
movd xmm0, edx
punpckldq xmm2, xmm1
punpckldq xmm4, xmm0
punpckldq xmm4, xmm2
paddq xmm3, xmm4
movdqa xmm0, xmm3
psrlq xmm0, 12
paddq xmm0, xmm12
sqrtpd xmm1, xmm0
movq r9, xmm1
movdqa xmm5, xmm1
psrlq xmm5, 19
test r9, 524287
je sqrt_fix_1_sandybridge
sqrt_fix_1_ret_sandybridge:
movq r9, xmm10
psrldq xmm1, 8
movq r8, xmm1
test r8, 524287
je sqrt_fix_2_sandybridge
sqrt_fix_2_ret_sandybridge:
mov r12d, ecx
mov r8d, ecx
xor r12d, 16
xor r8d, 32
xor ecx, 48
mov rax, r10
mul r9
movq xmm0, rax
movq xmm3, rdx
punpcklqdq xmm3, xmm0
movdqu xmm0, XMMWORD PTR [r12+rsi]
pxor xmm0, xmm3
movdqu xmm1, XMMWORD PTR [r8+rsi]
xor rdx, [r8+rsi]
xor rax, [r8+rsi+8]
movdqu xmm3, XMMWORD PTR [rcx+rsi]
paddq xmm0, xmm6
paddq xmm1, xmm11
paddq xmm3, xmm8
movdqu XMMWORD PTR [r8+rsi], xmm0
movdqu XMMWORD PTR [rcx+rsi], xmm1
movdqu XMMWORD PTR [r12+rsi], xmm3
add rdi, rdx
mov QWORD PTR [r13], rdi
xor rdi, r10
mov ecx, edi
and ecx, 2097136
lea r8, QWORD PTR [rcx+rsi]
mov rdx, QWORD PTR [r13+8]
add rbp, rax
mov QWORD PTR [r13+8], rbp
movdqu xmm11, XMMWORD PTR [rcx+rsi]
xor rbp, rdx
mov r13, QWORD PTR [rsp]
movdqa xmm3, xmm7
mov rdx, QWORD PTR [rsp+8]
movdqa xmm8, xmm6
mov r10, QWORD PTR [rsp+256]
movdqa xmm7, xmm9
mov r11, QWORD PTR [rsp+264]
movdqa xmm6, xmm10
mov r9, r15
dec r14d
jne main_loop_double_sandybridge
ldmxcsr DWORD PTR [rsp+272]
movaps xmm13, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+184]
movaps xmm6, XMMWORD PTR [r11-24]
movaps xmm7, XMMWORD PTR [r11-40]
movaps xmm8, XMMWORD PTR [r11-56]
movaps xmm9, XMMWORD PTR [r11-72]
movaps xmm10, XMMWORD PTR [r11-88]
movaps xmm11, XMMWORD PTR [r11-104]
movaps xmm12, XMMWORD PTR [r11-120]
movaps xmm14, XMMWORD PTR [rsp+32]
movaps xmm15, XMMWORD PTR [rsp+16]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
jmp cnv2_double_mainloop_asm_sandybridge_endp
div_fix_1_sandybridge:
dec rbx
add r11, rdx
jmp div_fix_1_ret_sandybridge
div_fix_2_sandybridge:
dec rdx
add r8, r9
jmp div_fix_2_ret_sandybridge
sqrt_fix_1_sandybridge:
movq r8, xmm3
movdqa xmm0, xmm5
psrldq xmm0, 8
dec r9
mov r11d, -1022
shl r11, 32
mov rax, r9
shr r9, 19
shr rax, 20
mov rdx, r9
sub rdx, rax
lea rdx, [rdx+r11+1]
add rax, r11
imul rdx, rax
sub rdx, r8
adc r9, 0
movq xmm5, r9
punpcklqdq xmm5, xmm0
jmp sqrt_fix_1_ret_sandybridge
sqrt_fix_2_sandybridge:
psrldq xmm3, 8
movq r11, xmm3
dec r8
mov ebx, -1022
shl rbx, 32
mov rax, r8
shr r8, 19
shr rax, 20
mov rdx, r8
sub rdx, rax
lea rdx, [rdx+rbx+1]
add rax, rbx
imul rdx, rax
sub rdx, r11
adc r8, 0
movq xmm0, r8
punpcklqdq xmm5, xmm0
jmp sqrt_fix_2_ret_sandybridge
cnv2_double_mainloop_asm_sandybridge_endp:

View File

@@ -0,0 +1,180 @@
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 64
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov ebp, 524288
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movq xmm3, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movq xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
mov rdi, QWORD PTR [r9+104]
and r10d, 2097136
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm4, rax
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
xorps xmm8, xmm8
mov ax, 1023
shl rax, 52
movq xmm7, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm3, xmm0
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN(64)
cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm6, r8
pinsrq xmm6, r11, 1
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
mov ecx, r10d
mov eax, r10d
xor ecx, 16
xor eax, 32
xor r10d, 48
aesenc xmm5, xmm6
movdqa xmm2, XMMWORD PTR [rcx+rbx]
movdqa xmm1, XMMWORD PTR [rax+rbx]
movdqa xmm0, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
paddq xmm0, xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm0
movdqa XMMWORD PTR [rax+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movaps xmm1, xmm8
mov rsi, r15
xor rsi, rdi
mov edi, 1023
shl rdi, 52
movq r14, xmm5
pextrq rax, xmm5, 1
movdqa xmm0, xmm5
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
movdqa XMMWORD PTR [rdx], xmm0
xor rsi, QWORD PTR [r10+rbx]
lea r12, QWORD PTR [r10+rbx]
mov r13, QWORD PTR [r10+rbx+8]
add r9d, r14d
or r9d, -2147483647
xor edx, edx
div r9
mov eax, eax
shl rdx, 32
lea r15, [rax+rdx]
lea rax, [r14+r15]
shr rax, 12
add rax, rdi
movq xmm0, rax
sqrtsd xmm1, xmm0
movq rdi, xmm1
test rdi, 524287
je sqrt_fixup_bulldozer
shr rdi, 19
sqrt_fixup_bulldozer_ret:
mov rax, rsi
mul r14
movq xmm1, rax
movq xmm0, rdx
punpcklqdq xmm0, xmm1
mov r9d, r10d
mov ecx, r10d
xor r9d, 16
xor ecx, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [rcx+rbx]
xor rdx, [rcx+rbx]
xor rax, [rcx+rbx+8]
movdqa xmm2, XMMWORD PTR [r9+rbx]
pxor xmm2, xmm0
paddq xmm4, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
movdqa XMMWORD PTR [r9+rbx], xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movdqa xmm4, xmm3
add r8, rdx
add r11, rax
mov QWORD PTR [r12], r8
xor r8, rsi
mov QWORD PTR [r12+8], r11
mov r10, r8
xor r11, r13
and r10d, 2097136
movdqa xmm3, xmm5
dec ebp
jne cnv2_main_loop_bulldozer
ldmxcsr DWORD PTR [rsp]
movaps xmm6, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+64]
mov rbx, QWORD PTR [r11+56]
mov rbp, QWORD PTR [r11+64]
mov rsi, QWORD PTR [r11+72]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm7, XMMWORD PTR [rsp+32]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
jmp cnv2_main_loop_bulldozer_endp
sqrt_fixup_bulldozer:
movq r9, xmm5
add r9, r15
dec rdi
mov edx, -1022
shl rdx, 32
mov rax, rdi
shr rdi, 19
shr rax, 20
mov rcx, rdi
sub rcx, rax
lea rcx, [rcx+rdx+1]
add rax, rdx
imul rcx, rax
sub rcx, r9
adc rdi, 0
jmp sqrt_fixup_bulldozer_ret
cnv2_main_loop_bulldozer_endp:

View File

@@ -0,0 +1,186 @@
mov QWORD PTR [rsp+24], rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 80
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov esi, 524288
mov r8, QWORD PTR [rcx+32]
mov r13d, -2147483647
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movq xmm4, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movq xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
movq xmm3, QWORD PTR [r9+104]
movaps XMMWORD PTR [rsp+64], xmm6
movaps XMMWORD PTR [rsp+48], xmm7
movaps XMMWORD PTR [rsp+32], xmm8
and r10d, 2097136
movq xmm5, rax
xor eax, eax
mov QWORD PTR [rsp+16], rax
mov ax, 1023
shl rax, 52
movq xmm8, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm4, xmm0
movq xmm0, rcx
punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN(64)
main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d
mov eax, r10d
mov rdi, r15
xor ecx, 16
xor eax, 32
xor r10d, 48
movq xmm0, r11
movq xmm7, r8
punpcklqdq xmm7, xmm0
aesenc xmm6, xmm7
movq rbp, xmm6
mov r9, rbp
and r9d, 2097136
movdqu xmm2, XMMWORD PTR [rcx+rbx]
movdqu xmm1, XMMWORD PTR [rax+rbx]
movdqu xmm0, XMMWORD PTR [r10+rbx]
paddq xmm1, xmm7
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [rcx+rbx], xmm0
movdqu XMMWORD PTR [rax+rbx], xmm2
movdqu XMMWORD PTR [r10+rbx], xmm1
mov r10, r9
xor r10d, 32
movq rcx, xmm3
mov rax, rcx
shl rax, 32
xor rdi, rax
movdqa xmm0, xmm6
pxor xmm0, xmm4
movdqu XMMWORD PTR [rdx], xmm0
xor rdi, QWORD PTR [r9+rbx]
lea r14, QWORD PTR [r9+rbx]
mov r12, QWORD PTR [r14+8]
xor edx, edx
lea r9d, DWORD PTR [ecx+ecx]
add r9d, ebp
movdqa xmm0, xmm6
psrldq xmm0, 8
or r9d, r13d
movq rax, xmm0
div r9
xorps xmm3, xmm3
mov eax, eax
shl rdx, 32
add rdx, rax
lea r9, QWORD PTR [rdx+rbp]
mov r15, rdx
mov rax, r9
shr rax, 12
movq xmm0, rax
paddq xmm0, xmm8
sqrtsd xmm3, xmm0
psubq xmm3, XMMWORD PTR [rsp+16]
movq rdx, xmm3
test edx, 524287
je sqrt_fixup_ivybridge
psrlq xmm3, 19
sqrt_fixup_ivybridge_ret:
mov ecx, r10d
mov rax, rdi
mul rbp
movq xmm2, rdx
xor rdx, [rcx+rbx]
add r8, rdx
mov QWORD PTR [r14], r8
xor r8, rdi
mov edi, r8d
and edi, 2097136
movq xmm0, rax
xor rax, [rcx+rbx+8]
add r11, rax
mov QWORD PTR [r14+8], r11
punpcklqdq xmm2, xmm0
mov r9d, r10d
xor r9d, 48
xor r10d, 16
pxor xmm2, XMMWORD PTR [r9+rbx]
movdqu xmm0, XMMWORD PTR [r10+rbx]
paddq xmm0, xmm5
movdqu xmm1, XMMWORD PTR [rcx+rbx]
paddq xmm2, xmm4
paddq xmm1, xmm7
movdqa xmm5, xmm4
movdqu XMMWORD PTR [r9+rbx], xmm0
movdqa xmm4, xmm6
movdqu XMMWORD PTR [rcx+rbx], xmm2
movdqu XMMWORD PTR [r10+rbx], xmm1
movdqu xmm6, [rdi+rbx]
mov r10d, edi
xor r11, r12
dec rsi
jne main_loop_ivybridge
ldmxcsr DWORD PTR [rsp]
mov rbx, QWORD PTR [rsp+160]
movaps xmm6, XMMWORD PTR [rsp+64]
movaps xmm7, XMMWORD PTR [rsp+48]
movaps xmm8, XMMWORD PTR [rsp+32]
add rsp, 80
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
jmp cnv2_main_loop_ivybridge_endp
sqrt_fixup_ivybridge:
dec rdx
mov r13d, -1022
shl r13, 32
mov rax, rdx
shr rdx, 19
shr rax, 20
mov rcx, rdx
sub rcx, rax
add rax, r13
not r13
sub rcx, r13
mov r13d, -2147483647
imul rcx, rax
sub rcx, r9
adc rdx, 0
movq xmm3, rdx
jmp sqrt_fixup_ivybridge_ret
cnv2_main_loop_ivybridge_endp:

View File

@@ -0,0 +1,179 @@
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 64
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov ebp, 524288
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movq xmm3, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movq xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
mov rdi, QWORD PTR [r9+104]
and r10d, 2097136
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm4, rax
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
xorps xmm8, xmm8
mov ax, 1023
shl rax, 52
movq xmm7, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm3, xmm0
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN(64)
main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm0, r11
movq xmm6, r8
punpcklqdq xmm6, xmm0
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
mov ecx, r10d
mov eax, r10d
xor ecx, 16
xor eax, 32
xor r10d, 48
aesenc xmm5, xmm6
movdqa xmm2, XMMWORD PTR [rcx+rbx]
movdqa xmm1, XMMWORD PTR [rax+rbx]
movdqa xmm0, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
paddq xmm0, xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm0
movdqa XMMWORD PTR [rax+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movaps xmm1, xmm8
mov rsi, r15
xor rsi, rdi
movq r14, xmm5
movdqa xmm0, xmm5
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
movdqa XMMWORD PTR [rdx], xmm0
xor rsi, QWORD PTR [r10+rbx]
lea r12, QWORD PTR [r10+rbx]
mov r13, QWORD PTR [r10+rbx+8]
add r9d, r14d
or r9d, -2147483647
xor edx, edx
movdqa xmm0, xmm5
psrldq xmm0, 8
movq rax, xmm0
div r9
movq xmm0, rax
movq xmm1, rdx
punpckldq xmm0, xmm1
movq r15, xmm0
paddq xmm0, xmm5
movdqa xmm2, xmm0
psrlq xmm0, 12
paddq xmm0, xmm7
sqrtsd xmm1, xmm0
movq rdi, xmm1
test rdi, 524287
je sqrt_fixup_ryzen
shr rdi, 19
sqrt_fixup_ryzen_ret:
mov rax, rsi
mul r14
movq xmm1, rax
movq xmm0, rdx
punpcklqdq xmm0, xmm1
mov r9d, r10d
mov ecx, r10d
xor r9d, 16
xor ecx, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [rcx+rbx]
xor rdx, [rcx+rbx]
xor rax, [rcx+rbx+8]
movdqa xmm2, XMMWORD PTR [r9+rbx]
pxor xmm2, xmm0
paddq xmm4, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
movdqa XMMWORD PTR [r9+rbx], xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movdqa xmm4, xmm3
add r8, rdx
add r11, rax
mov QWORD PTR [r12], r8
xor r8, rsi
mov QWORD PTR [r12+8], r11
mov r10, r8
xor r11, r13
and r10d, 2097136
movdqa xmm3, xmm5
dec ebp
jne main_loop_ryzen
ldmxcsr DWORD PTR [rsp]
movaps xmm6, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+64]
mov rbx, QWORD PTR [r11+56]
mov rbp, QWORD PTR [r11+64]
mov rsi, QWORD PTR [r11+72]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm7, XMMWORD PTR [rsp+32]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
jmp cnv2_main_loop_ryzen_endp
sqrt_fixup_ryzen:
movq r9, xmm2
dec rdi
mov edx, -1022
shl rdx, 32
mov rax, rdi
shr rdi, 19
shr rax, 20
mov rcx, rdi
sub rcx, rax
lea rcx, [rcx+rdx+1]
add rax, rdx
imul rcx, rax
sub rcx, r9
adc rdi, 0
jmp sqrt_fixup_ryzen_ret
cnv2_main_loop_ryzen_endp:

54
crypto/asm/cn_main_loop.S Normal file
View File

@@ -0,0 +1,54 @@
#ifdef __APPLE__
# define ALIGN(x) .align 6
#else
# define ALIGN(x) .align 64
#endif
.intel_syntax noprefix
#ifdef __APPLE__
# define FN_PREFIX(fn) _ ## fn
.text
#else
# define FN_PREFIX(fn) fn
.section .text
#endif
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
sub rsp, 48
mov rcx, rdi
#include "cn2/cnv2_main_loop_ivybridge.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48
mov rcx, rdi
#include "cn2/cnv2_main_loop_ryzen.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN(64)
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
sub rsp, 48
mov rcx, rdi
#include "cn2/cnv2_main_loop_bulldozer.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
sub rsp, 48
mov rcx, rdi
mov rdx, rsi
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
add rsp, 48
ret 0
mov eax, 3735929054

View File

@@ -0,0 +1,31 @@
#define ALIGN(x) .align 64
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm
.global cnv2_double_mainloop_sandybridge_asm
ALIGN(64)
cnv2_mainloop_ivybridge_asm:
#include "../cn2/cnv2_main_loop_ivybridge.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm:
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
ret 0
mov eax, 3735929054

View File

@@ -1,50 +0,0 @@
/*
* ---------------------------------------------------------------------------
* OpenAES License
* ---------------------------------------------------------------------------
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------
*/
#ifndef _OAES_CONFIG_H
#define _OAES_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
//#ifndef OAES_HAVE_ISAAC
//#define OAES_HAVE_ISAAC 1
//#endif // OAES_HAVE_ISAAC
//#ifndef OAES_DEBUG
//#define OAES_DEBUG 0
//#endif // OAES_DEBUG
#ifdef __cplusplus
}
#endif
#endif // _OAES_CONFIG_H

File diff suppressed because it is too large Load Diff

View File

@@ -1,214 +0,0 @@
/*
* ---------------------------------------------------------------------------
* OpenAES License
* ---------------------------------------------------------------------------
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------
*/
#ifndef _OAES_LIB_H
#define _OAES_LIB_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _WIN32
# ifdef OAES_SHARED
# ifdef oaes_lib_EXPORTS
# define OAES_API __declspec(dllexport)
# else
# define OAES_API __declspec(dllimport)
# endif
# else
# define OAES_API
# endif
#else
# define OAES_API
#endif // WIN32
#define OAES_VERSION "0.8.1"
#define OAES_BLOCK_SIZE 16
typedef void OAES_CTX;
typedef enum
{
OAES_RET_FIRST = 0,
OAES_RET_SUCCESS = 0,
OAES_RET_UNKNOWN,
OAES_RET_ARG1,
OAES_RET_ARG2,
OAES_RET_ARG3,
OAES_RET_ARG4,
OAES_RET_ARG5,
OAES_RET_NOKEY,
OAES_RET_MEM,
OAES_RET_BUF,
OAES_RET_HEADER,
OAES_RET_COUNT
} OAES_RET;
/*
* oaes_set_option() takes one of these values for its [option] parameter
* some options accept either an optional or a required [value] parameter
*/
// no option
#define OAES_OPTION_NONE 0
// enable ECB mode, disable CBC mode
#define OAES_OPTION_ECB 1
// enable CBC mode, disable ECB mode
// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
// the value of the initialization vector, iv
#define OAES_OPTION_CBC 2
#ifdef OAES_DEBUG
typedef int ( * oaes_step_cb ) (
const uint8_t state[OAES_BLOCK_SIZE],
const char * step_name,
int step_count,
void * user_data );
// enable state stepping mode
// value is required, must pass oaes_step_cb to receive the state at each step
#define OAES_OPTION_STEP_ON 4
// disable state stepping mode
#define OAES_OPTION_STEP_OFF 8
#endif // OAES_DEBUG
typedef uint16_t OAES_OPTION;
typedef struct _oaes_key
{
size_t data_len;
uint8_t *data;
size_t exp_data_len;
uint8_t *exp_data;
size_t num_keys;
size_t key_base;
} oaes_key;
typedef struct _oaes_ctx
{
#ifdef OAES_HAVE_ISAAC
randctx * rctx;
#endif // OAES_HAVE_ISAAC
#ifdef OAES_DEBUG
oaes_step_cb step_cb;
#endif // OAES_DEBUG
oaes_key * key;
OAES_OPTION options;
uint8_t iv[OAES_BLOCK_SIZE];
} oaes_ctx;
/*
* // usage:
*
* OAES_CTX * ctx = oaes_alloc();
* .
* .
* .
* {
* oaes_gen_key_xxx( ctx );
* {
* oaes_key_export( ctx, _buf, &_buf_len );
* // or
* oaes_key_export_data( ctx, _buf, &_buf_len );\
* }
* }
* // or
* {
* oaes_key_import( ctx, _buf, _buf_len );
* // or
* oaes_key_import_data( ctx, _buf, _buf_len );
* }
* .
* .
* .
* oaes_encrypt( ctx, m, m_len, c, &c_len );
* .
* .
* .
* oaes_decrypt( ctx, c, c_len, m, &m_len );
* .
* .
* .
* oaes_free( &ctx );
*/
OAES_API OAES_CTX * oaes_alloc(void);
OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
OAES_OPTION option, const void * value );
OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
// export key with header information
// set data == NULL to get the required data_len
OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
uint8_t * data, size_t * data_len );
// directly export the data from key
// set data == NULL to get the required data_len
OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
uint8_t * data, size_t * data_len );
// import key with header information
OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
const uint8_t * data, size_t data_len );
// directly import data into key
OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
const uint8_t * data, size_t data_len );
// set c == NULL to get the required c_len
OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
// set m == NULL to get the required m_len
OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
// set buf == NULL to get the required buf_len
OAES_API OAES_RET oaes_sprintf(
char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
#ifdef __cplusplus
}
#endif
#endif // _OAES_LIB_H

131
crypto/soft_aes.h Normal file
View File

@@ -0,0 +1,131 @@
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Additional permission under GNU GPL version 3 section 7
*
* If you modify this Program, or any covered work, by linking or combining
* it with OpenSSL (or a modified version of that library), containing parts
* covered by the terms of OpenSSL License and SSLeay License, the licensors
* of this Program grant you additional permission to convey the resulting work.
*
*/
/*
* Parts of this file are originally copyright (c) 2014-2017, The Monero Project
*/
#pragma once
#if defined(XMRIG_ARM)
# include "crypto/SSE2NEON.h"
#elif defined(__GNUC__)
# include <x86intrin.h>
#else
# include <intrin.h>
#endif
#include <inttypes.h>
#define saes_data(w) {\
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
#define SAES_WPOLY 0x011b
#define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
#define saes_f2(x) ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY))
#define saes_f3(x) (saes_f2(x) ^ x)
#define saes_h0(x) (x)
#define saes_u0(p) saes_b2w(saes_f2(p), p, p, saes_f3(p))
#define saes_u1(p) saes_b2w(saes_f3(p), saes_f2(p), p, p)
#define saes_u2(p) saes_b2w( p, saes_f3(p), saes_f2(p), p)
#define saes_u3(p) saes_b2w( p, p, saes_f3(p), saes_f2(p))
__attribute__((aligned(16))) const static uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
__attribute__((aligned(16))) const static uint8_t saes_sbox[256] = saes_data(saes_h0);
static inline __m128i soft_aesenc(__m128i in, __m128i key)
{
uint32_t x0, x1, x2, x3;
x0 = _mm_cvtsi128_si32(in);
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
__m128i out = _mm_set_epi32(
(saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
(saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
(saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
return _mm_xor_si128(out, key);
}
static inline uint32_t sub_word(uint32_t key)
{
return (saes_sbox[key >> 24 ] << 24) |
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
saes_sbox[key & 0xff];
}
#if defined(__clang__) || defined(XMRIG_ARM)
static inline uint32_t _rotr(uint32_t value, uint32_t amount)
{
return (value >> amount) | (value << ((32 - amount) & 31));
}
#endif
static inline __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
{
const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)));
const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)));
return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1);
}

View File

@@ -24,6 +24,6 @@
#ifndef __DONATE_H__
#define __DONATE_H__
#define DONATE_LEVEL 5
#define DONATE_LEVEL 0
#endif /* __DONATE_H__ */

47
mac/cpu_mac.c Normal file
View File

@@ -0,0 +1,47 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include "cpu.h"
struct cpu_info cpu_info = { 0 };
void cpu_init_common();
void cpu_init() {
# ifdef XMRIG_NO_LIBCPUID
cpu_info.total_logical_cpus = sysconf(_SC_NPROCESSORS_CONF);
# endif
cpu_init_common();
}
int affine_to_cpu_mask(int id, unsigned long mask)
{
return 0;
}

95
mac/memory_mac.c Normal file
View File

@@ -0,0 +1,95 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <mm_malloc.h>
#include <sys/mman.h>
#include <mach/vm_statistics.h>
#include "persistent_memory.h"
#include "options.h"
#include "utils/applog.h"
char *persistent_memory;
int persistent_memory_flags = 0;
const char * persistent_memory_allocate() {
const int ratio = (opt_double_hash && opt_algo != ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
const int size = MEMORY * (opt_n_threads * ratio + 1);
persistent_memory_flags |= MEMORY_HUGEPAGES_AVAILABLE;
persistent_memory = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
if (persistent_memory == MAP_FAILED) {
persistent_memory = _mm_malloc(size, 16);
return persistent_memory;
}
persistent_memory_flags |= MEMORY_HUGEPAGES_ENABLED;
if (madvise(persistent_memory, size, MADV_RANDOM | MADV_WILLNEED) != 0) {
applog(LOG_ERR, "madvise failed");
}
if (mlock(persistent_memory, size) == 0) {
persistent_memory_flags |= MEMORY_LOCK;
}
return persistent_memory;
}
void persistent_memory_free() {
const int size = MEMORY * (opt_n_threads + 1);
if (persistent_memory_flags & MEMORY_HUGEPAGES_ENABLED) {
if (persistent_memory_flags & MEMORY_LOCK) {
munlock(persistent_memory, size);
}
munmap(persistent_memory, size);
}
else {
_mm_free(persistent_memory);
}
}
void *allocate_executable_memory(size_t size)
{
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
}
void protect_executable_memory(void *p, size_t size)
{
mprotect(p, size, PROT_READ | PROT_EXEC);
}
void flush_instruction_cache(void *p, size_t size)
{
__builtin___clear_cache((char*) p, (char*)(p) + size);
}

91
mac/xmrig_mac.c Normal file
View File

@@ -0,0 +1,91 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <unistd.h>
#include "options.h"
#include "cpu.h"
#include "utils/applog.h"
static void signal_handler(int sig)
{
switch (sig) {
case SIGHUP:
applog(LOG_WARNING, "SIGHUP received");
break;
case SIGINT:
applog(LOG_WARNING, "SIGINT received, exiting");
proper_exit(0);
break;
case SIGTERM:
applog(LOG_WARNING, "SIGTERM received, exiting");
proper_exit(0);
break;
}
}
void proper_exit(int reason) {
exit(reason);
}
void os_specific_init()
{
if (opt_affinity != -1) {
affine_to_cpu_mask(-1, opt_affinity);
}
if (opt_background) {
int i = fork();
if (i < 0) {
exit(1);
}
if (i > 0) {
exit(0);
}
i = setsid();
if (i < 0) {
applog(LOG_ERR, "setsid() failed (errno = %d)", errno);
}
i = chdir("/");
if (i < 0) {
applog(LOG_ERR, "chdir() failed (errno = %d)", errno);
}
signal(SIGHUP, signal_handler);
signal(SIGTERM, signal_handler);
}
signal(SIGINT, signal_handler);
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,11 +25,15 @@
#include <string.h>
#include "persistent_memory.h"
#include "options.h"
static size_t offset = 0;
void * persistent_calloc(size_t num, size_t size) {
size += size % 16;
void *mem = &persistent_memory[offset];
offset += (num * size);
@@ -36,3 +41,31 @@ void * persistent_calloc(size_t num, size_t size) {
return mem;
}
void init_cn_r(struct cryptonight_ctx *ctx)
{
uint8_t *p = allocate_executable_memory(0x4000);
ctx->generated_code = (cn_mainloop_fun_ms_abi) p;
ctx->generated_code_double = (cn_mainloop_double_fun_ms_abi)(p + 0x2000);
ctx->generated_code_height = ctx->generated_code_double_height = (uint64_t)(-1);
ctx->height = 0;
}
void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id)
{
const int ratio = (opt_double_hash && opt_algo == ALGO_CRYPTONIGHT) ? 2 : 1;
ctx[0] = persistent_calloc(1, sizeof(struct cryptonight_ctx));
ctx[0]->memory = &persistent_memory[MEMORY * (thr_id * ratio + 1)];
init_cn_r(ctx[0]);
if (opt_double_hash) {
ctx[1] = persistent_calloc(1, sizeof(struct cryptonight_ctx));
ctx[1]->memory = ctx[0]->memory + (opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE);
init_cn_r(ctx[1]);
}
}

293
options.c
View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,25 +37,61 @@
#include "algo/cryptonight/cryptonight.h"
int64_t opt_affinity = -1L;
int opt_n_threads = 0;
int opt_algo_variant = 0;
int opt_retries = 5;
int opt_retry_pause = 5;
int opt_donate_level = DONATE_LEVEL;
bool opt_colors = true;
bool opt_keepalive = false;
bool opt_background = false;
char *opt_url = NULL;
char *opt_backup_url = NULL;
char *opt_userpass = NULL;
char *opt_user = NULL;
char *opt_pass = NULL;
int64_t opt_affinity = -1L;
int opt_n_threads = 0;
int opt_retries = 5;
int opt_retry_pause = 5;
int opt_donate_level = DONATE_LEVEL;
int opt_max_cpu_usage = 75;
bool opt_colors = true;
bool opt_keepalive = false;
bool opt_background = false;
bool opt_double_hash = false;
bool opt_safe = false;
bool opt_nicehash = false;
char *opt_url = NULL;
char *opt_backup_url = NULL;
char *opt_userpass = NULL;
char *opt_user = NULL;
char *opt_pass = NULL;
enum Algo opt_algo = ALGO_CRYPTONIGHT;
enum Variant opt_variant = VARIANT_AUTO;
enum AlgoVariant opt_av = AV_AUTO;
enum Assembly opt_assembly = ASM_AUTO;
struct AlgoData
{
const char *name;
const char *shortName;
enum Algo algo;
enum Variant variant;
};
static struct AlgoData const algorithms[] = {
{ "cryptonight", "cn", ALGO_CRYPTONIGHT, VARIANT_AUTO },
{ "cryptonight/0", "cn/0", ALGO_CRYPTONIGHT, VARIANT_0 },
{ "cryptonight/1", "cn/1", ALGO_CRYPTONIGHT, VARIANT_1 },
{ "cryptonight/2", "cn/2", ALGO_CRYPTONIGHT, VARIANT_2 },
{ "cryptonight/4", "cn/4", ALGO_CRYPTONIGHT, VARIANT_4 },
{ "cryptonight/r", "cn/r", ALGO_CRYPTONIGHT, VARIANT_4 },
# ifndef XMRIG_NO_AEON
{ "cryptonight-lite", "cn-lite", ALGO_CRYPTONIGHT_LITE, VARIANT_AUTO },
{ "cryptonight-light", "cn-light", ALGO_CRYPTONIGHT_LITE, VARIANT_AUTO },
{ "cryptonight-lite/0", "cn-lite/0", ALGO_CRYPTONIGHT_LITE, VARIANT_0 },
{ "cryptonight-lite/1", "cn-lite/1", ALGO_CRYPTONIGHT_LITE, VARIANT_1 },
# endif
};
static char const usage[] = "\
Usage: " APP_ID " [OPTIONS]\n\
Options:\n\
-a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\
--variant=N cryptonight variant: 0-4\n\
-o, --url=URL URL of mining server\n\
-b, --backup-url=URL URL of backup mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\
@@ -65,11 +102,14 @@ Options:\n\
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
--no-color disable colored output\n\
--donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\
-B, --background run the miner in the background\n\
-c, --config=FILE load a JSON-format configuration file\n\
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
--safe safe adjust threads and av settings for current CPU\n\
--nicehash enable nicehash support\n\
-h, --help display this help and exit\n\
-V, --version output version information and exit\n\
";
@@ -79,42 +119,90 @@ static char const short_options[] = "a:c:khBp:Px:r:R:s:t:T:o:u:O:v:Vb:";
static struct option const options[] = {
{ "algo", 1, NULL, 'a' },
{ "av", 1, NULL, 'v' },
{ "background", 0, NULL, 'B' },
{ "backup-url", 1, NULL, 'b' },
{ "config", 1, NULL, 'c' },
{ "cpu-affinity", 1, NULL, 1020 },
{ "donate-level", 1, NULL, 1003 },
{ "help", 0, NULL, 'h' },
{ "keepalive", 0, NULL ,'k' },
{ "no-color", 0, NULL, 1002 },
{ "pass", 1, NULL, 'p' },
{ "retries", 1, NULL, 'r' },
{ "retry-pause", 1, NULL, 'R' },
{ "threads", 1, NULL, 't' },
{ "url", 1, NULL, 'o' },
{ "user", 1, NULL, 'u' },
{ "userpass", 1, NULL, 'O' },
{ "version", 0, NULL, 'V' },
{ 0, 0, 0, 0 }
{ "algo", 1, NULL, 'a' },
{ "av", 1, NULL, 'v' },
{ "background", 0, NULL, 'B' },
{ "backup-url", 1, NULL, 'b' },
{ "config", 1, NULL, 'c' },
{ "cpu-affinity", 1, NULL, 1020 },
{ "donate-level", 1, NULL, 1003 },
{ "help", 0, NULL, 'h' },
{ "keepalive", 0, NULL, 'k' },
{ "max-cpu-usage", 1, NULL, 1004 },
{ "nicehash", 0, NULL, 1006 },
{ "no-color", 0, NULL, 1002 },
{ "pass", 1, NULL, 'p' },
{ "retries", 1, NULL, 'r' },
{ "retry-pause", 1, NULL, 'R' },
{ "safe", 0, NULL, 1005 },
{ "threads", 1, NULL, 't' },
{ "url", 1, NULL, 'o' },
{ "user", 1, NULL, 'u' },
{ "userpass", 1, NULL, 'O' },
{ "version", 0, NULL, 'V' },
{ "variant", 1, NULL, 1021 },
{ "asm", 1, NULL, 1022 },
{ NULL, 0, NULL, 0 }
};
static int get_algo_variant(int variant) {
if (variant > XMR_VARIANT_AUTO && variant < XMR_VARIANT_MAX) {
return variant;
}
static const char *algo_names[] = {
"cryptonight",
# ifndef XMRIG_NO_AEON
"cryptonight-lite"
# endif
};
if (cpu_info.flags & CPU_FLAG_AES) {
if (cpu_info.flags & CPU_FLAG_BMI2) {
return XMR_VARIANT_AESNI_BMI2;
}
return XMR_VARIANT_AESNI;
}
static const char *variant_names[] = {
"auto",
"0",
"1",
"2",
"4"
};
return XMR_VARIANT_LEGACY;
static const char *asm_names[] = {
"none",
"auto",
"intel",
"ryzen",
"bulldozer"
};
#ifndef XMRIG_NO_AEON
static int get_cryptonight_lite_variant(int variant) {
if (variant <= AV_AUTO || variant >= AV_MAX) {
return (cpu_info.flags & CPU_FLAG_AES) ? AV_DOUBLE : AV_DOUBLE_SOFT;
}
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) {
return variant + 2;
}
return variant;
}
#endif
static int get_algo_variant(int algo, int variant) {
# ifndef XMRIG_NO_AEON
if (algo == ALGO_CRYPTONIGHT_LITE) {
return get_cryptonight_lite_variant(variant);
}
# endif
if (variant <= AV_AUTO || variant >= AV_MAX) {
return (cpu_info.flags & CPU_FLAG_AES) ? AV_SINGLE : AV_SINGLE_SOFT;
}
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) {
return variant + 2;
}
return variant;
}
@@ -129,7 +217,22 @@ static void parse_arg(int key, char *arg) {
switch (key)
{
case 'a':
case 'a': /* --algo */
for (size_t i = 0; i < ARRAY_SIZE(algorithms); i++) {
if ((strcasecmp(arg, algorithms[i].name) == 0) || (strcasecmp(arg, algorithms[i].shortName) == 0)) {
opt_algo = algorithms[i].algo;
opt_variant = algorithms[i].variant;
break;
}
}
break;
case 1022: /* --asm */
for (size_t i = 0; i < ARRAY_SIZE(asm_names); i++) {
if (strcasecmp(arg, asm_names[i]) == 0) {
opt_assembly = i;
}
}
break;
case 'O': /* --userpass */
@@ -200,7 +303,20 @@ static void parse_arg(int key, char *arg) {
opt_n_threads = v;
break;
case 'k':
case 1004: /* --max-cpu-usage */
v = atoi(arg);
if (v < 1 || v > 100) {
show_usage_and_exit(1);
}
opt_max_cpu_usage = v;
break;
case 1005: /* --safe */
opt_safe = true;
break;
case 'k': /* --keepalive */
opt_keepalive = true;
break;
@@ -230,24 +346,24 @@ static void parse_arg(int key, char *arg) {
break;
}
case 'B':
case 'B': /* --background */
opt_background = true;
opt_colors = false;
break;
case 'v': /* --av */
v = atoi(arg);
if (v < 0 || v > XMR_VARIANT_MAX) {
if (v <= AV_AUTO || v >= AV_MAX) {
show_usage_and_exit(1);
}
opt_algo_variant = v;
opt_av = v;
break;
case 1020: /* --cpu-affinity */
p = strstr(arg, "0x");
ul = p ? strtoul(p, NULL, 16) : atol(arg);
if (ul > (1UL << cpu_info.count) -1) {
if (ul > (1UL << cpu_info.total_logical_cpus) -1) {
ul = -1;
}
@@ -258,13 +374,28 @@ static void parse_arg(int key, char *arg) {
opt_colors = false;
break;
case 1003:
case 1003: /* --donate-level */
// v = atoi(arg);
// if (v < 1 || v > 99) {
// show_usage_and_exit(1);
// }
// opt_donate_level = v;
break;
case 1021: /* --variant */
v = atoi(arg);
if (v < 1 || v > 99) {
show_usage_and_exit(1);
if (v == 4 || strcasecmp(arg, "r") == 0) {
opt_variant = VARIANT_4;
}
else if (v > VARIANT_AUTO && v < VARIANT_MAX) {
opt_variant = v;
}
opt_donate_level = v;
break;
case 1006: /* --nicehash */
opt_nicehash = true;
break;
default:
@@ -275,7 +406,7 @@ static void parse_arg(int key, char *arg) {
static void parse_config(json_t *config, char *ref)
{
int i;
size_t i;
char buf[16];
json_t *val;
@@ -336,7 +467,7 @@ static char *parse_url(const char *arg)
show_usage_and_exit(1);
}
char *dest = malloc(strlen(arg) + 14);
char *dest = malloc(strlen(arg) + 16);
sprintf(dest, "stratum+tcp://%s", arg);
return dest;
@@ -367,12 +498,12 @@ void parse_cmdline(int argc, char *argv[]) {
}
if (!opt_url) {
opt_url = strdup("stratum+tcp://proxy.xmrig.com:443");
opt_keepalive = true;
applog_notime(LOG_ERR, "No pool URL supplied. Exiting.\n", argv[0]);
proper_exit(1);
}
if (!opt_backup_url) {
opt_backup_url = strdup("stratum+tcp://failover.xmrig.com:80");
}
if (strstr(opt_url, ".nicehash.com:") != NULL) {
opt_nicehash = true;
}
if (!opt_userpass) {
@@ -384,20 +515,23 @@ void parse_cmdline(int argc, char *argv[]) {
sprintf(opt_userpass, "%s:%s", opt_user, opt_pass);
}
opt_av = get_algo_variant(opt_algo, opt_av);
if (!cryptonight_init(opt_av)) {
applog(LOG_ERR, "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");
proper_exit(1);
}
if (!opt_n_threads) {
opt_n_threads = get_optimal_threads_count();
opt_n_threads = get_optimal_threads_count(opt_algo, opt_double_hash, opt_max_cpu_usage);
}
opt_algo_variant = get_algo_variant(opt_algo_variant);
if (!opt_algo_variant) {
opt_algo_variant = get_algo_variant(0);
if (opt_safe) {
const int count = get_optimal_threads_count(opt_algo, opt_double_hash, opt_max_cpu_usage);
if (opt_n_threads > count) {
opt_n_threads = count;
}
}
if (opt_donate_level < 1 || opt_donate_level > 99) {
opt_donate_level = 1;
}
cryptonight_init(opt_algo_variant);
}
@@ -439,3 +573,14 @@ void show_version_and_exit(void) {
#endif
proper_exit(0);
}
const char *get_current_algo_name(void) {
return algo_names[opt_algo];
}
const char *get_current_variant_name(void)
{
return variant_names[opt_variant + 1];
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,48 +22,84 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __OPTIONS_H__
#define __OPTIONS_H__
#ifndef XMRIG_OPTIONS_H
#define XMRIG_OPTIONS_H
#include <stdbool.h>
#include <stdint.h>
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
enum xmr_algo_variant {
XMR_VARIANT_AUTO,
XMR_VARIANT_AESNI,
XMR_VARIANT_AESNI_WOLF,
XMR_VARIANT_AESNI_BMI2,
XMR_VARIANT_LEGACY,
XMR_VARIANT_EXPERIMENTAL,
XMR_VARIANT_MAX
enum Algo {
ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
};
enum Variant {
VARIANT_AUTO = -1,
VARIANT_0 = 0,
VARIANT_1 = 1,
VARIANT_2 = 2,
VARIANT_4 = 3,
VARIANT_MAX
};
enum AlgoVariant {
AV_AUTO, // --av=0 Automatic mode.
AV_SINGLE, // --av=1 Single hash mode
AV_DOUBLE, // --av=2 Double hash mode
AV_SINGLE_SOFT, // --av=3 Single hash mode (Software AES)
AV_DOUBLE_SOFT, // --av=4 Double hash mode (Software AES)
AV_MAX
};
enum Assembly {
ASM_NONE,
ASM_AUTO,
ASM_INTEL,
ASM_RYZEN,
ASM_BULLDOZER,
ASM_MAX
};
extern bool opt_colors;
extern bool opt_keepalive;
extern bool opt_background;
extern bool opt_double_hash;
extern bool opt_safe;
extern bool opt_nicehash;
extern char *opt_url;
extern char *opt_backup_url;
extern char *opt_userpass;
extern char *opt_user;
extern char *opt_pass;
extern int opt_n_threads;
extern int opt_algo_variant;
extern int opt_retry_pause;
extern int opt_retries;
extern int opt_donate_level;
extern int opt_max_cpu_usage;
extern int64_t opt_affinity;
extern enum Algo opt_algo;
extern enum Variant opt_variant;
extern enum AlgoVariant opt_av;
extern enum Assembly opt_assembly;
void parse_cmdline(int argc, char *argv[]);
void show_usage_and_exit(int status);
void show_version_and_exit(void);
const char *get_current_algo_name(void);
const char *get_current_variant_name(void);
extern void proper_exit(int reason);
#endif /* __OPTIONS_H__ */
#endif /* XMRIG_OPTIONS_H */

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,12 +22,16 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __PERSISTENT_MEMORY_H__
#define __PERSISTENT_MEMORY_H__
#ifndef XMRIG_PERSISTENT_MEMORY_H
#define XMRIG_PERSISTENT_MEMORY_H
#include <stddef.h>
#include "algo/cryptonight/cryptonight.h"
enum memory_flags {
MEMORY_HUGEPAGES_AVAILABLE = 1,
MEMORY_HUGEPAGES_ENABLED = 2,
@@ -34,7 +39,7 @@ enum memory_flags {
};
#define TWO_MB_PAGE 2097152
#define MEMORY 2097152
extern char *persistent_memory;
@@ -43,7 +48,15 @@ extern int persistent_memory_flags;
const char * persistent_memory_allocate();
void persistent_memory_free();
void * persistent_calloc(size_t num, size_t size);
void *persistent_calloc(size_t num, size_t size);
void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id);
#endif /* __PERSISTENT_MEMORY_H__ */
void *allocate_executable_memory(size_t size);
void flush_instruction_cache(void *p, size_t size);
void init_cn_r(struct cryptonight_ctx *ctx);
void protect_executable_memory(void *p, size_t size);
#endif /* XMRIG_PERSISTENT_MEMORY_H */

194
stratum.c
View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,11 +37,16 @@
# include <poll.h>
#endif
#include "stratum.h"
#include "version.h"
#ifdef __APPLE_CC__
# include <netinet/in.h>
#endif
#include "options.h"
#include "stats.h"
#include "stratum.h"
#include "util.h"
#include "utils/applog.h"
#include "version.h"
#ifdef WIN32
@@ -58,6 +64,9 @@
#define unlikely(expr) (__builtin_expect(!!(expr), 0))
static struct work work;
static bool send_line(curl_socket_t sock, char *s);
static bool socket_full(curl_socket_t sock, int timeout);
static void buffer_append(struct stratum_ctx *sctx, const char *s);
@@ -66,7 +75,8 @@ static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd, curlsocktype p
static curl_socket_t opensocket_grab_cb(void *clientp, curlsocktype purpose, struct curl_sockaddr *addr);
static int closesocket_cb(void *clientp, curl_socket_t item);
static bool login_decode(struct stratum_ctx *sctx, const json_t *val);
static bool job_decode(struct stratum_ctx *sctx, const json_t *job);
static void extensions_decode(const json_t *val);
static bool job_decode(const json_t *job);
static bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen);
@@ -235,13 +245,19 @@ bool stratum_handle_response(char *buf) {
json_t *id_val = json_object_get(val, "id");
if (!id_val || json_is_null(id_val) || !res_val) {
json_decref(val);
return false;
const char* message;
if (json_is_object(err_val) && (message = json_string_value(json_object_get(err_val, "message")))) {
applog(LOG_ERR, "error: \"%s\"", message);
}
json_decref(val);
return false;
}
json_t *status = json_object_get(res_val, "status");
if (!strcmp(json_string_value(status), "KEEPALIVED") ) {
if (status && !strcmp(json_string_value(status), "KEEPALIVED") ) {
applog(LOG_DEBUG, "Keepalived receveid");
json_decref(val);
return true;
@@ -285,7 +301,6 @@ bool stratum_keepalived(struct stratum_ctx *sctx)
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass)
{
char *sret;
json_error_t err;
char *req = malloc(128 + strlen(user) + strlen(pass));
sprintf(req, "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\",\"agent\":\"%s/%s\"},\"id\":1}", user, pass, APP_NAME, APP_VERSION);
@@ -321,19 +336,24 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
json_t *error = json_object_get(val, "error");
if (!result || json_is_false(result) || (error && !json_is_null(error))) {
applog(LOG_ERR, "Stratum authentication failed");
const char* message;
if (json_is_object(error) && (message = json_string_value(json_object_get(error, "message")))) {
applog(LOG_ERR, "Stratum authentication failed: \"%s\"", message);
}
else {
applog(LOG_ERR, "Stratum authentication failed");
}
json_decref(val);
return false;
}
login_decode(sctx, val);
json_t *job = json_object_get(result, "job");
pthread_mutex_lock(&sctx->work_lock);
if (job) {
job_decode(sctx, job);
if (login_decode(sctx, val) && job(sctx, json_object_get(result, "job"))) {
pthread_mutex_lock(&sctx->sock_lock);
sctx->ready = true;
pthread_mutex_unlock(&sctx->sock_lock);
}
pthread_mutex_unlock(&sctx->work_lock);
json_decref(val);
return true;
@@ -492,11 +512,20 @@ static void buffer_append(struct stratum_ctx *sctx, const char *s)
*/
static bool job(struct stratum_ctx *sctx, json_t *params)
{
bool ret = false;
if (!job_decode(params)) {
return false;
}
pthread_mutex_lock(&sctx->work_lock);
ret = job_decode(sctx, params);
if (sctx->work.target != work.target) {
stats_set_target(work.target);
}
memcpy(&sctx->work, &work, sizeof(struct work));
pthread_mutex_unlock(&sctx->work_lock);
return ret;
return true;
}
@@ -515,31 +544,36 @@ static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd, curlsocktype p
int tcp_keepintvl = 50;
#ifndef WIN32
if (unlikely(setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive,
sizeof(keepalive))))
if (unlikely(setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive, sizeof(keepalive)))) {
return 1;
#ifdef __linux
if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPCNT,
&tcp_keepcnt, sizeof(tcp_keepcnt))))
}
# ifdef __linux
if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPCNT, &tcp_keepcnt, sizeof(tcp_keepcnt)))) {
return 1;
if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPIDLE,
&tcp_keepidle, sizeof(tcp_keepidle))))
}
if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &tcp_keepidle, sizeof(tcp_keepidle)))) {
return 1;
if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPINTVL,
&tcp_keepintvl, sizeof(tcp_keepintvl))))
}
if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &tcp_keepintvl, sizeof(tcp_keepintvl)))) {
return 1;
#endif /* __linux */
#ifdef __APPLE_CC__
if (unlikely(setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE,
&tcp_keepintvl, sizeof(tcp_keepintvl))))
}
# endif /* __linux */
# ifdef __APPLE_CC__
if (unlikely(setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &tcp_keepintvl, sizeof(tcp_keepintvl)))) {
return 1;
#endif /* __APPLE_CC__ */
}
# endif /* __APPLE_CC__ */
#else /* WIN32 */
struct tcp_keepalive vals;
vals.onoff = 1;
vals.keepalivetime = tcp_keepidle * 1000;
vals.keepaliveinterval = tcp_keepintvl * 1000;
DWORD outputBytes;
if (unlikely(WSAIoctl(fd, SIO_KEEPALIVE_VALS, &vals, sizeof(vals), NULL, 0, &outputBytes, NULL, NULL))) {
return 1;
}
@@ -584,33 +618,23 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
return false;
}
json_t *tmp = json_object_get(res, "id");
if (!tmp) {
const char *id = json_string_value(json_object_get(res, "id"));
if (!id || strlen(id) >= (sizeof(sctx->id))) {
applog(LOG_ERR, "JSON invalid id");
return false;
}
const char *id = json_string_value(tmp);
if (!id) {
applog(LOG_ERR, "JSON id is not a string");
return false;
}
memset(&sctx->id, 0, sizeof(sctx->id));
memcpy(&sctx->id, id, strlen(id));
memcpy(&sctx->id, id, 64);
pthread_mutex_lock(&sctx->sock_lock);
sctx->ready = true;
pthread_mutex_unlock(&sctx->sock_lock);
tmp = json_object_get(res, "status");
if (!tmp) {
applog(LOG_ERR, "JSON invalid status");
return false;
}
const char *s = json_string_value(tmp);
const char *s = json_string_value(json_object_get(res, "status"));
if (!s) {
applog(LOG_ERR, "JSON status is not a string");
// Workaround for xmrig-proxy bug https://github.com/xmrig/xmrig-proxy/commit/dfa1960fe3eeb13f80717b7dbfcc7c6e9f222d89
s = json_string_value(json_object_get(val, "status"));
}
if (!s) {
applog(LOG_ERR, "JSON invalid status");
return false;
}
@@ -619,10 +643,31 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
return false;
}
extensions_decode(res);
return true;
}
static void extensions_decode(const json_t *res)
{
json_t *extensions = json_object_get(res, "extensions");
if (!extensions || json_array_size(extensions) == 0) {
return;
}
size_t index;
json_t *value;
json_array_foreach(extensions, index, value) {
const char *s = json_string_value(value);
if (s && strcmp(s, "nicehash")) {
opt_nicehash = true;
}
}
}
/**
* @brief job_decode
* @param sctx
@@ -630,46 +675,42 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
* @param work
* @return
*/
static bool job_decode(struct stratum_ctx *sctx, const json_t *job) {
json_t *tmp = json_object_get(job, "job_id");
if (!tmp) {
static bool job_decode(const json_t *job) {
const char *job_id = json_string_value(json_object_get(job, "job_id"));
if (!job_id || strlen(job_id) >= sizeof(work.job_id)) {
applog(LOG_ERR, "JSON invalid job id");
return false;
}
const char *job_id = json_string_value(tmp);
tmp = json_object_get(job, "blob");
if (!tmp) {
const char *blob = json_string_value(json_object_get(job, "blob"));
if (!blob) {
applog(LOG_ERR, "JSON invalid blob");
return false;
}
const char *hexblob = json_string_value(tmp);
if (!hexblob || strlen(hexblob) != 152) {
work.blob_size = strlen(blob);
if (work.blob_size % 2 != 0) {
applog(LOG_ERR, "JSON invalid blob length");
return false;
}
if (!hex2bin(sctx->blob, hexblob, 76)) {
applog(LOG_ERR, "JSON inval blob");
work.blob_size /= 2;
if (work.blob_size < 76 || work.blob_size > (sizeof(work.blob))) {
applog(LOG_ERR, "JSON invalid blob length");
return false;
}
uint32_t target;
jobj_binary(job, "target", &target, 4);
if (sctx->target != target) {
stats_set_target(target);
sctx->target = target;
if (!hex2bin((unsigned char *) work.blob, blob, work.blob_size)) {
applog(LOG_ERR, "JSON invalid blob");
return false;
}
memcpy(sctx->work.data, sctx->blob, 76);
memset(sctx->work.target, 0xff, sizeof(sctx->work.target));
jobj_binary(job, "target", &work.target, 4);
sctx->work.target[7] = sctx->target;
memset(work.job_id, 0, sizeof(work.job_id));
memcpy(work.job_id, job_id, strlen(job_id));
free(sctx->work.job_id);
sctx->work.job_id = strdup(job_id);
work.height = (uint64_t) json_integer_value(json_object_get(job, "height"));
return true;
}
@@ -699,6 +740,7 @@ static bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t bu
return false;
}
if (!hex2bin(buf, hexstr, buflen)) {
return false;
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,22 +22,28 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __STRATUM_H__
#define __STRATUM_H__
#ifndef XMRIG_STRATUM_H
#define XMRIG_STRATUM_H
#include <stdbool.h>
#include <inttypes.h>
#include <curl/curl.h>
/**
* 128tx exploit.
*
* Max blob size is 84 (75 fixed + 9 variable), aligned to 96.
* https://github.com/xmrig/xmrig/issues/1 Thanks fireice-uk.
*/
struct work {
uint32_t data[19];
uint32_t target[8];
uint32_t hash[8];
char *job_id;
size_t xnonce2_len;
unsigned char *xnonce2;
uint32_t blob[21] __attribute__((aligned(16)));
size_t blob_size __attribute__((aligned(16)));
uint32_t target __attribute__((aligned(16)));
uint32_t hash[8] __attribute__((aligned(16)));
char job_id[64] __attribute__((aligned(16)));
uint64_t height;
};
@@ -53,8 +60,6 @@ struct stratum_ctx {
bool ready;
char id[64];
char blob[76];
uint32_t target;
struct work work;
struct work g_work;
@@ -73,4 +78,4 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
bool stratum_handle_response(char *buf);
bool stratum_keepalived(struct stratum_ctx *sctx);
#endif /* __STRATUM_H__ */
#endif /* XMRIG_STRATUM_H */

View File

@@ -4,4 +4,6 @@ cmake_minimum_required(VERSION 3.0)
include(CTest)
add_subdirectory(unity)
add_subdirectory(cryptonight)
add_subdirectory(cryptonight)
add_subdirectory(cryptonight_lite)
add_subdirectory(autoconf)

View File

@@ -0,0 +1,16 @@
set(SOURCES
autoconf.c
../../cpu.h
../../cpu.c
)
add_executable(autoconf_app ${SOURCES})
target_link_libraries(autoconf_app unity)
include_directories(../..)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")
add_definitions(-DBUILD_TEST)
add_test(autoconf_test autoconf_app)

152
test/autoconf/autoconf.c Normal file
View File

@@ -0,0 +1,152 @@
#include <unity.h>
#include "cpu.h"
#include "options.h"
struct cpu_info cpu_info = { 0 };
static void set_cpu_info(int total_logical_cpus, int l2_cache, int l3_cache) {
cpu_info.total_cores = total_logical_cpus;
cpu_info.total_logical_cpus = total_logical_cpus;
cpu_info.l2_cache = l2_cache;
cpu_info.l3_cache = l3_cache;
}
void test_autoconf_should_GetOptimalThreadsCounti7(void) {
set_cpu_info(8, 1024, 8192); // 4C/8T 8 MB (Generic i7 CPU)
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(5, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 60));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 50));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 35));
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 20));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 5));
}
void test_autoconf_should_GetOptimalThreadsCounti5(void) {
set_cpu_info(4, 1024, 6144); // 2C/4T 6 MB (Generic i5 CPU)
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
}
void test_autoconf_should_GetOptimalThreadsCounti3(void) {
set_cpu_info(4, 512, 3072); // 2C/4T 3 MB (Generic i3 CPU)
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
}
void test_autoconf_should_GetOptimalThreadsCountR7(void) {
set_cpu_info(16, 4096, 16384); // 8C/16T 16 MB (AMD Ryzen 7)
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
TEST_ASSERT_EQUAL_INT(16, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
}
void test_autoconf_should_GetOptimalThreadsCountTwoE5620(void) {
set_cpu_info(16, 2048, 24576); // 8C/16T 24 MB (Two E5620)
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
TEST_ASSERT_EQUAL_INT(16, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
}
void test_autoconf_should_GetOptimalThreadsCountVCPU(void) {
set_cpu_info(1, 1024, 15360); // 1C/1T 15 MB (Single core Virtual CPU)
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
}
void test_autoconf_should_GetOptimalThreadsCountNoL3(void) {
set_cpu_info(8, 8192, 0); // 4C/8T (Multi core Virtual CPU without L3 cache)
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
TEST_ASSERT_EQUAL_INT(5, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 60));
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 50));
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 35));
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 20));
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 5));
}
int main(void)
{
UNITY_BEGIN();
RUN_TEST(test_autoconf_should_GetOptimalThreadsCounti7);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCounti5);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCounti3);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountR7);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountR7);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountTwoE5620);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountVCPU);
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountNoL3);
return UNITY_END();
}

View File

@@ -1,44 +1,28 @@
set(SOURCES
cryptonight.c
../../options.h
../../algo/cryptonight/cryptonight.h
../../algo/cryptonight/cryptonight_common.c
../../algo/cryptonight/cryptonight_av4_legacy.c
../../algo/cryptonight/cryptonight.c
../../algo/cryptonight/cryptonight_av1_aesni.c
../../algo/cryptonight/cryptonight_av2_aesni_double.c
../../algo/cryptonight/cryptonight_av3_softaes.c
../../algo/cryptonight/cryptonight_av4_softaes_double.c
../../crypto/c_keccak.c
../../crypto/c_blake256.c
../../crypto/c_groestl.c
../../crypto/c_jh.c
../../crypto/c_skein.c
../../crypto/oaes_config.h
../../crypto/oaes_lib.h
../../crypto/oaes_lib.c
../../crypto/aesb.c
../../crypto/soft_aes.c
)
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
add_subdirectory(bmi2)
add_executable(cryptonight_app ${SOURCES}
cryptonight.c
../../algo/cryptonight/cryptonight_av1_aesni.c
../../algo/cryptonight/cryptonight_av2_aesni_wolf.c
../../algo/cryptonight/cryptonight_av5_aesni_experimental.c
)
target_link_libraries(cryptonight_app unity cryptonight_av3_aesni_bmi2)
else()
add_executable(cryptonight_app ${SOURCES}
cryptonight32.c
../../algo/cryptonight/cryptonight_av1_aesni32.c
)
target_link_libraries(cryptonight_app unity)
endif()
add_executable(cryptonight_app ${SOURCES})
target_link_libraries(cryptonight_app unity)
include_directories(../..)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -fno-strict-aliasing")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")
add_definitions(-DBUILD_TEST)
add_definitions(-DXMRIG_NO_AEON)
add_test(cryptonight_test cryptonight_app)

View File

@@ -1,3 +0,0 @@
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -mbmi2")
include_directories(../../..)
add_library(cryptonight_av3_aesni_bmi2 STATIC ../../../algo/cryptonight/cryptonight_av3_aesni_bmi2.c)

View File

@@ -1,146 +1,125 @@
#include <unity.h>
#include <stdbool.h>
#include <stdlib.h>
#include <algo/cryptonight/cryptonight.h>
#include <string.h>
#include <mm_malloc.h>
#include "options.h"
#include "algo/cryptonight/cryptonight.h"
bool opt_double_hash = false;
const static char input1[152] = {
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
};
const static char input2[] = "This is a test";
const static char input3[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus pellentesque metus.";
void cryptonight_av1_aesni(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av2_aesni_wolf(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av3_aesni_bmi2(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av5_aesni_experimental(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
char *bin2hex(const unsigned char *p, size_t len)
static char hash[64];
#define RESULT1 "1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100"
#define RESULT1_DOUBLE "1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd0801001b606a3f4a07d6489a1bcd07697bd16696b61c8ae982f61a90160f4e52828a7f"
#define RESULT2 "a084f01d1437a09c6985401b60d43554ae105802c5f5d8a9b3253649c0be6605"
#define RESULT3 "0bbe54bd26caa92a1d436eec71cbef02560062fa689fe14d7efcf42566b411cf"
static char *bin2hex(const unsigned char *p, size_t len)
{
int i;
char *s = malloc((len * 2) + 1);
if (!s)
if (!s) {
return NULL;
}
for (i = 0; i < len; i++)
for (int i = 0; i < len; i++) {
sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
}
return s;
}
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
{
char hex_byte[3];
char *ep;
hex_byte[2] = '\0';
static void * create_ctx(int ratio) {
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * ratio, 16);
while (*hexstr && len) {
if (!hexstr[1]) {
return false;
}
hex_byte[0] = hexstr[0];
hex_byte[1] = hexstr[1];
*p = (unsigned char) strtol(hex_byte, &ep, 16);
if (*ep) {
return false;
}
p++;
hexstr += 2;
len--;
}
return ctx;
}
return (len == 0 && *hexstr == 0) ? true : false;
static void free_ctx(struct cryptonight_ctx *ctx) {
_mm_free(ctx->memory);
_mm_free(ctx);
}
void test_cryptonight_av1_should_CalcHash(void) {
char hash[32];
char data[76];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
cryptonight_av1_aesni(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av1_aesni(input2, strlen(input2), &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT2, bin2hex(hash, 32));
cryptonight_av1_aesni(&hash, data, memory, ctx);
cryptonight_av1_aesni(input3, strlen(input3), &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT3, bin2hex(hash, 32));
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
free_ctx(ctx);
}
void test_cryptonight_av2_should_CalcHash(void)
{
char hash[32];
char data[76];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
cryptonight_av2_aesni_double(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av2_aesni_wolf(&hash, data, memory, ctx);
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
free_ctx(ctx);
}
void test_cryptonight_av3_should_CalcHash(void)
{
char hash[32];
char data[76];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
cryptonight_av3_softaes(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av3_softaes(input2, strlen(input2), &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT2, bin2hex(hash, 32));
cryptonight_av3_aesni_bmi2(&hash, data, memory, ctx);
cryptonight_av3_softaes(input3, strlen(input3), &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT3, bin2hex(hash, 32));
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
free_ctx(ctx);
}
void test_cryptonight_av4_should_CalcHash(void)
{
char hash[32];
char data[76];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
cryptonight_av4_softaes_double(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av4_legacy(&hash, data, memory, ctx);
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
}
void test_cryptonight_av5_should_CalcHash(void)
{
char hash[32];
char data[76];
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av5_aesni_experimental(&hash, data, memory, ctx);
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
free_ctx(ctx);
}
@@ -152,7 +131,6 @@ int main(void)
RUN_TEST(test_cryptonight_av2_should_CalcHash);
RUN_TEST(test_cryptonight_av3_should_CalcHash);
RUN_TEST(test_cryptonight_av4_should_CalcHash);
RUN_TEST(test_cryptonight_av5_should_CalcHash);
return UNITY_END();
}

View File

@@ -1,95 +0,0 @@
#include <unity.h>
#include <stdbool.h>
#include <stdlib.h>
#include <algo/cryptonight/cryptonight.h>
void cryptonight_av1_aesni32(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
char *bin2hex(const unsigned char *p, size_t len)
{
int i;
char *s = malloc((len * 2) + 1);
if (!s)
return NULL;
for (i = 0; i < len; i++)
sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
return s;
}
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
{
char hex_byte[3];
char *ep;
hex_byte[2] = '\0';
while (*hexstr && len) {
if (!hexstr[1]) {
return false;
}
hex_byte[0] = hexstr[0];
hex_byte[1] = hexstr[1];
*p = (unsigned char) strtol(hex_byte, &ep, 16);
if (*ep) {
return false;
}
p++;
hexstr += 2;
len--;
}
return (len == 0 && *hexstr == 0) ? true : false;
}
void test_cryptonight_av1_32_should_CalcHash(void) {
char hash[32];
char data[76];
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av1_aesni32(&hash, data, memory, ctx);
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
}
void test_cryptonight_av4_should_CalcHash(void)
{
char hash[32];
char data[76];
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av4_legacy(&hash, data, memory, ctx);
free(memory);
free(ctx);
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
}
int main(void)
{
UNITY_BEGIN();
RUN_TEST(test_cryptonight_av1_32_should_CalcHash);
RUN_TEST(test_cryptonight_av4_should_CalcHash);
return UNITY_END();
}

View File

@@ -0,0 +1,27 @@
set(SOURCES
cryptonight_lite.c
../../options.h
../../algo/cryptonight/cryptonight.h
../../algo/cryptonight/cryptonight.c
../../algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
../../algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
../../algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
../../algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
../../crypto/c_keccak.c
../../crypto/c_blake256.c
../../crypto/c_groestl.c
../../crypto/c_jh.c
../../crypto/c_skein.c
../../crypto/soft_aes.c
)
add_executable(cryptonight_lite_app ${SOURCES})
target_link_libraries(cryptonight_lite_app unity)
include_directories(../..)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -fno-strict-aliasing")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")
add_definitions(-DBUILD_TEST)
add_test(cryptonight_lite_test cryptonight_lite_app)

View File

@@ -0,0 +1,124 @@
#include <unity.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <mm_malloc.h>
#include "options.h"
#include "algo/cryptonight/cryptonight.h"
bool opt_double_hash = false;
enum mining_algo opt_algo = ALGO_CRYPTONIGHT_LITE;
const static char input1[152] = {
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
};
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
static char hash[64];
#define RESULT1 "3695b4b53bb00358b0ad38dc160feb9e004eece09b83a72ef6ba9864d3510c88"
#define RESULT1_DOUBLE "3695b4b53bb00358b0ad38dc160feb9e004eece09b83a72ef6ba9864d3510c8828a22bad3f93d1408fca472eb5ad1cbe75f21d053c8ce5b3af105a57713e21dd"
static char *bin2hex(const unsigned char *p, size_t len)
{
char *s = malloc((len * 2) + 1);
if (!s) {
return NULL;
}
for (int i = 0; i < len; i++) {
sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
}
return s;
}
static void * create_ctx(int ratio) {
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx->memory = (uint8_t *) _mm_malloc(MEMORY_LITE * ratio, 16);
return ctx;
}
static void free_ctx(struct cryptonight_ctx *ctx) {
_mm_free(ctx->memory);
_mm_free(ctx);
}
void test_cryptonight_lite_av1_should_CalcHash(void) {
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
cryptonight_lite_av1_aesni(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
free_ctx(ctx);
}
void test_cryptonight_lite_av2_should_CalcHash(void)
{
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
cryptonight_lite_av2_aesni_double(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
free_ctx(ctx);
}
void test_cryptonight_lite_av3_should_CalcHash(void) {
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
cryptonight_lite_av3_softaes(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
free_ctx(ctx);
}
void test_cryptonight_lite_av4_should_CalcHash(void)
{
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
cryptonight_lite_av4_softaes_double(input1, 76, &hash, ctx);
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
free_ctx(ctx);
}
int main(void)
{
UNITY_BEGIN();
RUN_TEST(test_cryptonight_lite_av1_should_CalcHash);
RUN_TEST(test_cryptonight_lite_av2_should_CalcHash);
RUN_TEST(test_cryptonight_lite_av3_should_CalcHash);
RUN_TEST(test_cryptonight_lite_av4_should_CalcHash);
return UNITY_END();
}

View File

@@ -33,24 +33,20 @@ void cpu_init_common();
void cpu_init() {
cpu_info.count = sysconf(_SC_NPROCESSORS_CONF);
# ifdef XMRIG_NO_LIBCPUID
cpu_info.total_logical_cpus = sysconf(_SC_NPROCESSORS_CONF);
# endif
cpu_init_common();
}
int get_optimal_threads_count() {
int count = cpu_info.count / 2;
return count < 1 ? 1 : count;
}
int affine_to_cpu_mask(int id, unsigned long mask)
{
cpu_set_t set;
CPU_ZERO(&set);
for (unsigned i = 0; i < cpu_info.count; i++) {
for (unsigned i = 0; i < cpu_info.total_logical_cpus; i++) {
if (mask & (1UL << i)) {
CPU_SET(i, &set);
}

View File

@@ -21,9 +21,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __MEMORY_H__
#define __MEMORY_H__
#include <stdlib.h>
#include <mm_malloc.h>
#include <sys/mman.h>
@@ -38,13 +35,14 @@ int persistent_memory_flags = 0;
const char * persistent_memory_allocate() {
const int size = TWO_MB_PAGE * (opt_n_threads + 1);
const int ratio = (opt_double_hash && opt_algo != ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
const int size = MEMORY * (opt_n_threads * ratio + 1);
persistent_memory_flags |= MEMORY_HUGEPAGES_AVAILABLE;
persistent_memory = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0);
if (persistent_memory == MAP_FAILED) {
persistent_memory = _mm_malloc(size, 4096);
persistent_memory = _mm_malloc(size, 16);
return persistent_memory;
}
@@ -63,7 +61,7 @@ const char * persistent_memory_allocate() {
void persistent_memory_free() {
const int size = TWO_MB_PAGE * (opt_n_threads + 1);
const int size = MEMORY * (opt_n_threads + 1);
if (persistent_memory_flags & MEMORY_HUGEPAGES_ENABLED) {
if (persistent_memory_flags & MEMORY_LOCK) {
@@ -78,4 +76,21 @@ void persistent_memory_free() {
}
#endif /* __MEMORY_H__ */
void *allocate_executable_memory(size_t size)
{
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}
void protect_executable_memory(void *p, size_t size)
{
mprotect(p, size, PROT_READ | PROT_EXEC);
}
void flush_instruction_cache(void *p, size_t size)
{
# ifndef __FreeBSD__
__builtin___clear_cache((char*) p, (char*)(p) + size);
# endif
}

View File

@@ -75,6 +75,11 @@ void applog(int prio, const char *fmt, ...)
prio = LOG_NOTICE;
color = CL_CYN;
break;
case LOG_GREEN:
prio = LOG_NOTICE;
color = CL_LGR;
break;
}
}
@@ -116,7 +121,7 @@ void applog_notime(int prio, const char *fmt, ...)
if (opt_colors) {
switch (prio) {
case LOG_ERR: color = CL_RED; break;
case LOG_WARNING: color = CL_YLW; break;
case LOG_WARNING: color = CL_LYL; break;
case LOG_NOTICE: color = CL_WHT; break;
case LOG_INFO: color = ""; break;
case LOG_DEBUG: color = CL_GRY; break;

View File

@@ -30,7 +30,8 @@ enum {
LOG_NOTICE,
LOG_INFO,
LOG_DEBUG,
LOG_BLUE = 0x10
LOG_BLUE = 0x10,
LOG_GREEN
};
#define CL_N "\x1B[0m"
@@ -57,7 +58,7 @@ enum {
#endif
#define CL_LRD "\x1B[01;31m" /* light red */
#define CL_LGR "\x1B[01;32m" /* light green */
#define CL_YL2 "\x1B[01;33m" /* yellow */
#define CL_LYL "\x1B[01;33m" /* light yellow */
#define CL_LBL "\x1B[01;34m" /* light blue */
#define CL_LMA "\x1B[01;35m" /* light magenta */
#define CL_LCY "\x1B[01;36m" /* light cyan */

View File

@@ -44,25 +44,43 @@ static void print_memory() {
static void print_cpu() {
const char *t1 = (cpu_info.flags & CPU_FLAG_X86_64) ? OPT_COLOR(CL_LGR, "x86_64") : OPT_COLOR(CL_LRD, "-x86_64");
const char *t2 = (cpu_info.flags & CPU_FLAG_AES) ? OPT_COLOR(CL_LGR, "AES-NI") : OPT_COLOR(CL_LRD, "-AES-NI");
const char *t3 = (cpu_info.flags & CPU_FLAG_BMI2) ? OPT_COLOR(CL_LGR, "BMI2") : OPT_COLOR(CL_LRD, "-BMI2");
if (opt_colors) {
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU: %s", cpu_info.brand);
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU FEATURES: %s %s %s", t1, t2, t3);
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU: %s (%d)", cpu_info.brand, cpu_info.sockets);
}
else {
applog_notime(LOG_INFO, " * CPU: %s", cpu_info.brand);
applog_notime(LOG_INFO, " * CPU FEATURES: %s %s %s", t1, t2, t3);
applog_notime(LOG_INFO, " * CPU: %s (%d)", cpu_info.brand, cpu_info.sockets);
}
# ifndef XMRIG_NO_LIBCPUID
if (opt_colors) {
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU L2/L3: %.1f MB/%.1f MB", cpu_info.l2_cache / 1024.0, cpu_info.l3_cache / 1024.0);
}
else {
applog_notime(LOG_INFO, " * CPU L2/L3: %.1f MB/%.1f MB", cpu_info.l2_cache / 1024.0, cpu_info.l3_cache / 1024.0);
}
# endif
if (opt_colors) {
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU FEATURES: %s %s", t1, t2);
}
else {
applog_notime(LOG_INFO, " * CPU FEATURES: %s %s", t1, t2);
}
}
static void print_threads() {
const char *extra = "";
if (opt_nicehash) {
extra = ", nicehash";
}
if (opt_colors) {
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, donate=%d%%", opt_n_threads, opt_algo_variant, opt_donate_level);
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s/%s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), get_current_variant_name(), opt_donate_level, extra);
}
else {
applog_notime(LOG_INFO, " * THREADS: %d, av=%d, donate=%d%%", opt_n_threads, opt_algo_variant, opt_donate_level);
applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s/%s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), get_current_variant_name(), opt_donate_level, extra);
}
}

View File

@@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,19 +22,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __VERSION_H__
#define __VERSION_H__
#ifndef XMRIG_VERSION_H
#define XMRIG_VERSION_H
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_VERSION "0.5.0"
#define APP_DESC "Monero (XMR) CPU miner"
#define APP_VERSION "0.10.0-dev"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2017 xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
#define APP_VER_MAJOR 0
#define APP_VER_MINOR 5
#define APP_VER_MINOR 10
#define APP_VER_BUILD 0
#define APP_VER_REV 0
#endif /* __VERSION_H__ */
#endif /* XMRIG_VERSION_H */

Some files were not shown because too many files have changed in this diff Show More