mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-08 16:33:32 -05:00
Compare commits
78 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b145f14ad8 | ||
|
|
ce19edf36c | ||
|
|
108fd5690e | ||
|
|
c19fe3cea7 | ||
|
|
187c7680cc | ||
|
|
20061e1b8b | ||
|
|
2baccab0f9 | ||
|
|
44782befea | ||
|
|
5b7a1bc6dc | ||
|
|
e67a95bd8b | ||
|
|
88dd218ad8 | ||
|
|
ee9ba778f8 | ||
|
|
6080f292e7 | ||
|
|
cf8f81f5fa | ||
|
|
aab48fde96 | ||
|
|
bf25b4e5d4 | ||
|
|
0c2bda9aa5 | ||
|
|
d71a15e8da | ||
|
|
c4bccf410b | ||
|
|
719601f92b | ||
|
|
ff7be00f6f | ||
|
|
d3b0038bda | ||
|
|
3b46f5eb64 | ||
|
|
03dbb85c82 | ||
|
|
a2574e1b1b | ||
|
|
15b4244ea8 | ||
|
|
0dcf127c26 | ||
|
|
9964952c92 | ||
|
|
90648771c0 | ||
|
|
985adcbc13 | ||
|
|
16f3338e42 | ||
|
|
2650545916 | ||
|
|
c107547c6c | ||
|
|
60f7f93408 | ||
|
|
dfbfde5b22 | ||
|
|
0c752ee018 | ||
|
|
f329410940 | ||
|
|
0a6d70c499 | ||
|
|
1678dc1d6d | ||
|
|
caf7cda1d5 | ||
|
|
3de7983826 | ||
|
|
8dda8d293b | ||
|
|
e71e9486c6 | ||
|
|
b35ecef06f | ||
|
|
454c78cf0a | ||
|
|
c97693cd51 | ||
|
|
d855ae2e36 | ||
|
|
42d2ab18ee | ||
|
|
97a8d448c0 | ||
|
|
54cef68aa9 | ||
|
|
3492670839 | ||
|
|
c43c667fed | ||
|
|
361394be21 | ||
|
|
8235ae0fa6 | ||
|
|
ac89023a79 | ||
|
|
f92b5ed9f6 | ||
|
|
7ce21d458a | ||
|
|
5513fab59b | ||
|
|
5e6560cb07 | ||
|
|
25d76626c1 | ||
|
|
cad15069c8 | ||
|
|
8ab4c1c8bd | ||
|
|
f29d05bdde | ||
|
|
1474d3fe53 | ||
|
|
d2fd43ca03 | ||
|
|
95f48fd058 | ||
|
|
f8bda3a6b3 | ||
|
|
21c243ed8f | ||
|
|
1013aa5004 | ||
|
|
44875b0a94 | ||
|
|
b1f1474438 | ||
|
|
4eb7e5bbfd | ||
|
|
d874ede49e | ||
|
|
add10c829c | ||
|
|
4acfb213b8 | ||
|
|
78a4b9de0f | ||
|
|
9fe2bbcd81 | ||
|
|
adb778de8a |
21
CHANGELOG.md
Normal file
21
CHANGELOG.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# v0.8.1
|
||||
- Added nicehash support, detected automatically from the pool URL (for example `cryptonight.eu.nicehash.com:3355`) or enabled manually via the `--nicehash` option.
|
||||
|
||||
# v0.8.0
|
||||
- Added double hash mode, also known as lower power mode. `--av=2` and `--av=4`.
|
||||
- Added smart automatic CPU configuration. Default threads count now depends on size of the L3 cache of CPU.
|
||||
- Added CryptoNight-Lite support for AEON `-a cryptonight-lite`.
|
||||
- Added `--max-cpu-usage` option for auto CPU configuration mode.
|
||||
- Added `--safe` option to adjust threads and algorithm variations to the current CPU.
|
||||
- No more manual steps to enable huge pages on Windows. XMRig will do it automatically.
|
||||
- Removed BMI2 algorithm variation.
|
||||
- Removed default pool URL.
|
||||
|
||||
# v0.6.0
|
||||
- Added automatic cryptonight self test.
|
||||
- New software AES algorithm variation. Will be automatically selected if the CPU does not support AES-NI.
|
||||
- Added 32 bit builds.
|
||||
- Documented [algorithm variations](https://github.com/xmrig/xmrig#algorithm-variations).
|
||||
|
||||
# v0.5.0
|
||||
- Initial public release.
|
||||
@@ -1,9 +1,14 @@
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
project(xmrig C)
|
||||
|
||||
option(WITH_LIBCPUID "Use Libcpuid" ON)
|
||||
option(WITH_AEON "CryptoNight-Lite support" ON)
|
||||
|
||||
set(HEADERS
|
||||
compat.h
|
||||
algo/cryptonight/cryptonight.h
|
||||
algo/cryptonight/cryptonight_aesni.h
|
||||
algo/cryptonight/cryptonight_softaes.h
|
||||
elist.h
|
||||
xmrig.h
|
||||
version.h
|
||||
@@ -21,9 +26,6 @@ set(HEADERS_CRYPTO
|
||||
crypto/c_blake256.h
|
||||
crypto/c_jh.h
|
||||
crypto/c_skein.h
|
||||
crypto/oaes_lib.h
|
||||
crypto/oaes_config.h
|
||||
crypto/aesb.h
|
||||
)
|
||||
|
||||
set(HEADERS_COMPAT
|
||||
@@ -38,10 +40,13 @@ set(HEADERS_UTILS
|
||||
|
||||
set(SOURCES
|
||||
xmrig.c
|
||||
algo/cryptonight/cryptonight_common.c
|
||||
algo/cryptonight/cryptonight.c
|
||||
algo/cryptonight/cryptonight_av1_aesni.c
|
||||
algo/cryptonight/cryptonight_av2_aesni_double.c
|
||||
algo/cryptonight/cryptonight_av3_softaes.c
|
||||
algo/cryptonight/cryptonight_av4_softaes_double.c
|
||||
util.c
|
||||
options.c
|
||||
cpu.c
|
||||
stratum.c
|
||||
stats.c
|
||||
memory.c
|
||||
@@ -53,8 +58,7 @@ set(SOURCES_CRYPTO
|
||||
crypto/c_blake256.c
|
||||
crypto/c_jh.c
|
||||
crypto/c_skein.c
|
||||
crypto/oaes_lib.c
|
||||
crypto/aesb.c
|
||||
crypto/soft_aes.c
|
||||
)
|
||||
|
||||
set(SOURCES_UTILS
|
||||
@@ -63,7 +67,7 @@ set(SOURCES_UTILS
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c compat/winansi.c)
|
||||
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c win/app.rc compat/winansi.c)
|
||||
set(EXTRA_LIBS ws2_32)
|
||||
add_definitions(/D_WIN32_WINNT=0x600)
|
||||
else()
|
||||
@@ -74,16 +78,15 @@ endif()
|
||||
include_directories(.)
|
||||
add_definitions(/DUSE_NATIVE_THREADS)
|
||||
add_definitions(/D_GNU_SOURCE)
|
||||
add_definitions(/DDEBUG_THREADS)
|
||||
add_definitions(/DUNICODE)
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -mbmi2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate")
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
|
||||
|
||||
@@ -94,34 +97,43 @@ endif()
|
||||
include_directories(compat/jansson)
|
||||
add_subdirectory(compat/jansson)
|
||||
|
||||
find_package(CURL REQUIRED)
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
|
||||
|
||||
if (CURL_FOUND)
|
||||
include_directories(${CURL_INCLUDE_DIRS})
|
||||
add_definitions(/DCURL_STATICLIB)
|
||||
link_directories(${CURL_LIBRARIES})
|
||||
find_package(CURL REQUIRED)
|
||||
include_directories(${CURL_INCLUDE_DIRS})
|
||||
add_definitions(/DCURL_STATICLIB)
|
||||
link_directories(${CURL_LIBRARIES})
|
||||
|
||||
if (WITH_LIBCPUID)
|
||||
add_subdirectory(compat/libcpuid)
|
||||
|
||||
include_directories(compat/libcpuid)
|
||||
set(CPUID_LIB cpuid)
|
||||
set(SOURCES_CPUID cpu.c)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_LIBCPUID)
|
||||
set(SOURCES_CPUID cpu_stub.c)
|
||||
endif()
|
||||
|
||||
if (WITH_AEON)
|
||||
set(SOURCES_AEON
|
||||
algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
|
||||
algo/cryptonight-lite/cryptonight_lite_aesni.h
|
||||
algo/cryptonight-lite/cryptonight_lite_softaes.h
|
||||
)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_AEON)
|
||||
endif()
|
||||
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_subdirectory(algo/cryptonight/bmi2)
|
||||
|
||||
set(CRYPTONIGHT64
|
||||
algo/cryptonight/cryptonight_av1_aesni.c
|
||||
algo/cryptonight/cryptonight_av2_aesni_wolf.c
|
||||
algo/cryptonight/cryptonight_av4_legacy.c
|
||||
algo/cryptonight/cryptonight_av5_aesni_experimental.c
|
||||
)
|
||||
|
||||
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${CRYPTONIGHT64})
|
||||
target_link_libraries(xmrig jansson curl cryptonight_av3_aesni_bmi2 ${EXTRA_LIBS})
|
||||
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
|
||||
target_link_libraries(xmrig jansson curl ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
else()
|
||||
set(CRYPTONIGHT32
|
||||
algo/cryptonight/cryptonight_av1_aesni32.c
|
||||
algo/cryptonight/cryptonight_av4_legacy.c
|
||||
)
|
||||
|
||||
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${CRYPTONIGHT32})
|
||||
target_link_libraries(xmrig32 jansson -L${CURL_LIBRARIES} ${EXTRA_LIBS})
|
||||
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
|
||||
target_link_libraries(xmrig32 jansson curl ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
endif()
|
||||
|
||||
source_group("HEADERS" FILES ${HEADERS})
|
||||
|
||||
50
README.md
50
README.md
@@ -2,24 +2,29 @@
|
||||
XMRig is a high performance Monero (XMR) CPU miner, with official full Windows support.
|
||||
Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code.
|
||||
|
||||
<img src="https://i.imgur.com/GhmdK2f.png" width="480">
|
||||
<img src="http://i.imgur.com/GdRDnAu.png" width="596" >
|
||||
|
||||
#### Table of contents
|
||||
* [Features](#features)
|
||||
* [Download](#download)
|
||||
* [Usage](#usage)
|
||||
* [Algorithm variations](#algorithm-variations)
|
||||
* [Build](#build)
|
||||
* [Common Issues](#common-issues)
|
||||
* [Other information](#other-information)
|
||||
* [Donations](#Donations)
|
||||
* [Donations](#donations)
|
||||
* [Contacts](#contacts)
|
||||
|
||||
## Features
|
||||
* High performance, faster than others (290+ H/s on i7 6700).
|
||||
* High performance (290+ H/s on i7 6700).
|
||||
* Official Windows support.
|
||||
* Small Windows executable, only 350 KB without dependencies.
|
||||
* Small Windows executable, only 535 KB without dependencies.
|
||||
* Support for backup (failover) mining server.
|
||||
* keepalived support.
|
||||
* Command line options compatible with cpuminer.
|
||||
* CryptoNight-Lite support for AEON.
|
||||
* Smart automatic [CPU configuration](https://github.com/xmrig/xmrig/wiki/Threads).
|
||||
* Nicehash support
|
||||
* It's open source software.
|
||||
|
||||
## Download
|
||||
@@ -30,11 +35,12 @@ Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of
|
||||
## Usage
|
||||
### Basic example
|
||||
```
|
||||
xmrig.exe -o xmr-eu.dwarfpool.com:8005 -b xmr-usa.dwarfpool.com:8005 -u YOUR_WALLET -p x -k
|
||||
xmrig.exe -o xmr-eu.dwarfpool.com:8005 -u YOUR_WALLET -p x -k
|
||||
```
|
||||
|
||||
### Options
|
||||
```
|
||||
-a, --algo=ALGO cryptonight (default) or cryptonight-lite
|
||||
-o, --url=URL URL of mining server
|
||||
-b, --backup-url=URL URL of backup mining server
|
||||
-O, --userpass=U:P username:password pair for mining server
|
||||
@@ -50,10 +56,20 @@ xmrig.exe -o xmr-eu.dwarfpool.com:8005 -b xmr-usa.dwarfpool.com:8005 -u YOUR_WAL
|
||||
--donate-level=N donate level, default 5% (5 minutes in 100 minutes)
|
||||
-B, --background run the miner in the background
|
||||
-c, --config=FILE load a JSON-format configuration file
|
||||
--max-cpu-usage=N maximum cpu usage for automatic threads mode (default 75)
|
||||
--safe safe adjust threads and av settings for current cpu
|
||||
--nicehash enable nicehash support
|
||||
-h, --help display this help and exit
|
||||
-V, --version output version information and exit
|
||||
```
|
||||
|
||||
## Algorithm variations
|
||||
Since version 0.8.0.
|
||||
* `--av=1` For CPUs with hardware AES.
|
||||
* `--av=2` Lower power mode (double hash) of `1`.
|
||||
* `--av=3` Software AES implementation.
|
||||
* `--av=4` Lower power mode (double hash) of `3`.
|
||||
|
||||
## Build
|
||||
### Ubuntu (Debian-based distros)
|
||||
```
|
||||
@@ -62,28 +78,40 @@ git clone https://github.com/xmrig/xmrig.git
|
||||
cd xmrig
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||
make
|
||||
```
|
||||
|
||||
### Windows
|
||||
It's complicated, you need [MSYS2](http://www.msys2.org/), custom libcurl build, and of course CMake too.
|
||||
|
||||
Necessary MSYS2 packages:
|
||||
```
|
||||
pacman -Sy
|
||||
pacman -S mingw-w64-x86_64-gcc
|
||||
pacman -S make
|
||||
pacman -S mingw-w64-x86_64-cmake
|
||||
pacman -S mingw-w64-x86_64-pkg-config
|
||||
```
|
||||
Configure options for libcurl:
|
||||
```
|
||||
./configure --disable-shared --enable-optimize --enable-threaded-resolver --disable-libcurl-option --disable-ares --disable-rt --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --disable-manual --disable-ipv6 --disable-sspi --disable-crypto-auth --disable-ntlm-wb --disable-tls-srp --disable-unix-sockets --without-zlib --without-winssl --without-ssl --without-libssh2 --without-nghttp2 --disable-cookies --without-ca-bundle
|
||||
./configure --disable-shared --enable-optimize --enable-threaded-resolver --disable-libcurl-option --disable-ares --disable-rt --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --disable-manual --disable-ipv6 --disable-sspi --disable-crypto-auth --disable-ntlm-wb --disable-tls-srp --disable-unix-sockets --without-zlib --without-winssl --without-ssl --without-libssh2 --without-nghttp2 --disable-cookies --without-ca-bundle --without-librtmp
|
||||
```
|
||||
CMake options:
|
||||
```
|
||||
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\<path>\curl-7.53.1\include" -DCURL_LIBRARY="c:\<path>\curl-7.53.1\lib\.libs"
|
||||
```
|
||||
|
||||
### Optional features
|
||||
`-DWITH_LIBCPUID=OFF` Disable libcpuid. Auto configuration of CPU after this will be very limited.
|
||||
`-DWITH_AEON=OFF` Disable CryptoNight-Lite support.
|
||||
|
||||
## Common Issues
|
||||
### HUGE PAGES unavailable
|
||||
* Run XMRig as Administrator.
|
||||
* Enable SeLockMemoryPrivilege. For Windows 7 pro, or Windows 8 and above see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx).
|
||||
* Since version 0.8.0 XMRig automatically enables SeLockMemoryPrivilege for the current user, but a reboot or sign-out is still required. [Manual instruction](https://msdn.microsoft.com/en-gb/library/ms190730.aspx).
|
||||
|
||||
## Other information
|
||||
* Now only support 64 bit operating systems (Windows/Linux).
|
||||
* No HTTP support, only stratum protocol support.
|
||||
* No TLS support.
|
||||
* Default donation 5% (5 minutes in 100 minutes) can be reduced to 1% via command line option `--donate-level`.
|
||||
@@ -105,3 +133,7 @@ Please note performance is highly dependent on system load. The numbers above ar
|
||||
## Donations
|
||||
* XMR: `48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD`
|
||||
* BTC: `1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT`
|
||||
|
||||
## Contacts
|
||||
* support@xmrig.com
|
||||
* [reddit](https://www.reddit.com/user/XMRig/)
|
||||
|
||||
256
algo/cryptonight-lite/cryptonight_lite_aesni.h
Normal file
256
algo/cryptonight-lite/cryptonight_lite_aesni.h
Normal file
@@ -0,0 +1,256 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_LITE_AESNI_H__
|
||||
#define __CRYPTONIGHT_LITE_AESNI_H__
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
||||
// Shared body of aes_genkey_sub1/2/4/8. Expands inside a function that has
// `xout0` and `xout2` (both __m128i*) in scope and declares a local `xout1`.
// Step 1: *xout0 = sl_xor(*xout0) ^ shuffle(keygenassist(*xout2, imm8), 0xFF)
// Step 2: *xout2 = sl_xor(*xout2) ^ shuffle(keygenassist(new *xout0, 0x00), 0xAA)
// `imm8` must be a compile-time constant because it is passed straight to
// _mm_aeskeygenassist_si128.
#define aes_genkey_sub(imm8) \
|
||||
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); \
|
||||
*xout0 = sl_xor(*xout0); \
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1); \
|
||||
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); \
|
||||
*xout2 = sl_xor(*xout2); \
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1); \
|
||||
|
||||
|
||||
// Prefix-XOR over the four 32-bit lanes of tmp1: every lane is XORed with all
|
||||
// lanes below it, i.e. sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
inline __m128i sl_xor(__m128i tmp1)
|
||||
{
|
||||
__m128i tmp4;
|
||||
// Each _mm_slli_si128(.., 0x04) moves the register up by 4 bytes (one lane),
// so the three shift/xor pairs fold in the lanes 1, 2 and 3 positions below.
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
|
||||
// Key-expansion step using constant 0x1: updates *xout0 and *xout2 in place
// via the aes_genkey_sub macro.
inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x1)
|
||||
}
|
||||
|
||||
|
||||
// Key-expansion step using constant 0x2: updates *xout0 and *xout2 in place
// via the aes_genkey_sub macro.
inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x2)
|
||||
}
|
||||
|
||||
|
||||
// Key-expansion step using constant 0x4: updates *xout0 and *xout2 in place
// via the aes_genkey_sub macro.
inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x4)
|
||||
}
|
||||
|
||||
|
||||
// Key-expansion step using constant 0x8: updates *xout0 and *xout2 in place
// via the aes_genkey_sub macro.
inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x8)
|
||||
}
|
||||
|
||||
|
||||
// Applies one AES encryption round (_mm_aesenc_si128) with the same round key
// to eight independent 128-bit blocks, updating *x0..*x7 in place.
inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = _mm_aesenc_si128(*x0, key);
|
||||
*x1 = _mm_aesenc_si128(*x1, key);
|
||||
*x2 = _mm_aesenc_si128(*x2, key);
|
||||
*x3 = _mm_aesenc_si128(*x3, key);
|
||||
*x4 = _mm_aesenc_si128(*x4, key);
|
||||
*x5 = _mm_aesenc_si128(*x5, key);
|
||||
*x6 = _mm_aesenc_si128(*x6, key);
|
||||
*x7 = _mm_aesenc_si128(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
// Produces ten 128-bit round keys k0..k9 from the 32 bytes at `memory`.
// k0/k1 are memory[0]/memory[1] as loaded; each following pair (k2/k3, k4/k5,
// k6/k7, k8/k9) is the state after one more aes_genkey_subN step with the
// constants 1, 2, 4 and 8. `memory` is read with _mm_load_si128, i.e. the
// aligned-load intrinsic.
inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
|
||||
{
|
||||
__m128i xout0 = _mm_load_si128(memory);
|
||||
__m128i xout2 = _mm_load_si128(memory + 1);
|
||||
*k0 = xout0;
|
||||
*k1 = xout2;
|
||||
|
||||
aes_genkey_sub1(&xout0, &xout2);
|
||||
*k2 = xout0;
|
||||
*k3 = xout2;
|
||||
|
||||
aes_genkey_sub2(&xout0, &xout2);
|
||||
*k4 = xout0;
|
||||
*k5 = xout2;
|
||||
|
||||
aes_genkey_sub4(&xout0, &xout2);
|
||||
*k6 = xout0;
|
||||
*k7 = xout2;
|
||||
|
||||
aes_genkey_sub8(&xout0, &xout2);
|
||||
*k8 = xout0;
|
||||
*k9 = xout2;
|
||||
}
|
||||
|
||||
|
||||
// Fills the scratchpad at `output` from the hash state at `input`.
// Round keys come from input[0..1]; the eight working blocks start as
// input[4..11]. Every loop pass runs ten AES rounds over the eight blocks and
// stores them to the next eight __m128i slots of `output`, until
// MEMORY_LITE / sizeof(__m128i) slots are written (MEMORY_LITE is defined
// outside this header). Both pointers go through the aligned load/store
// intrinsics.
inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
// i counts __m128i slots and advances by the eight blocks stored per pass;
// __builtin_expect marks the continue condition as the likely case.
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Folds the scratchpad at `input` back into the hash state at `output`.
// Round keys come from output[2..3]; the eight working blocks start as
// output[4..11]. Every loop pass XORs the next eight scratchpad slots into the
// blocks and then runs ten AES rounds over them. After
// MEMORY_LITE / sizeof(__m128i) slots have been consumed the blocks are
// written back to output[4..11]. Both pointers go through the aligned
// load/store intrinsics.
inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
// Note the +2 offset: explode derives its keys from slots 0..1, implode from 2..3.
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
// Architecture-specific helpers:
//   EXTRACT64(X) - the low 64 bits of the __m128i X as an integer.
//   _umul128     - full 64x64 -> 128-bit unsigned multiply; returns the low
//                  64 bits of the product and stores the high 64 bits in *hi.
// No definitions are provided when neither __x86_64__ nor __i386__ is defined.
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
// 64-bit build: let the compiler form the product in unsigned __int128.
inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
||||
{
|
||||
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
|
||||
*hi = r >> 64;
|
||||
return (uint64_t) r;
|
||||
}
|
||||
#elif defined(__i386__)
|
||||
# define HI32(X) \
|
||||
_mm_srli_si128((X), 4)
|
||||
|
||||
|
||||
# define EXTRACT64(X) \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
// 32-bit build: schoolbook multiplication on 32-bit halves with explicit
// carry tracking. Returns the low 64 bits; *product_hi receives the high 64.
inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = multiplier >> 32;
|
||||
uint64_t b = multiplier & 0xFFFFFFFF;
|
||||
uint64_t c = multiplicand >> 32;
|
||||
uint64_t d = multiplicand & 0xFFFFFFFF;
|
||||
|
||||
//uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
//uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
// The sum of the two cross terms can wrap past 64 bits; a wrapped sum is
// smaller than either addend, which is what the comparison detects.
uint64_t adbc = ad + (b * c);
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
// The cross-term carry stands for 2^64 at weight 2^32, i.e. 2^32 in the high word.
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_LITE_AESNI_H__ */
|
||||
77
algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
Normal file
77
algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
// CryptoNight-Lite, algorithm variation 1 (single hash, hardware AES).
//   input/size - message to hash
//   output     - receives the digest written by the selected extra_hashes entry
//   ctx        - working context; ctx->state0 and ctx->memory are overwritten
// Steps: keccak the input into state0, expand state0 into the scratchpad, run
// 0x40000 iterations of the memory-hard loop, fold the scratchpad back into
// state0, apply keccakf, then finish with one of four extra_hashes functions.
void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
// NOTE(review): 200 is presumably the Keccak state size in bytes - confirm in crypto/c_keccak.h.
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
// l0 is declared const but the loop writes through it after casting.
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
// a = (al0, ah0) and b = bx0 are built by XORing 64-bit words of the state.
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
// Starts equal to al0; masked with 0xFFFF0 on every use, which yields a
// 16-byte-aligned scratchpad offset no larger than 0xFFFF0.
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
// First half: cx = AES round of scratchpad[idx0] keyed with a.
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
// Write b ^ cx back to the same slot, then move on to the slot named by cx.
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
// Second half: 64x64 multiply of the low word of cx with the slot's low word.
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
// The high product word is added to al0 and the low product word to ah0.
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
// XOR in the slot's previous contents (read before the store above).
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
// NOTE(review): 24 is presumably the Keccak-f round count - confirm in crypto/c_keccak.h.
keccakf(h0, 24);
|
||||
// The low two bits of the first state element select the final hash function.
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
||||
111
algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
Normal file
111
algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
// CryptoNight-Lite, algorithm variation 2 (double hash, hardware AES).
// Hashes two messages in one call with their loop iterations interleaved.
//   input/size - two messages of `size` bytes each, stored back to back
//                (the second starts at input + size)
//   output     - first digest at output, second digest at output + 32
//   ctx        - state0/state1 hold the two hash states; the first scratchpad
//                is at ctx->memory, the second at ctx->memory + MEMORY_LITE
void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
// l0/l1 are declared const but the loop writes through them after casting.
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY_LITE;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
// Per-lane a = (alN, ahN) and b = bxN, built by XORing 64-bit state words.
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
// Masked with 0xFFFF0 on every use: a 16-byte-aligned offset into the
// lane's own scratchpad, no larger than 0xFFFF0.
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
// AES half of the iteration, done for both lanes side by side.
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
// Multiply half, lane 0. hi/lo/cl/ch are reused for lane 1 below.
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
// Multiply half, lane 1 (same steps against the second scratchpad).
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
// Each lane picks its final hash function from the low two bits of its own state.
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
||||
77
algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
Normal file
77
algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0xFFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*)&l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*)&l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
||||
111
algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
Normal file
111
algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY_LITE;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
||||
237
algo/cryptonight-lite/cryptonight_lite_softaes.h
Normal file
237
algo/cryptonight-lite/cryptonight_lite_softaes.h
Normal file
@@ -0,0 +1,237 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_LITE_SOFTAES_H__
|
||||
#define __CRYPTONIGHT_LITE_SOFTAES_H__
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
extern __m128i soft_aesenc(__m128i in, __m128i key);
|
||||
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
inline __m128i sl_xor(__m128i tmp1)
|
||||
{
|
||||
__m128i tmp4;
|
||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
|
||||
{
|
||||
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft_aeskeygenassist(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = soft_aesenc(*x0, key);
|
||||
*x1 = soft_aesenc(*x1, key);
|
||||
*x2 = soft_aesenc(*x2, key);
|
||||
*x3 = soft_aesenc(*x3, key);
|
||||
*x4 = soft_aesenc(*x4, key);
|
||||
*x5 = soft_aesenc(*x5, key);
|
||||
*x6 = soft_aesenc(*x6, key);
|
||||
*x7 = soft_aesenc(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
|
||||
{
|
||||
__m128i xout0 = _mm_load_si128(memory);
|
||||
__m128i xout2 = _mm_load_si128(memory + 1);
|
||||
*k0 = xout0;
|
||||
*k1 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x1);
|
||||
*k2 = xout0;
|
||||
*k3 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x2);
|
||||
*k4 = xout0;
|
||||
*k5 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x4);
|
||||
*k6 = xout0;
|
||||
*k7 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x8);
|
||||
*k8 = xout0;
|
||||
*k9 = xout2;
|
||||
}
|
||||
|
||||
|
||||
inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; i < MEMORY_LITE / sizeof(__m128i); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
||||
{
|
||||
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
|
||||
*hi = r >> 64;
|
||||
return (uint64_t) r;
|
||||
}
|
||||
#elif defined(__i386__)
|
||||
# define HI32(X) \
|
||||
_mm_srli_si128((X), 4)
|
||||
|
||||
|
||||
# define EXTRACT64(X) \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = multiplier >> 32;
|
||||
uint64_t b = multiplier & 0xFFFFFFFF;
|
||||
uint64_t c = multiplicand >> 32;
|
||||
uint64_t d = multiplicand & 0xFFFFFFFF;
|
||||
|
||||
//uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
//uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + (b * c);
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */
|
||||
@@ -1,2 +0,0 @@
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mbmi2")
|
||||
add_library(cryptonight_av3_aesni_bmi2 STATIC ../cryptonight_av3_aesni_bmi2.c)
|
||||
244
algo/cryptonight/cryptonight.c
Normal file
244
algo/cryptonight/cryptonight.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
# include "xmrig.h"
|
||||
#endif
|
||||
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
/*
 * Self-test input: two 76-byte blobs stored back to back (152 bytes).
 * self_test() passes the start of this array with size 76; implementations
 * that hash two inputs per call read their second blob at input + size,
 * i.e. the second half of this array.
 */
static const char test_input[152] = {
    0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
    0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
    0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
    0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
    0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
    0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
    0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
    0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
    0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
    0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
};
|
||||
|
||||
|
||||
/*
 * Expected self-test result when the lite algorithm is not selected.
 * self_test() compares the first 32 bytes, or all 64 when opt_double_hash is set.
 */
static const char test_output0[64] = {
    0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
    0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
};
|
||||
|
||||
|
||||
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
/*
 * Expected self-test result when opt_algo is ALGO_CRYPTONIGHT_LITE.
 * self_test() compares the first 32 bytes, or all 64 when opt_double_hash is set.
 */
static const char test_output1[64] = {
    0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
    0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
};
|
||||
|
||||
void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
#endif
|
||||
|
||||
void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) = NULL;
|
||||
|
||||
|
||||
static bool self_test() {
|
||||
if (cryptonight_hash_ctx == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
char output[64];
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
|
||||
|
||||
cryptonight_hash_ctx(test_input, 76, output, ctx);
|
||||
|
||||
_mm_free(ctx->memory);
|
||||
_mm_free(ctx);
|
||||
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return memcmp(output, test_output1, (opt_double_hash ? 64 : 32)) == 0;
|
||||
}
|
||||
# endif
|
||||
|
||||
return memcmp(output, test_output0, (opt_double_hash ? 64 : 32)) == 0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
bool cryptonight_lite_init(int variant) {
|
||||
switch (variant) {
|
||||
case AEON_AV1_AESNI:
|
||||
cryptonight_hash_ctx = cryptonight_lite_av1_aesni;
|
||||
break;
|
||||
|
||||
case AEON_AV2_AESNI_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_lite_av2_aesni_double;
|
||||
break;
|
||||
|
||||
case AEON_AV3_SOFT_AES:
|
||||
cryptonight_hash_ctx = cryptonight_lite_av3_softaes;
|
||||
break;
|
||||
|
||||
case AEON_AV4_SOFT_AES_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_lite_av4_softaes_double;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return self_test();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
bool cryptonight_init(int variant)
|
||||
{
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return cryptonight_lite_init(variant);
|
||||
}
|
||||
# endif
|
||||
|
||||
switch (variant) {
|
||||
case XMR_AV1_AESNI:
|
||||
cryptonight_hash_ctx = cryptonight_av1_aesni;
|
||||
break;
|
||||
|
||||
case XMR_AV2_AESNI_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_av2_aesni_double;
|
||||
break;
|
||||
|
||||
case XMR_AV3_SOFT_AES:
|
||||
cryptonight_hash_ctx = cryptonight_av3_softaes;
|
||||
break;
|
||||
|
||||
case XMR_AV4_SOFT_AES_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_av4_softaes_double;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return self_test();
|
||||
}
|
||||
|
||||
|
||||
/* Adapter giving blake256_hash() the extra_hashes[] signature (output buffer moves to the last argument). */
static inline void do_blake_hash(const void* input, size_t len, char* output) {
    uint8_t* digest = (uint8_t*) output;

    blake256_hash(digest, input, len);
}
|
||||
|
||||
|
||||
/* Adapter giving groestl() the extra_hashes[] signature; the length is passed multiplied by 8 (presumably bits - confirm against c_groestl.h). */
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
    uint8_t* digest = (uint8_t*) output;

    groestl(input, len * 8, digest);
}
|
||||
|
||||
|
||||
/* Adapter giving jh_hash() the extra_hashes[] signature; passes 32 * 8 as the first argument and the length multiplied by 8. */
static inline void do_jh_hash(const void* input, size_t len, char* output) {
    uint8_t* digest = (uint8_t*) output;

    jh_hash(32 * 8, input, 8 * len, digest);
}
|
||||
|
||||
|
||||
/* Adapter giving skein_hash() the extra_hashes[] signature; passes 8 * 32 as the first argument and the length multiplied by 8. */
static inline void do_skein_hash(const void* input, size_t len, char* output) {
    uint8_t* digest = (uint8_t*) output;

    skein_hash(8 * 32, input, 8 * len, digest);
}
|
||||
|
||||
|
||||
/* Final-hash dispatch table; the hash implementations index it with (first state byte & 3). */
void (* const extra_hashes[4])(const void *, size_t, char *) = {
    do_blake_hash,
    do_groestl_hash,
    do_jh_hash,
    do_skein_hash
};
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
|
||||
|
||||
do {
|
||||
cryptonight_hash_ctx(blob, blob_size, hash, ctx);
|
||||
(*hashes_done)++;
|
||||
|
||||
if (unlikely(hash[7] < target)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
(*nonceptr)++;
|
||||
} while (likely(((*nonceptr) < max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
|
||||
int rc = 0;
|
||||
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
|
||||
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
|
||||
|
||||
do {
|
||||
cryptonight_hash_ctx(blob, blob_size, hash, ctx);
|
||||
(*hashes_done) += 2;
|
||||
|
||||
if (unlikely(hash[7] < target)) {
|
||||
return rc |= 1;
|
||||
}
|
||||
|
||||
if (unlikely(hash[15] < target)) {
|
||||
return rc |= 2;
|
||||
}
|
||||
|
||||
if (rc) {
|
||||
break;
|
||||
}
|
||||
|
||||
(*nonceptr0)++;
|
||||
(*nonceptr1)++;
|
||||
} while (likely(((*nonceptr0) < max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
@@ -26,47 +26,22 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define MEMORY_M128I (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
|
||||
#define ITER (1 << 20)
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
|
||||
#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union hash_state {
|
||||
uint8_t b[200];
|
||||
uint64_t w[25];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
#define MEMORY 2097152 /* 2 MiB */
|
||||
#define MEMORY_LITE 1048576 /* 1 MiB */
|
||||
|
||||
struct cryptonight_ctx {
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16)));
|
||||
uint64_t a[2] __attribute__((aligned(16)));
|
||||
uint64_t b[2] __attribute__((aligned(16)));
|
||||
uint64_t c[2] __attribute__((aligned(16)));
|
||||
uint8_t state0[200] __attribute__((aligned(16)));
|
||||
uint8_t state1[200] __attribute__((aligned(16)));
|
||||
uint8_t* memory __attribute__((aligned(16)));
|
||||
};
|
||||
|
||||
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
void cryptonight_init(int variant);
|
||||
void cryptonight_hash(void* output, const void* input, size_t input_len);
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict pdata, const uint32_t *restrict ptarget, uint32_t max_nonce, unsigned long *restrict hashes_done, const char *memory, struct cryptonight_ctx *persistentctx);
|
||||
bool cryptonight_init(int variant);
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
|
||||
|
||||
#endif /* __CRYPTONIGHT_H__ */
|
||||
|
||||
256
algo/cryptonight/cryptonight_aesni.h
Normal file
256
algo/cryptonight/cryptonight_aesni.h
Normal file
@@ -0,0 +1,256 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_AESNI_H__
|
||||
#define __CRYPTONIGHT_AESNI_H__
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
||||
// Body shared by aes_genkey_sub1/2/4/8: derives the next pair of round keys
// from the current pair, in place.
// It expects `xout0` and `xout2` (both __m128i*) to be in scope at the point
// of expansion and declares a local `xout1`, so it can be expanded at most
// once per scope. The expansion already ends with ';'.
// NOTE(review): presumably a macro rather than a function because
// _mm_aeskeygenassist_si128 needs its second argument as a compile-time
// constant - confirm against the intrinsic's documentation.
#define aes_genkey_sub(imm8)                                    \
    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8));  \
    xout1  = _mm_shuffle_epi32(xout1, 0xFF);                    \
    *xout0 = sl_xor(*xout0);                                    \
    *xout0 = _mm_xor_si128(*xout0, xout1);                      \
    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);           \
    xout1  = _mm_shuffle_epi32(xout1, 0xAA);                    \
    *xout2 = sl_xor(*xout2);                                    \
    *xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
inline __m128i sl_xor(__m128i tmp1)
|
||||
{
|
||||
__m128i tmp4;
|
||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x1)
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x2)
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x4)
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x8)
|
||||
}
|
||||
|
||||
|
||||
inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = _mm_aesenc_si128(*x0, key);
|
||||
*x1 = _mm_aesenc_si128(*x1, key);
|
||||
*x2 = _mm_aesenc_si128(*x2, key);
|
||||
*x3 = _mm_aesenc_si128(*x3, key);
|
||||
*x4 = _mm_aesenc_si128(*x4, key);
|
||||
*x5 = _mm_aesenc_si128(*x5, key);
|
||||
*x6 = _mm_aesenc_si128(*x6, key);
|
||||
*x7 = _mm_aesenc_si128(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
|
||||
{
|
||||
__m128i xout0 = _mm_load_si128(memory);
|
||||
__m128i xout2 = _mm_load_si128(memory + 1);
|
||||
*k0 = xout0;
|
||||
*k1 = xout2;
|
||||
|
||||
aes_genkey_sub1(&xout0, &xout2);
|
||||
*k2 = xout0;
|
||||
*k3 = xout2;
|
||||
|
||||
aes_genkey_sub2(&xout0, &xout2);
|
||||
*k4 = xout0;
|
||||
*k5 = xout2;
|
||||
|
||||
aes_genkey_sub4(&xout0, &xout2);
|
||||
*k6 = xout0;
|
||||
*k7 = xout2;
|
||||
|
||||
aes_genkey_sub8(&xout0, &xout2);
|
||||
*k8 = xout0;
|
||||
*k9 = xout2;
|
||||
}
|
||||
|
||||
|
||||
inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
// 64x64 -> 128-bit unsigned multiply.
// Returns the low 64 bits of a * b and stores the high 64 bits in *hi.
// `static inline`: this header is included by several .c files, and a plain
// C99 `inline` definition would leave non-inlined calls without a symbol to
// link against.
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
    *hi = r >> 64;
    return (uint64_t) r;
}
|
||||
#elif defined(__i386__)
|
||||
# define HI32(X) \
|
||||
_mm_srli_si128((X), 4)
|
||||
|
||||
|
||||
# define EXTRACT64(X) \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
// 64x64 -> 128-bit unsigned multiply built from 32-bit halves.
// Returns the low 64 bits of the product and stores the high 64 bits in
// *product_hi.
// `static inline`: this header is included by several .c files, and a plain
// C99 `inline` definition would leave non-inlined calls without a symbol to
// link against.
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
    // multiplier   = ab = a * 2^32 + b
    // multiplicand = cd = c * 2^32 + d
    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
    uint64_t a = multiplier >> 32;
    uint64_t b = multiplier & 0xFFFFFFFF;
    uint64_t c = multiplicand >> 32;
    uint64_t d = multiplicand & 0xFFFFFFFF;

    uint64_t ad = a * d;
    uint64_t bd = b * d;

    // The sum of the two cross terms may wrap around 2^64; remember the carry.
    uint64_t adbc = ad + (b * c);
    uint64_t adbc_carry = adbc < ad ? 1 : 0;

    // multiplier * multiplicand = product_hi * 2^64 + product_lo
    uint64_t product_lo = bd + (adbc << 32);
    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
    // The cross-term carry is worth 2^64 * 2^32, i.e. bit 32 of the high word.
    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

    return product_lo;
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_AESNI_H__ */
|
||||
@@ -4,6 +4,7 @@
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
@@ -25,192 +26,52 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
__m128i tmp4;
|
||||
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
|
||||
tmp4 = _mm_slli_si128(*tmp1, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
|
||||
}
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
tmp1 = _mm_load_si128((__m128i *)keybuf);
|
||||
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[2] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[3] = tmp3;
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[4] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[5] = tmp3;
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[6] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[7] = tmp3;
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[8] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[9] = tmp3;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[10] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[11] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[12] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[13] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
void cryptonight_av1_aesni(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
|
||||
uint8_t ExpandedKey[256];
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *) memory;
|
||||
expkey = (__m128i *)ExpandedKey;
|
||||
xmminput = (__m128i *)ctx->text;
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
|
||||
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
|
||||
}
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
|
||||
|
||||
uint64_t c[2] __attribute((aligned(16)));
|
||||
uint64_t d[2] __attribute((aligned(16)));
|
||||
|
||||
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
|
||||
_mm_store_si128((__m128i *) c, c_x);
|
||||
|
||||
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
||||
b_x = c_x;
|
||||
|
||||
d[0] = d_ptr[0];
|
||||
d[1] = d_ptr[1];
|
||||
|
||||
{
|
||||
unsigned __int128 res = (unsigned __int128) c[0] * d[0];
|
||||
|
||||
d_ptr[0] = ctx->a[0] += res >> 64;
|
||||
d_ptr[1] = ctx->a[1] += (uint64_t) res;
|
||||
}
|
||||
|
||||
ctx->a[0] ^= d[0];
|
||||
ctx->a[1] ^= d[1];
|
||||
|
||||
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE) {
|
||||
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
|
||||
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
|
||||
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
|
||||
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
|
||||
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
|
||||
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
|
||||
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
|
||||
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
|
||||
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
||||
|
||||
@@ -1,239 +0,0 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
// Schoolbook 64x64 -> 128-bit unsigned multiplication on 32-bit halves.
// Returns the low 64 bits; the high 64 bits are written to *product_hi.
static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
    const uint64_t m_hi = multiplier >> 32;
    const uint64_t m_lo = multiplier & 0xFFFFFFFF;
    const uint64_t n_hi = multiplicand >> 32;
    const uint64_t n_lo = multiplicand & 0xFFFFFFFF;

    // Partial products: low*low, and the two cross terms summed (may wrap).
    const uint64_t low_part   = m_lo * n_lo;
    const uint64_t cross_head = m_hi * n_lo;
    const uint64_t cross_sum  = cross_head + m_lo * n_hi;

    uint64_t high_word = m_hi * n_hi + (cross_sum >> 32);
    if (cross_sum < cross_head) {
        // Wrap-around of the cross terms contributes 2^32 to the high word.
        high_word += (uint64_t) 1 << 32;
    }

    const uint64_t low_word = low_part + (cross_sum << 32);
    if (low_word < low_part) {
        // Carry out of the low word.
        high_word += 1;
    }

    *product_hi = high_word;
    return low_word;
}
|
||||
|
||||
|
||||
// First half of a key-expansion step.
// Broadcasts the top 32-bit lane of *tmp2 to all lanes (stored back to *tmp2),
// XORs *tmp1 with itself shifted left by 4, 8 and 12 bytes, then XORs the
// broadcast value into *tmp1.
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
    __m128i shifted;
    int step;

    *tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);

    shifted = _mm_slli_si128(*tmp1, 0x04);
    for (step = 0; step < 3; ++step) {
        *tmp1   = _mm_xor_si128(*tmp1, shifted);
        shifted = _mm_slli_si128(shifted, 0x04);
    }

    *tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
//
// Expands the 32 bytes at keybuf[0..31] in place: slots 2..14 of keybuf,
// viewed as an array of __m128i, are filled with derived round keys; slots 0
// and 1 are left as they were.  keybuf is accessed with aligned loads and
// stores and must have room for 15 slots (240 bytes).
static inline void ExpandAESKey256(char *keybuf)
{
    __m128i *keys = (__m128i *) keybuf;
    __m128i even  = _mm_load_si128((__m128i *) keybuf);
    __m128i odd   = _mm_load_si128((__m128i *) (keybuf + 0x10));
    __m128i assist;

    // The round constant has to be an immediate, hence a macro, not a loop.
#define EXPAND_EVEN(rcon, slot)                              \
    do {                                                     \
        assist = _mm_aeskeygenassist_si128(odd, (rcon));     \
        ExpandAESKey256_sub1(&even, &assist);                \
        keys[(slot)] = even;                                 \
    } while (0)

#define EXPAND_ODD(slot)                                     \
    do {                                                     \
        ExpandAESKey256_sub2(&even, &odd);                   \
        keys[(slot)] = odd;                                  \
    } while (0)

    EXPAND_EVEN(0x01, 2);
    EXPAND_ODD(3);

    EXPAND_EVEN(0x02, 4);
    EXPAND_ODD(5);

    EXPAND_EVEN(0x04, 6);
    EXPAND_ODD(7);

    EXPAND_EVEN(0x08, 8);
    EXPAND_ODD(9);

    EXPAND_EVEN(0x10, 10);
    EXPAND_ODD(11);

    EXPAND_EVEN(0x20, 12);
    EXPAND_ODD(13);

    // The last step only produces the even slot.
    EXPAND_EVEN(0x40, 14);

#undef EXPAND_ODD
#undef EXPAND_EVEN
}
|
||||
|
||||
void cryptonight_av1_aesni32(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
|
||||
uint8_t ExpandedKey[256];
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *) memory;
|
||||
expkey = (__m128i *)ExpandedKey;
|
||||
xmminput = (__m128i *)ctx->text;
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
|
||||
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
|
||||
}
|
||||
|
||||
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
|
||||
|
||||
uint64_t c[2] __attribute((aligned(16)));
|
||||
uint64_t d[2] __attribute((aligned(16)));
|
||||
uint64_t hi;
|
||||
|
||||
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
|
||||
_mm_store_si128((__m128i *) c, c_x);
|
||||
|
||||
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
||||
b_x = c_x;
|
||||
|
||||
d[0] = d_ptr[0];
|
||||
d[1] = d_ptr[1];
|
||||
|
||||
d_ptr[1] = ctx->a[1] += mul128(c[0], d[0], &hi);
|
||||
d_ptr[0] = ctx->a[0] += hi;
|
||||
|
||||
ctx->a[0] ^= d[0];
|
||||
ctx->a[1] ^= d[1];
|
||||
|
||||
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE) {
|
||||
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
|
||||
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
|
||||
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
|
||||
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
|
||||
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
|
||||
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
|
||||
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
|
||||
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
|
||||
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
}
|
||||
111
algo/cryptonight/cryptonight_av2_aesni_double.c
Normal file
111
algo/cryptonight/cryptonight_av2_aesni_double.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
||||
@@ -1,237 +0,0 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
/*
 * Key-schedule helper operating in place on two 128-bit vectors.
 *
 * *tmp2 is overwritten with its own highest 32-bit lane copied into all four
 * lanes. *tmp1 is XORed with copies of its original value shifted left by 4,
 * 8 and 12 bytes, and finally with the updated *tmp2.
 */
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
    __m128i shifted;
    int step;

    // 0xFF selects lane 3 for every destination lane.
    *tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);

    // Fold in the value shifted by one, two and three 32-bit lanes.
    shifted = *tmp1;
    for (step = 0; step < 3; ++step) {
        shifted = _mm_slli_si128(shifted, 0x04);
        *tmp1 = _mm_xor_si128(*tmp1, shifted);
    }

    *tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines

/*
 * Expands the 32 bytes at the start of keybuf into further 16-byte round keys.
 *
 * keybuf is treated as an array of __m128i: slots 0 and 1 are read as the
 * input key and slots 2..14 are written, so the buffer must hold at least
 * 15 * 16 = 240 bytes and be 16-byte aligned (aligned loads and stores are
 * used).
 */
static inline void ExpandAESKey256(char *keybuf)
{
    __m128i *const round_keys = (__m128i *) keybuf;
    __m128i even = _mm_load_si128(&round_keys[0]);
    __m128i odd  = _mm_load_si128(&round_keys[1]);
    __m128i assist;

// One schedule step producing slots `slot` and `slot + 1`. A macro is needed
// because _mm_aeskeygenassist_si128 requires a compile-time constant.
#define CN_EXPAND_KEY_PAIR(slot, rcon)                         \
    do {                                                       \
        assist = _mm_aeskeygenassist_si128(odd, (rcon));       \
        ExpandAESKey256_sub1(&even, &assist);                  \
        round_keys[(slot)] = even;                             \
        ExpandAESKey256_sub2(&even, &odd);                     \
        round_keys[(slot) + 1] = odd;                          \
    } while (0)

    CN_EXPAND_KEY_PAIR(2,  0x01);
    CN_EXPAND_KEY_PAIR(4,  0x02);
    CN_EXPAND_KEY_PAIR(6,  0x04);
    CN_EXPAND_KEY_PAIR(8,  0x08);
    CN_EXPAND_KEY_PAIR(10, 0x10);
    CN_EXPAND_KEY_PAIR(12, 0x20);

#undef CN_EXPAND_KEY_PAIR

    // The last step only produces the even slot.
    assist = _mm_aeskeygenassist_si128(odd, 0x40);
    ExpandAESKey256_sub1(&even, &assist);
    round_keys[14] = even;
}
|
||||
|
||||
void cryptonight_av2_aesni_wolf(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
|
||||
uint8_t ExpandedKey[256];
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *)memory;
|
||||
expkey = (__m128i *)ExpandedKey;
|
||||
xmminput = (__m128i *)ctx->text;
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
|
||||
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
|
||||
}
|
||||
|
||||
__m128i b_x = _mm_load_si128((__m128i *)ctx->b);
|
||||
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
|
||||
a[0] = ctx->a[0];
|
||||
a[1] = ctx->a[1];
|
||||
|
||||
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
|
||||
{
|
||||
__m128i c_x = _mm_load_si128((__m128i *)&memory[a[0] & 0x1FFFF0]);
|
||||
__m128i a_x = _mm_load_si128((__m128i *)a);
|
||||
uint64_t c[2];
|
||||
c_x = _mm_aesenc_si128(c_x, a_x);
|
||||
|
||||
_mm_store_si128((__m128i *)c, c_x);
|
||||
__builtin_prefetch(&memory[c[0] & 0x1FFFF0], 0, 1);
|
||||
|
||||
b_x = _mm_xor_si128(b_x, c_x);
|
||||
_mm_store_si128((__m128i *)&memory[a[0] & 0x1FFFF0], b_x);
|
||||
|
||||
uint64_t *nextblock = (uint64_t *)&memory[c[0] & 0x1FFFF0];
|
||||
uint64_t b[2];
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
{
|
||||
uint64_t hi, lo;
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
|
||||
__asm__("mulq %3\n\t"
|
||||
: "=d" (hi),
|
||||
"=a" (lo)
|
||||
: "%a" (c[0]),
|
||||
"rm" (b[0])
|
||||
: "cc" );
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
}
|
||||
|
||||
uint64_t *dst = (uint64_t *) &memory[c[0] & 0x1FFFF0];
|
||||
dst[0] = a[0];
|
||||
dst[1] = a[1];
|
||||
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
b_x = c_x;
|
||||
__builtin_prefetch(&memory[a[0] & 0x1FFFF0], 0, 3);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
||||
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
|
||||
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
|
||||
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
|
||||
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
|
||||
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
|
||||
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
|
||||
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
|
||||
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
|
||||
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
}
|
||||
@@ -1,214 +0,0 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
/*
 * Key-schedule helper operating in place on two 128-bit vectors.
 *
 * *tmp2 is overwritten with its own highest 32-bit lane copied into all four
 * lanes. *tmp1 is XORed with copies of its original value shifted left by 4,
 * 8 and 12 bytes, and finally with the updated *tmp2.
 */
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
    __m128i shifted;
    int step;

    // 0xFF selects lane 3 for every destination lane.
    *tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);

    // Fold in the value shifted by one, two and three 32-bit lanes.
    shifted = *tmp1;
    for (step = 0; step < 3; ++step) {
        shifted = _mm_slli_si128(shifted, 0x04);
        *tmp1 = _mm_xor_si128(*tmp1, shifted);
    }

    *tmp1 = _mm_xor_si128(*tmp1, *tmp2);
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines

/*
 * Expands the 32 bytes at the start of keybuf into further 16-byte round keys.
 *
 * keybuf is treated as an array of __m128i: slots 0 and 1 are read as the
 * input key and slots 2..14 are written, so the buffer must hold at least
 * 15 * 16 = 240 bytes and be 16-byte aligned (aligned loads and stores are
 * used).
 */
static inline void ExpandAESKey256(char *keybuf)
{
    __m128i *const round_keys = (__m128i *) keybuf;
    __m128i even = _mm_load_si128(&round_keys[0]);
    __m128i odd  = _mm_load_si128(&round_keys[1]);
    __m128i assist;

// One schedule step producing slots `slot` and `slot + 1`. A macro is needed
// because _mm_aeskeygenassist_si128 requires a compile-time constant.
#define CN_EXPAND_KEY_PAIR(slot, rcon)                         \
    do {                                                       \
        assist = _mm_aeskeygenassist_si128(odd, (rcon));       \
        ExpandAESKey256_sub1(&even, &assist);                  \
        round_keys[(slot)] = even;                             \
        ExpandAESKey256_sub2(&even, &odd);                     \
        round_keys[(slot) + 1] = odd;                          \
    } while (0)

    CN_EXPAND_KEY_PAIR(2,  0x01);
    CN_EXPAND_KEY_PAIR(4,  0x02);
    CN_EXPAND_KEY_PAIR(6,  0x04);
    CN_EXPAND_KEY_PAIR(8,  0x08);
    CN_EXPAND_KEY_PAIR(10, 0x10);
    CN_EXPAND_KEY_PAIR(12, 0x20);

#undef CN_EXPAND_KEY_PAIR

    // The last step only produces the even slot.
    assist = _mm_aeskeygenassist_si128(odd, 0x40);
    ExpandAESKey256_sub1(&even, &assist);
    round_keys[14] = even;
}
|
||||
|
||||
void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
|
||||
uint8_t ExpandedKey[256];
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *) memory;
|
||||
expkey = (__m128i *)ExpandedKey;
|
||||
xmminput = (__m128i *)ctx->text;
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE)
|
||||
{
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
_mm_store_si128(&(longoutput[(i >> 4)]), xmminput[0]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 1]), xmminput[1]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 2]), xmminput[2]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 3]), xmminput[3]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 4]), xmminput[4]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 5]), xmminput[5]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 6]), xmminput[6]);
|
||||
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
|
||||
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
|
||||
}
|
||||
|
||||
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
|
||||
|
||||
uint64_t c[2] __attribute((aligned(16)));
|
||||
uint64_t d[2] __attribute((aligned(16)));
|
||||
uint64_t hi;
|
||||
|
||||
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
|
||||
_mm_store_si128((__m128i *) c, c_x);
|
||||
|
||||
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
||||
b_x = c_x;
|
||||
|
||||
d[0] = d_ptr[0];
|
||||
d[1] = d_ptr[1];
|
||||
|
||||
d_ptr[1] = ctx->a[1] += _mulx_u64(c[0], d[0], &hi);
|
||||
d_ptr[0] = ctx->a[0] += hi;
|
||||
|
||||
ctx->a[0] ^= d[0];
|
||||
ctx->a[1] ^= d[1];
|
||||
|
||||
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY, 1); i += INIT_SIZE_BYTE) {
|
||||
xmminput[0] = _mm_xor_si128(longoutput[(i >> 4)], xmminput[0]);
|
||||
xmminput[1] = _mm_xor_si128(longoutput[(i >> 4) + 1], xmminput[1]);
|
||||
xmminput[2] = _mm_xor_si128(longoutput[(i >> 4) + 2], xmminput[2]);
|
||||
xmminput[3] = _mm_xor_si128(longoutput[(i >> 4) + 3], xmminput[3]);
|
||||
xmminput[4] = _mm_xor_si128(longoutput[(i >> 4) + 4], xmminput[4]);
|
||||
xmminput[5] = _mm_xor_si128(longoutput[(i >> 4) + 5], xmminput[5]);
|
||||
xmminput[6] = _mm_xor_si128(longoutput[(i >> 4) + 6], xmminput[6]);
|
||||
xmminput[7] = _mm_xor_si128(longoutput[(i >> 4) + 7], xmminput[7]);
|
||||
|
||||
for(j = 0; j < 10; j++)
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
}
|
||||
77
algo/cryptonight/cryptonight_av3_softaes.c
Normal file
77
algo/cryptonight/cryptonight_av3_softaes.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
||||
@@ -1,151 +0,0 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "compat.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/aesb.h"
|
||||
#include "crypto/oaes_lib.h"
|
||||
|
||||
|
||||
/*
 * Portable 64 x 64 -> 128 bit unsigned multiplication.
 *
 * Returns the low 64 bits of multiplier * multiplicand and stores the high
 * 64 bits in *product_hi.
 *
 * Both operands are split into 32-bit halves:
 *   (xh * 2^32 + xl) * (yh * 2^32 + yl)
 *     = xh*yh * 2^64 + (xh*yl + xl*yh) * 2^32 + xl*yl
 */
static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
    const uint64_t xh = multiplier >> 32;
    const uint64_t xl = multiplier & 0xFFFFFFFF;
    const uint64_t yh = multiplicand >> 32;
    const uint64_t yl = multiplicand & 0xFFFFFFFF;

    const uint64_t low_part    = xl * yl;
    const uint64_t cross_first = xh * yl;

    // Sum of the two cross terms; it can wrap past 64 bits, so detect that.
    const uint64_t cross       = cross_first + (xl * yh);
    const uint64_t cross_carry = (cross < cross_first) ? 1 : 0;

    // Low word: the low partial product plus the low half of the cross sum.
    const uint64_t low_word  = low_part + (cross << 32);
    const uint64_t low_carry = (low_word < low_part) ? 1 : 0;

    // A wrapped cross sum is worth 2^64 * 2^32, i.e. 2^32 in the high word.
    *product_hi = (xh * yh) + (cross >> 32) + (cross_carry << 32) + low_carry;

    return low_word;
}
|
||||
|
||||
|
||||
/*
 * Multiply / add / XOR step on 16-byte blocks viewed as two uint64_t words.
 *
 * Computes the 128-bit product of the first words of `a` and `dst`, adds the
 * block `c` to it (high half + c[0], low half + c[1], each wrapping
 * independently), then:
 *   - c   becomes the old dst XOR that sum,
 *   - dst becomes that sum.
 * `a` is only read.
 */
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
    uint64_t *const acc  = (uint64_t*) c;
    uint64_t *const cell = (uint64_t*) dst;
    uint64_t hi;
    uint64_t lo = mul128(((uint64_t*) a)[0], cell[0], &hi);

    lo += acc[1];
    hi += acc[0];

    // Update c from the old dst contents before dst is overwritten.
    acc[0] = cell[0] ^ hi;
    acc[1] = cell[1] ^ lo;
    cell[0] = hi;
    cell[1] = lo;
}
|
||||
|
||||
|
||||
/*
 * XORs the 16-byte block at `b` into the 16-byte block at `a`, in place.
 * Both pointers are accessed as pairs of uint64_t words.
 */
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
    uint64_t *const target = (uint64_t*) a;
    const uint64_t *const source = (const uint64_t*) b;

    target[0] ^= source[0];
    target[1] ^= source[1];
}
|
||||
|
||||
|
||||
/*
 * Writes the XOR of the 16-byte blocks at `a` and `b` to `dst`.
 * All three pointers are accessed as pairs of uint64_t words; the inputs are
 * left untouched.
 */
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
    const uint64_t *const lhs = (const uint64_t*) a;
    const uint64_t *const rhs = (const uint64_t*) b;
    uint64_t *const out = (uint64_t*) dst;

    out[0] = lhs[0] ^ rhs[0];
    out[1] = lhs[1] ^ rhs[1];
}
|
||||
|
||||
|
||||
void cryptonight_av4_legacy(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx) {
|
||||
oaes_ctx *aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], aes_ctx->key->exp_data);
|
||||
memcpy((void *) &memory[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], (uint8_t*) ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], (uint8_t*) ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = ctx->a[0] & 0x1FFFF0;
|
||||
aesb_single_round((const uint8_t*) &memory[j], (uint8_t *) ctx->c, (const uint8_t *) ctx->a);
|
||||
xor_blocks_dst((const uint8_t*) ctx->c, (const uint8_t*) ctx->b, (uint8_t*) &memory[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst((const uint8_t*) ctx->c, (uint8_t*) ctx->a, (uint8_t*) &memory[ctx->c[0] & 0x1FFFF0]);
|
||||
/* Iteration 3 */
|
||||
j = ctx->a[0] & 0x1FFFF0;
|
||||
aesb_single_round(&memory[j], (uint8_t *) ctx->b, (uint8_t *) ctx->a);
|
||||
xor_blocks_dst((const uint8_t*) ctx->b, (const uint8_t*) ctx->c, (uint8_t*) &memory[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst((const uint8_t*) ctx->b, (uint8_t*) ctx->a, (uint8_t*) &memory[ctx->b[0] & 0x1FFFF0]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &memory[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &memory[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &memory[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &memory[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &memory[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &memory[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &memory[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &memory[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
|
||||
}
|
||||
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &aes_ctx);
|
||||
}
|
||||
111
algo/cryptonight/cryptonight_av4_softaes_double.c
Normal file
111
algo/cryptonight/cryptonight_av4_softaes_double.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
||||
@@ -1,248 +0,0 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
/*
 * Even-numbered half of an AES-256 key-schedule step.
 *
 * tmp2 - in: key-assist value; out: its highest 32-bit lane broadcast to all lanes.
 * tmp1 - in: previous even round key; out: next even round key, i.e. the running
 *        32-bit prefix XOR of the old key, XORed with the broadcast assist word.
 */
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
{
    int step;

    /* 0xFF selects lane 3 for every destination lane. */
    *tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);

    __m128i shifted = *tmp1;
    __m128i folded  = *tmp1;

    /* Three byte-shifts by one lane each accumulate a1, a1^a2, a1^a2^a3, ... */
    for (step = 0; step < 3; step++) {
        shifted = _mm_slli_si128(shifted, 0x04);
        folded  = _mm_xor_si128(folded, shifted);
    }

    *tmp1 = _mm_xor_si128(folded, *tmp2);
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
// with ExpandAESKey256() and its subroutines
//
// Expands the 32-byte key stored at the start of keybuf in place: the two
// 16-byte halves stay in slots 0 and 1 and slots 2..14 receive the derived
// round keys. keybuf is accessed with aligned 128-bit loads/stores and is
// written up to byte 239.
static inline void ExpandAESKey256(char *keybuf)
{
    __m128i *round_keys = (__m128i *) keybuf;
    __m128i even_key    = _mm_load_si128(round_keys);
    __m128i odd_key     = _mm_load_si128(round_keys + 1);
    __m128i assist;

    /* The round constant must stay a literal: aeskeygenassist takes an immediate. */
#define AES256_NEXT_EVEN(SLOT, RCON)                           \
    do {                                                       \
        assist = _mm_aeskeygenassist_si128(odd_key, RCON);     \
        ExpandAESKey256_sub1(&even_key, &assist);              \
        round_keys[SLOT] = even_key;                           \
    } while (0)

#define AES256_NEXT_ODD(SLOT)                                  \
    do {                                                       \
        ExpandAESKey256_sub2(&even_key, &odd_key);             \
        round_keys[SLOT] = odd_key;                            \
    } while (0)

    AES256_NEXT_EVEN(2, 0x01);
    AES256_NEXT_ODD(3);

    AES256_NEXT_EVEN(4, 0x02);
    AES256_NEXT_ODD(5);

    AES256_NEXT_EVEN(6, 0x04);
    AES256_NEXT_ODD(7);

    AES256_NEXT_EVEN(8, 0x08);
    AES256_NEXT_ODD(9);

    AES256_NEXT_EVEN(10, 0x10);
    AES256_NEXT_ODD(11);

    AES256_NEXT_EVEN(12, 0x20);
    AES256_NEXT_ODD(13);

    /* The schedule ends on an even key; there is no slot 15. */
    AES256_NEXT_EVEN(14, 0x40);

#undef AES256_NEXT_ODD
#undef AES256_NEXT_EVEN
}
|
||||
|
||||
void cryptonight_av5_aesni_experimental(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
|
||||
uint8_t ExpandedKey[256];
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *) memory;
|
||||
expkey = (__m128i *) ExpandedKey;
|
||||
xmminput = (__m128i *)ctx->text;
|
||||
|
||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||
_mm_prefetch(xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch(xmminput + 4, _MM_HINT_T0 );
|
||||
|
||||
for (i = 0; i < 64; i += 16) {
|
||||
_mm_prefetch(longoutput + i, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 4, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 8, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 12, _MM_HINT_T0);
|
||||
}
|
||||
|
||||
_mm_prefetch(expkey, _MM_HINT_T0);
|
||||
_mm_prefetch(expkey + 4, _MM_HINT_T0);
|
||||
_mm_prefetch(expkey + 8, _MM_HINT_T0);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY_M128I, 1); i += INIT_SIZE_M128I) {
|
||||
__builtin_prefetch(longoutput + i + 64, 1, 0);
|
||||
__builtin_prefetch(longoutput + i + 68, 1, 0);
|
||||
|
||||
for(j = 0; j < 10; j++) {
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
|
||||
_mm_store_si128(&(longoutput[i ]), xmminput[0]);
|
||||
_mm_store_si128(&(longoutput[i + 1 ]), xmminput[1]);
|
||||
_mm_store_si128(&(longoutput[i + 2 ]), xmminput[2]);
|
||||
_mm_store_si128(&(longoutput[i + 3 ]), xmminput[3]);
|
||||
_mm_store_si128(&(longoutput[i + 4 ]), xmminput[4]);
|
||||
_mm_store_si128(&(longoutput[i + 5 ]), xmminput[5]);
|
||||
_mm_store_si128(&(longoutput[i + 6 ]), xmminput[6]);
|
||||
_mm_store_si128(&(longoutput[i + 7 ]), xmminput[7]);
|
||||
}
|
||||
|
||||
ctx->a[0] = ((uint64_t *) ctx->state.k)[0] ^ ((uint64_t *) ctx->state.k)[4];
|
||||
ctx->b[0] = ((uint64_t *) ctx->state.k)[2] ^ ((uint64_t *) ctx->state.k)[6];
|
||||
ctx->a[1] = ((uint64_t *) ctx->state.k)[1] ^ ((uint64_t *) ctx->state.k)[5];
|
||||
ctx->b[1] = ((uint64_t *) ctx->state.k)[3] ^ ((uint64_t *) ctx->state.k)[7];
|
||||
|
||||
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
|
||||
|
||||
uint64_t c[2] __attribute((aligned(16)));
|
||||
uint64_t d[2] __attribute((aligned(16)));
|
||||
|
||||
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
|
||||
_mm_store_si128((__m128i *) c, c_x);
|
||||
|
||||
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
||||
b_x = c_x;
|
||||
|
||||
d[0] = d_ptr[0];
|
||||
d[1] = d_ptr[1];
|
||||
|
||||
{
|
||||
unsigned __int128 res = (unsigned __int128) c[0] * d[0];
|
||||
|
||||
d_ptr[0] = ctx->a[0] += res >> 64;
|
||||
d_ptr[1] = ctx->a[1] += (uint64_t) res;
|
||||
}
|
||||
|
||||
ctx->a[0] ^= d[0];
|
||||
ctx->a[1] ^= d[1];
|
||||
|
||||
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
|
||||
_mm_prefetch(xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch(xmminput + 4, _MM_HINT_T0 );
|
||||
|
||||
for (i = 0; i < 64; i += 16) {
|
||||
_mm_prefetch(longoutput + i, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 4, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 8, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 12, _MM_HINT_T0);
|
||||
}
|
||||
|
||||
_mm_prefetch(expkey, _MM_HINT_T0);
|
||||
_mm_prefetch(expkey + 4, _MM_HINT_T0);
|
||||
_mm_prefetch(expkey + 8, _MM_HINT_T0);
|
||||
|
||||
for (i = 0; __builtin_expect(i < MEMORY_M128I, 1); i += INIT_SIZE_M128I) {
|
||||
_mm_prefetch(longoutput + i + 64, _MM_HINT_T0);
|
||||
_mm_prefetch(longoutput + i + 68, _MM_HINT_T0);
|
||||
|
||||
xmminput[0] = _mm_xor_si128(longoutput[i ], xmminput[0]);
|
||||
xmminput[1] = _mm_xor_si128(longoutput[i + 1], xmminput[1]);
|
||||
xmminput[2] = _mm_xor_si128(longoutput[i + 2], xmminput[2]);
|
||||
xmminput[3] = _mm_xor_si128(longoutput[i + 3], xmminput[3]);
|
||||
xmminput[4] = _mm_xor_si128(longoutput[i + 4], xmminput[4]);
|
||||
xmminput[5] = _mm_xor_si128(longoutput[i + 5], xmminput[5]);
|
||||
xmminput[6] = _mm_xor_si128(longoutput[i + 6], xmminput[6]);
|
||||
xmminput[7] = _mm_xor_si128(longoutput[i + 7], xmminput[7]);
|
||||
|
||||
for(j = 0; j < 10; j++) {
|
||||
xmminput[0] = _mm_aesenc_si128(xmminput[0], expkey[j]);
|
||||
xmminput[1] = _mm_aesenc_si128(xmminput[1], expkey[j]);
|
||||
xmminput[2] = _mm_aesenc_si128(xmminput[2], expkey[j]);
|
||||
xmminput[3] = _mm_aesenc_si128(xmminput[3], expkey[j]);
|
||||
xmminput[4] = _mm_aesenc_si128(xmminput[4], expkey[j]);
|
||||
xmminput[5] = _mm_aesenc_si128(xmminput[5], expkey[j]);
|
||||
xmminput[6] = _mm_aesenc_si128(xmminput[6], expkey[j]);
|
||||
xmminput[7] = _mm_aesenc_si128(xmminput[7], expkey[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
}
|
||||
@@ -1,142 +0,0 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
# include "xmrig.h"
|
||||
#endif
|
||||
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
void cryptonight_av1_aesni(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av2_aesni_wolf(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av3_aesni_bmi2(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av5_aesni_experimental(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
#elif defined(__i386__)
|
||||
void cryptonight_av1_aesni32(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
#endif
|
||||
|
||||
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
|
||||
void (*cryptonight_hash_ctx)(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx) = NULL;
|
||||
|
||||
|
||||
void cryptonight_init(int variant)
|
||||
{
|
||||
switch (variant) {
|
||||
#if defined(__x86_64__)
|
||||
case XMR_VARIANT_AESNI:
|
||||
cryptonight_hash_ctx = cryptonight_av1_aesni;
|
||||
break;
|
||||
|
||||
case XMR_VARIANT_AESNI_WOLF:
|
||||
cryptonight_hash_ctx = cryptonight_av2_aesni_wolf;
|
||||
break;
|
||||
|
||||
case XMR_VARIANT_AESNI_BMI2:
|
||||
cryptonight_hash_ctx = cryptonight_av3_aesni_bmi2;
|
||||
break;
|
||||
|
||||
case XMR_VARIANT_EXPERIMENTAL:
|
||||
cryptonight_hash_ctx = cryptonight_av5_aesni_experimental;
|
||||
break;
|
||||
#elif defined(__i386__)
|
||||
case XMR_VARIANT_AESNI:
|
||||
cryptonight_hash_ctx = cryptonight_av1_aesni32;
|
||||
break;
|
||||
#endif
|
||||
|
||||
case XMR_VARIANT_LEGACY:
|
||||
cryptonight_hash_ctx = cryptonight_av4_legacy;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* extra_hashes adapter: BLAKE-256 of len bytes of input, written to output. */
static inline void do_blake_hash(const void* input, size_t len, char* output)
{
    uint8_t *digest = (uint8_t*)output;

    blake256_hash(digest, input, len);
}
|
||||
|
||||
|
||||
/* extra_hashes adapter: Groestl of input, written to output; groestl() takes the length in bits. */
static inline void do_groestl_hash(const void* input, size_t len, char* output)
{
    uint8_t *digest = (uint8_t*)output;

    groestl(input, len * 8, digest);
}
|
||||
|
||||
|
||||
/* extra_hashes adapter: JH with a 32 * 8 bit digest; the input length is passed in bits. */
static inline void do_jh_hash(const void* input, size_t len, char* output)
{
    uint8_t *digest = (uint8_t*)output;

    jh_hash(32 * 8, input, 8 * len, digest);
}
|
||||
|
||||
|
||||
/* extra_hashes adapter: Skein with an 8 * 32 bit digest; the input length is passed in bits. */
static inline void do_skein_hash(const void* input, size_t len, char* output)
{
    uint8_t *digest = (uint8_t*)output;

    skein_hash(8 * 32, input, 8 * len, digest);
}
|
||||
|
||||
|
||||
/* Final-stage hash functions; callers index this table with (first state byte & 3). */
void (* const extra_hashes[4])(const void *, size_t, char *) = {
    do_blake_hash,
    do_groestl_hash,
    do_jh_hash,
    do_skein_hash
};
|
||||
|
||||
|
||||
void cryptonight_hash(void* output, const void* input, size_t len) {
|
||||
uint8_t *memory __attribute((aligned(16))) = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
cryptonight_hash_ctx(output, input, memory, ctx);
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict pdata, const uint32_t *restrict ptarget, uint32_t max_nonce, unsigned long *restrict hashes_done, const char *restrict memory, struct cryptonight_ctx *persistentctx) {
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptonight_hash_ctx(hash, pdata, memory, persistentctx);
|
||||
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
237
algo/cryptonight/cryptonight_softaes.h
Normal file
237
algo/cryptonight/cryptonight_softaes.h
Normal file
@@ -0,0 +1,237 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_SOFTAES_H__
|
||||
#define __CRYPTONIGHT_SOFTAES_H__
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
extern __m128i soft_aesenc(__m128i in, __m128i key);
|
||||
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
inline __m128i sl_xor(__m128i tmp1)
|
||||
{
|
||||
__m128i tmp4;
|
||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
|
||||
{
|
||||
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft_aeskeygenassist(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = soft_aesenc(*x0, key);
|
||||
*x1 = soft_aesenc(*x1, key);
|
||||
*x2 = soft_aesenc(*x2, key);
|
||||
*x3 = soft_aesenc(*x3, key);
|
||||
*x4 = soft_aesenc(*x4, key);
|
||||
*x5 = soft_aesenc(*x5, key);
|
||||
*x6 = soft_aesenc(*x6, key);
|
||||
*x7 = soft_aesenc(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
|
||||
{
|
||||
__m128i xout0 = _mm_load_si128(memory);
|
||||
__m128i xout2 = _mm_load_si128(memory + 1);
|
||||
*k0 = xout0;
|
||||
*k1 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x1);
|
||||
*k2 = xout0;
|
||||
*k3 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x2);
|
||||
*k4 = xout0;
|
||||
*k5 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x4);
|
||||
*k6 = xout0;
|
||||
*k7 = xout2;
|
||||
|
||||
aes_genkey_sub(&xout0, &xout2, 0x8);
|
||||
*k8 = xout0;
|
||||
*k9 = xout2;
|
||||
}
|
||||
|
||||
|
||||
inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
/*
 * Full 64x64 -> 128-bit unsigned multiply.
 * Returns the low 64 bits of a * b and stores the high 64 bits in *hi.
 *
 * `static inline` rather than plain `inline`: a plain inline definition in a
 * header emits no external definition in C99, so any call that is not inlined
 * fails to link.
 */
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
    *hi = r >> 64;
    return (uint64_t) r;
}
|
||||
#elif defined(__i386__)
|
||||
# define HI32(X) \
|
||||
_mm_srli_si128((X), 4)
|
||||
|
||||
|
||||
# define EXTRACT64(X) \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
/*
 * Full 64x64 -> 128-bit unsigned multiply using only 64-bit arithmetic.
 * Returns the low 64 bits of the product and stores the high 64 bits in
 * *product_hi.
 *
 * `static inline` rather than plain `inline`: a plain inline definition in a
 * header emits no external definition in C99, so any call that is not inlined
 * fails to link.
 */
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
    // multiplier   = ab = a * 2^32 + b
    // multiplicand = cd = c * 2^32 + d
    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
    uint64_t a = multiplier >> 32;
    uint64_t b = multiplier & 0xFFFFFFFF;
    uint64_t c = multiplicand >> 32;
    uint64_t d = multiplicand & 0xFFFFFFFF;

    uint64_t ad = a * d;
    uint64_t bd = b * d;

    // The middle term can overflow 64 bits; the lost carry is worth 2^64 there,
    // i.e. 2^32 in the high word.
    uint64_t adbc = ad + (b * c);
    uint64_t adbc_carry = adbc < ad ? 1 : 0;

    // multiplier * multiplicand = product_hi * 2^64 + product_lo
    uint64_t product_lo = bd + (adbc << 32);
    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

    return product_lo;
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_SOFTAES_H__ */
|
||||
30
compat/libcpuid/CMakeLists.txt
Normal file
30
compat/libcpuid/CMakeLists.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
# Static build of the bundled libcpuid sources.
#
# The version range keeps old CMake releases working (versions before 3.12
# ignore the "...max" part) while opting in to current policies; CMake 4.x
# refuses to configure projects that only declare compatibility with < 3.5.
cmake_minimum_required(VERSION 2.8.12...3.29)
project(cpuid C)

# Headers are listed so that IDE generators show them next to the sources.
add_library(cpuid STATIC
    libcpuid.h
    libcpuid_types.h
    libcpuid_constants.h
    libcpuid_internal.h
    amd_code_t.h
    intel_code_t.h
    recog_amd.h
    recog_intel.h
    asm-bits.h
    libcpuid_util.h
    cpuid_main.c
    asm-bits.c
    recog_amd.c
    recog_intel.c
    libcpuid_util.c
)

# Target-scoped and compiler-neutral replacement for the former
# add_definitions(/DVERSION="0.4.0"); CMake emits -D or /D as appropriate.
target_compile_definitions(cpuid PRIVATE "VERSION=\"0.4.0\"")
|
||||
39
compat/libcpuid/amd_code_t.h
Normal file
39
compat/libcpuid/amd_code_t.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright 2016 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains a list of internal codes we use in detection. It is
|
||||
* of no external use and isn't a complete list of AMD products.
|
||||
*/
|
||||
CODE2(OPTERON_800, 1000),
|
||||
CODE(PHENOM),
|
||||
CODE(PHENOM2),
|
||||
CODE(FUSION_C),
|
||||
CODE(FUSION_E),
|
||||
CODE(FUSION_EA),
|
||||
CODE(FUSION_Z),
|
||||
CODE(FUSION_A),
|
||||
|
||||
825
compat/libcpuid/asm-bits.c
Normal file
825
compat/libcpuid/asm-bits.c
Normal file
@@ -0,0 +1,825 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "libcpuid.h"
|
||||
#include "asm-bits.h"
|
||||
|
||||
/*
 * Reports whether the CPUID instruction is available.
 *
 * Returns 1 unconditionally on PLATFORM_X64. On PLATFORM_X86 it tries to flip
 * bit 0x200000 of EFLAGS (the ID flag) and returns 1 if the change sticks.
 * Returns 0 on other platforms and on x86 with an unrecognised compiler.
 */
int cpuid_exists_by_eflags(void)
|
||||
{
|
||||
#if defined(PLATFORM_X64)
|
||||
return 1; /* CPUID is always present on the x86_64 */
|
||||
#elif defined(PLATFORM_X86)
|
||||
# if defined(COMPILER_GCC)
|
||||
int result;
|
||||
/* Sequence: save EFLAGS in ecx, write back a copy with bit 0x200000 toggled,
 * re-read EFLAGS, XOR with the saved copy (non-zero means the bit changed),
 * then restore the original EFLAGS from ecx. */
__asm __volatile(
|
||||
" pushfl\n"
|
||||
" pop %%eax\n"
|
||||
" mov %%eax, %%ecx\n"
|
||||
" xor $0x200000, %%eax\n"
|
||||
" push %%eax\n"
|
||||
" popfl\n"
|
||||
" pushfl\n"
|
||||
" pop %%eax\n"
|
||||
" xor %%ecx, %%eax\n"
|
||||
" mov %%eax, %0\n"
|
||||
" push %%ecx\n"
|
||||
" popfl\n"
|
||||
: "=m"(result)
|
||||
: :"eax", "ecx", "memory");
|
||||
return (result != 0);
|
||||
# elif defined(COMPILER_MICROSOFT)
|
||||
int result;
|
||||
/* Same sequence as the GCC branch above, in MSVC inline-assembly syntax. */
__asm {
|
||||
pushfd
|
||||
pop eax
|
||||
mov ecx, eax
|
||||
xor eax, 0x200000
|
||||
push eax
|
||||
popfd
|
||||
pushfd
|
||||
pop eax
|
||||
xor eax, ecx
|
||||
mov result, eax
|
||||
push ecx
|
||||
popfd
|
||||
};
|
||||
return (result != 0);
|
||||
# else
|
||||
return 0;
|
||||
# endif /* COMPILER_GCC / COMPILER_MICROSOFT */
|
||||
#else
|
||||
return 0;
|
||||
#endif /* PLATFORM_X64 / PLATFORM_X86 */
|
||||
}
|
||||
|
||||
#ifdef INLINE_ASM_SUPPORTED
|
||||
/*
|
||||
* with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
|
||||
* are implemented in separate .asm files. Otherwise, use inline assembly
|
||||
*/
|
||||
/*
 * Executes CPUID.
 *
 * regs - array of four 32-bit values. On entry regs[0..3] are loaded into
 *        eax, ebx, ecx and edx; on return they hold the values CPUID left in
 *        those registers.
 *
 * Every variant saves ebx/ecx/edx (rbx/rcx/rdx on x86_64) on the stack around
 * the instruction and restores them afterwards.
 */
void exec_cpuid(uint32_t *regs)
|
||||
{
|
||||
#ifdef COMPILER_GCC
|
||||
# ifdef PLATFORM_X64
|
||||
/* 64-bit GCC: rdi holds the regs pointer for the whole sequence. */
__asm __volatile(
|
||||
" mov %0, %%rdi\n"
|
||||
|
||||
" push %%rbx\n"
|
||||
" push %%rcx\n"
|
||||
" push %%rdx\n"
|
||||
|
||||
" mov (%%rdi), %%eax\n"
|
||||
" mov 4(%%rdi), %%ebx\n"
|
||||
" mov 8(%%rdi), %%ecx\n"
|
||||
" mov 12(%%rdi), %%edx\n"
|
||||
|
||||
" cpuid\n"
|
||||
|
||||
" movl %%eax, (%%rdi)\n"
|
||||
" movl %%ebx, 4(%%rdi)\n"
|
||||
" movl %%ecx, 8(%%rdi)\n"
|
||||
" movl %%edx, 12(%%rdi)\n"
|
||||
" pop %%rdx\n"
|
||||
" pop %%rcx\n"
|
||||
" pop %%rbx\n"
|
||||
:
|
||||
:"m"(regs)
|
||||
:"memory", "eax", "rdi"
|
||||
);
|
||||
# else
|
||||
/* 32-bit GCC: same sequence with edi as the regs pointer. */
__asm __volatile(
|
||||
" mov %0, %%edi\n"
|
||||
|
||||
" push %%ebx\n"
|
||||
" push %%ecx\n"
|
||||
" push %%edx\n"
|
||||
|
||||
" mov (%%edi), %%eax\n"
|
||||
" mov 4(%%edi), %%ebx\n"
|
||||
" mov 8(%%edi), %%ecx\n"
|
||||
" mov 12(%%edi), %%edx\n"
|
||||
|
||||
" cpuid\n"
|
||||
|
||||
" mov %%eax, (%%edi)\n"
|
||||
" mov %%ebx, 4(%%edi)\n"
|
||||
" mov %%ecx, 8(%%edi)\n"
|
||||
" mov %%edx, 12(%%edi)\n"
|
||||
" pop %%edx\n"
|
||||
" pop %%ecx\n"
|
||||
" pop %%ebx\n"
|
||||
:
|
||||
:"m"(regs)
|
||||
:"memory", "eax", "edi"
|
||||
);
|
||||
# endif /* PLATFORM_X64 */
|
||||
#else
|
||||
# ifdef COMPILER_MICROSOFT
|
||||
/* MSVC inline assembly: edi is also saved and restored here. */
__asm {
|
||||
push ebx
|
||||
push ecx
|
||||
push edx
|
||||
push edi
|
||||
mov edi, regs
|
||||
|
||||
mov eax, [edi]
|
||||
mov ebx, [edi+4]
|
||||
mov ecx, [edi+8]
|
||||
mov edx, [edi+12]
|
||||
|
||||
cpuid
|
||||
|
||||
mov [edi], eax
|
||||
mov [edi+4], ebx
|
||||
mov [edi+8], ecx
|
||||
mov [edi+12], edx
|
||||
|
||||
pop edi
|
||||
pop edx
|
||||
pop ecx
|
||||
pop ebx
|
||||
}
|
||||
# else
|
||||
# error "Unsupported compiler"
|
||||
# endif /* COMPILER_MICROSOFT */
|
||||
#endif /* COMPILER_GCC */
|
||||
}
|
||||
#endif /* INLINE_ASM_SUPPORTED */
|
||||
|
||||
#ifdef INLINE_ASM_SUPPORTED
/*
 * Read the time-stamp counter with RDTSC.
 *
 * result receives the 64-bit counter assembled from EDX (upper 32 bits)
 * and EAX (lower 32 bits).
 */
void cpu_rdtsc(uint64_t* result)
{
	uint32_t low_part, hi_part;
#if defined(COMPILER_GCC)
	__asm__ __volatile__ (
		" rdtsc\n"
		" mov %%eax, %0\n"
		" mov %%edx, %1\n"
		: "=m"(low_part), "=m"(hi_part)
		:
		: "memory", "eax", "edx"
	);
#elif defined(COMPILER_MICROSOFT)
	__asm {
		rdtsc
		mov low_part, eax
		mov hi_part, edx
	};
#else
#	error "Unsupported compiler"
#endif /* COMPILER_GCC / COMPILER_MICROSOFT */
	/* the two halves occupy disjoint bit ranges, so OR equals addition here */
	*result = (((uint64_t) hi_part) << 32) | (uint64_t) low_part;
}
#endif /* INLINE_ASM_SUPPORTED */
|
||||
|
||||
#ifdef INLINE_ASM_SUPPORTED
/*
 * Run a fixed SSE workload.
 *
 * XMM0-XMM7 are zeroed, then a 16-byte-aligned loop executes `cycles' times.
 * Each iteration issues 32 groups of 8 ADDPS instructions (256 ADDPS total);
 * within a group every register is added into its predecessor, forming a
 * ring xmm0 <- xmm1 <- ... <- xmm7 <- xmm0.
 *
 * cycles - number of loop iterations. The counter is decremented before it
 *          is tested, so a value of 0 wraps around instead of skipping the
 *          loop; pass a positive count.
 */
void busy_sse_loop(int cycles)
{
#if defined(COMPILER_GCC)
#ifndef __APPLE__
#	define XALIGN ".balign 16\n"
#else
#	define XALIGN ".align 4\n"
#endif
/* one group of eight ADDPS, and the 32-fold repetition used as loop body */
#define BUSY_SSE_GROUP \
		" addps %%xmm1, %%xmm0\n" \
		" addps %%xmm2, %%xmm1\n" \
		" addps %%xmm3, %%xmm2\n" \
		" addps %%xmm4, %%xmm3\n" \
		" addps %%xmm5, %%xmm4\n" \
		" addps %%xmm6, %%xmm5\n" \
		" addps %%xmm7, %%xmm6\n" \
		" addps %%xmm0, %%xmm7\n"
#define BUSY_SSE_GROUP_X4 \
		BUSY_SSE_GROUP BUSY_SSE_GROUP BUSY_SSE_GROUP BUSY_SSE_GROUP
#define BUSY_SSE_GROUP_X32 \
		BUSY_SSE_GROUP_X4 BUSY_SSE_GROUP_X4 BUSY_SSE_GROUP_X4 BUSY_SSE_GROUP_X4 \
		BUSY_SSE_GROUP_X4 BUSY_SSE_GROUP_X4 BUSY_SSE_GROUP_X4 BUSY_SSE_GROUP_X4
	__asm__ __volatile__ (
		" xorps %%xmm0, %%xmm0\n"
		" xorps %%xmm1, %%xmm1\n"
		" xorps %%xmm2, %%xmm2\n"
		" xorps %%xmm3, %%xmm3\n"
		" xorps %%xmm4, %%xmm4\n"
		" xorps %%xmm5, %%xmm5\n"
		" xorps %%xmm6, %%xmm6\n"
		" xorps %%xmm7, %%xmm7\n"
		XALIGN
		"1:\n"
		BUSY_SSE_GROUP_X32
		" dec %%eax\n"
		" jnz 1b\n"
		/*
		 * eax is modified by `dec', so it must be a read-write operand
		 * rather than an input; the XMM registers and the flags are
		 * overwritten as well and are declared as clobbers.
		 */
		: "+a"(cycles)
		:
		: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "cc"
	);
#undef BUSY_SSE_GROUP_X32
#undef BUSY_SSE_GROUP_X4
#undef BUSY_SSE_GROUP
#elif defined(COMPILER_MICROSOFT)
	__asm {
		mov eax, cycles
		xorps xmm0, xmm0
		xorps xmm1, xmm1
		xorps xmm2, xmm2
		xorps xmm3, xmm3
		xorps xmm4, xmm4
		xorps xmm5, xmm5
		xorps xmm6, xmm6
		xorps xmm7, xmm7
		//--
		align 16
bsLoop:
		// 0:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 1:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 2:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 3:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 4:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 5:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 6:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 7:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 8:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 9:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 10:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 11:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 12:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 13:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 14:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 15:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 16:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 17:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 18:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 19:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 20:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 21:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 22:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 23:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 24:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 25:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 26:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 27:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 28:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 29:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 30:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		// 31:
		addps xmm0, xmm1
		addps xmm1, xmm2
		addps xmm2, xmm3
		addps xmm3, xmm4
		addps xmm4, xmm5
		addps xmm5, xmm6
		addps xmm6, xmm7
		addps xmm7, xmm0
		//----------------------
		dec eax
		jnz bsLoop
	}
#else
#	error "Unsupported compiler"
#endif /* COMPILER_GCC / COMPILER_MICROSOFT */
}
#endif /* INLINE_ASM_SUPPORTED */
|
||||
53
compat/libcpuid/asm-bits.h
Normal file
53
compat/libcpuid/asm-bits.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __ASM_BITS_H__
#define __ASM_BITS_H__
#include "libcpuid.h"

/* Compiler detection: MSVC is checked first, then GCC-compatible compilers */
#if defined(_MSC_VER)
#	define COMPILER_MICROSOFT
#elif defined(__GNUC__)
#	define COMPILER_GCC
#endif

/* Target detection: 64-bit x86 is checked first, then 32-bit x86 */
#if defined(__x86_64__) || defined(_M_AMD64)
#	define PLATFORM_X64
#elif defined(__i386__) || defined(_M_IX86)
#	define PLATFORM_X86
#endif

/*
 * Inline assembly is available on every 32-bit x86 build and on 64-bit
 * builds with GCC. Under Windows/AMD64 with MSVC, inline assembly isn't
 * supported, so INLINE_ASM_SUPPORTED stays undefined there.
 */
#if defined(PLATFORM_X86) || (defined(PLATFORM_X64) && defined(COMPILER_GCC))
#	define INLINE_ASM_SUPPORTED
#endif

/* Low-level helpers used by the rest of the library */
int cpuid_exists_by_eflags(void);
void exec_cpuid(uint32_t *regs);
void busy_sse_loop(int cycles);

#endif /* __ASM_BITS_H__ */
|
||||
438
compat/libcpuid/cpuid_main.c
Normal file
438
compat/libcpuid/cpuid_main.c
Normal file
@@ -0,0 +1,438 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_intel.h"
|
||||
#include "recog_amd.h"
|
||||
#include "asm-bits.h"
|
||||
#include "libcpuid_util.h"
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Implementation: */
|
||||
|
||||
static int _libcpiud_errno = ERR_OK;
|
||||
|
||||
/*
 * Record `err' as the library's last error code and hand the same code back
 * as an int, so callers can write `return set_error(...)'.
 */
int set_error(cpu_error_t err)
{
	const int code = (int) err;

	_libcpiud_errno = code;
	return code;
}
|
||||
|
||||
static void raw_data_t_constructor(struct cpu_raw_data_t* raw)
|
||||
{
|
||||
memset(raw, 0, sizeof(struct cpu_raw_data_t));
|
||||
}
|
||||
|
||||
static void cpu_id_t_constructor(struct cpu_id_t* id)
|
||||
{
|
||||
memset(id, 0, sizeof(struct cpu_id_t));
|
||||
id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = id->l4_cache = -1;
|
||||
id->l1_assoc = id->l2_assoc = id->l3_assoc = id->l4_assoc = -1;
|
||||
id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = id->l4_cacheline = -1;
|
||||
id->sse_size = -1;
|
||||
}
|
||||
|
||||
/*
 * Try to interpret one "token = value" pair as an entry of a raw CPUID array.
 *
 * expected_token - array name to look for, e.g. the part before "[index]"
 * token          - the token actually read; matches when it starts with
 *                  expected_token
 * value          - text holding four whitespace-separated hexadecimal numbers
 * array          - destination; array[index][0..3] receives the four numbers
 * limit          - number of rows in `array'; indices outside [0, limit) fail
 * recognized     - in/out flag: if already set the call is a no-op; it is set
 *                  as soon as the token name matches
 *
 * Returns 1 when the pair was stored, was already recognized earlier, or is
 * not the token this call looks for; returns 0 when the token name matched
 * but the index or the value could not be parsed.
 */
static int parse_token(const char* expected_token, const char *token,
                       const char *value, uint32_t array[][4], int limit, int *recognized)
{
	/* %x stores through unsigned int*, so the temporaries must be unsigned */
	unsigned int veax, vebx, vecx, vedx;
	size_t name_len;
	int index;

	if (*recognized) return 1; /* already recognized */
	name_len = strlen(expected_token);
	if (strncmp(token, expected_token, name_len)) return 1; /* not what we search for */
	*recognized = 1;

	/*
	 * The name prefix was verified above, so only the "[index]" suffix is
	 * scanned. This avoids building a format string in a fixed-size buffer.
	 */
	if (1 != sscanf(token + name_len, "[%d]", &index) || index < 0 || index >= limit)
		return 0;
	if (4 != sscanf(value, "%x%x%x%x", &veax, &vebx, &vecx, &vedx))
		return 0;

	array[index][0] = veax;
	array[index][1] = vebx;
	array[index][2] = vecx;
	array[index][3] = vedx;
	return 1;
}
|
||||
|
||||
/* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */
|
||||
#ifdef __APPLE__
|
||||
#include <unistd.h>
|
||||
#include <mach/clock_types.h>
|
||||
#include <mach/clock.h>
|
||||
#include <mach/mach.h>
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
kern_return_t kr;
|
||||
host_basic_info_data_t basic_info;
|
||||
host_info_t info = (host_info_t)&basic_info;
|
||||
host_flavor_t flavor = HOST_BASIC_INFO;
|
||||
mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
|
||||
kr = host_info(mach_host_self(), flavor, info, &count);
|
||||
if (kr != KERN_SUCCESS) return 1;
|
||||
return basic_info.avail_cpus;
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
SYSTEM_INFO system_info;
|
||||
GetSystemInfo(&system_info);
|
||||
return system_info.dwNumberOfProcessors;
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#if defined linux || defined __linux__ || defined __sun
|
||||
#include <sys/sysinfo.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * Linux / Solaris: report the number of processors currently online.
 *
 * sysconf() returns -1 when the value cannot be determined; in that case
 * (or for any other non-positive answer) 1 is reported, matching the
 * fallback the other get_total_cpus() variants use on failure.
 */
static int get_total_cpus(void)
{
	const long online = sysconf(_SC_NPROCESSORS_ONLN);

	return (online < 1) ? 1 : (int) online;
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#if defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __bsdi__ || defined __QNX__
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
int mib[2] = { CTL_HW, HW_NCPU };
|
||||
int ncpus;
|
||||
size_t len = sizeof(ncpus);
|
||||
if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) != 0) return 1;
|
||||
return ncpus;
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef GET_TOTAL_CPUS_DEFINED
|
||||
/*
 * Fallback for systems with no known way to count CPUs: always reports 1.
 * A three-line warning is emitted on the first call only.
 */
static int get_total_cpus(void)
{
	static int warning_printed = 0;
	if (!warning_printed) {
		warning_printed = 1;
		warnf("Your system is not supported by libcpuid -- don't know how to detect the\n");
		warnf("total number of CPUs on your system. It will be reported as 1.\n");
		/* routed through warnf() like the two lines above, not straight to stdout */
		warnf("Please use cpu_id_t.logical_cpus field instead.\n");
	}
	return 1;
}
|
||||
#endif /* GET_TOTAL_CPUS_DEFINED */
|
||||
|
||||
|
||||
static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx1[] = {
|
||||
{ 0, CPU_FEATURE_FPU },
|
||||
{ 1, CPU_FEATURE_VME },
|
||||
{ 2, CPU_FEATURE_DE },
|
||||
{ 3, CPU_FEATURE_PSE },
|
||||
{ 4, CPU_FEATURE_TSC },
|
||||
{ 5, CPU_FEATURE_MSR },
|
||||
{ 6, CPU_FEATURE_PAE },
|
||||
{ 7, CPU_FEATURE_MCE },
|
||||
{ 8, CPU_FEATURE_CX8 },
|
||||
{ 9, CPU_FEATURE_APIC },
|
||||
{ 11, CPU_FEATURE_SEP },
|
||||
{ 12, CPU_FEATURE_MTRR },
|
||||
{ 13, CPU_FEATURE_PGE },
|
||||
{ 14, CPU_FEATURE_MCA },
|
||||
{ 15, CPU_FEATURE_CMOV },
|
||||
{ 16, CPU_FEATURE_PAT },
|
||||
{ 17, CPU_FEATURE_PSE36 },
|
||||
{ 19, CPU_FEATURE_CLFLUSH },
|
||||
{ 23, CPU_FEATURE_MMX },
|
||||
{ 24, CPU_FEATURE_FXSR },
|
||||
{ 25, CPU_FEATURE_SSE },
|
||||
{ 26, CPU_FEATURE_SSE2 },
|
||||
{ 28, CPU_FEATURE_HT },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx1[] = {
|
||||
{ 0, CPU_FEATURE_PNI },
|
||||
{ 1, CPU_FEATURE_PCLMUL },
|
||||
{ 3, CPU_FEATURE_MONITOR },
|
||||
{ 9, CPU_FEATURE_SSSE3 },
|
||||
{ 12, CPU_FEATURE_FMA3 },
|
||||
{ 13, CPU_FEATURE_CX16 },
|
||||
{ 19, CPU_FEATURE_SSE4_1 },
|
||||
{ 20, CPU_FEATURE_SSE4_2 },
|
||||
{ 22, CPU_FEATURE_MOVBE },
|
||||
{ 23, CPU_FEATURE_POPCNT },
|
||||
{ 25, CPU_FEATURE_AES },
|
||||
{ 26, CPU_FEATURE_XSAVE },
|
||||
{ 27, CPU_FEATURE_OSXSAVE },
|
||||
{ 28, CPU_FEATURE_AVX },
|
||||
{ 29, CPU_FEATURE_F16C },
|
||||
{ 30, CPU_FEATURE_RDRAND },
|
||||
};
|
||||
const struct feature_map_t matchtable_ebx7[] = {
|
||||
{ 3, CPU_FEATURE_BMI1 },
|
||||
{ 5, CPU_FEATURE_AVX2 },
|
||||
{ 8, CPU_FEATURE_BMI2 },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 11, CPU_FEATURE_SYSCALL },
|
||||
{ 27, CPU_FEATURE_RDTSCP },
|
||||
{ 29, CPU_FEATURE_LM },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx81[] = {
|
||||
{ 0, CPU_FEATURE_LAHF_LM },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx87[] = {
|
||||
{ 8, CPU_FEATURE_CONSTANT_TSC },
|
||||
};
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->basic_cpuid[0][0] >= 7) {
|
||||
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000001) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000007) {
|
||||
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_SSE]) {
|
||||
/* apply guesswork to check if the SSE unit width is 128 bit */
|
||||
switch (data->vendor) {
|
||||
case VENDOR_AMD:
|
||||
data->sse_size = (data->ext_family >= 16 && data->ext_family != 17) ? 128 : 64;
|
||||
break;
|
||||
case VENDOR_INTEL:
|
||||
data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 128 : 64;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* leave the CPU_FEATURE_128BIT_SSE_AUTH 0; the advanced per-vendor detection routines
|
||||
* will set it accordingly if they detect the needed bit */
|
||||
}
|
||||
}
|
||||
|
||||
static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str)
|
||||
{
|
||||
int i;
|
||||
cpu_vendor_t vendor = VENDOR_UNKNOWN;
|
||||
const struct { cpu_vendor_t vendor; char match[16]; }
|
||||
matchtable[NUM_CPU_VENDORS] = {
|
||||
/* source: http://www.sandpile.org/ia32/cpuid.htm */
|
||||
{ VENDOR_INTEL , "GenuineIntel" },
|
||||
{ VENDOR_AMD , "AuthenticAMD" },
|
||||
{ VENDOR_CYRIX , "CyrixInstead" },
|
||||
{ VENDOR_NEXGEN , "NexGenDriven" },
|
||||
{ VENDOR_TRANSMETA , "GenuineTMx86" },
|
||||
{ VENDOR_UMC , "UMC UMC UMC " },
|
||||
{ VENDOR_CENTAUR , "CentaurHauls" },
|
||||
{ VENDOR_RISE , "RiseRiseRise" },
|
||||
{ VENDOR_SIS , "SiS SiS SiS " },
|
||||
{ VENDOR_NSC , "Geode by NSC" },
|
||||
};
|
||||
|
||||
memcpy(vendor_str + 0, &raw_vendor[1], 4);
|
||||
memcpy(vendor_str + 4, &raw_vendor[3], 4);
|
||||
memcpy(vendor_str + 8, &raw_vendor[2], 4);
|
||||
vendor_str[12] = 0;
|
||||
|
||||
/* Determine vendor: */
|
||||
for (i = 0; i < NUM_CPU_VENDORS; i++)
|
||||
if (!strcmp(vendor_str, matchtable[i].match)) {
|
||||
vendor = matchtable[i].vendor;
|
||||
break;
|
||||
}
|
||||
return vendor;
|
||||
}
|
||||
|
||||
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int i, j, basic, xmodel, xfamily, ext;
|
||||
char brandstr[64] = {0};
|
||||
data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
|
||||
|
||||
if (data->vendor == VENDOR_UNKNOWN)
|
||||
return set_error(ERR_CPU_UNKN);
|
||||
basic = raw->basic_cpuid[0][0];
|
||||
if (basic >= 1) {
|
||||
data->family = (raw->basic_cpuid[1][0] >> 8) & 0xf;
|
||||
data->model = (raw->basic_cpuid[1][0] >> 4) & 0xf;
|
||||
data->stepping = raw->basic_cpuid[1][0] & 0xf;
|
||||
xmodel = (raw->basic_cpuid[1][0] >> 16) & 0xf;
|
||||
xfamily = (raw->basic_cpuid[1][0] >> 20) & 0xff;
|
||||
if (data->vendor == VENDOR_AMD && data->family < 0xf)
|
||||
data->ext_family = data->family;
|
||||
else
|
||||
data->ext_family = data->family + xfamily;
|
||||
data->ext_model = data->model + (xmodel << 4);
|
||||
}
|
||||
ext = raw->ext_cpuid[0][0] - 0x8000000;
|
||||
|
||||
/* obtain the brand string, if present: */
|
||||
if (ext >= 4) {
|
||||
for (i = 0; i < 3; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
memcpy(brandstr + i * 16 + j * 4,
|
||||
&raw->ext_cpuid[2 + i][j], 4);
|
||||
brandstr[48] = 0;
|
||||
i = 0;
|
||||
while (brandstr[i] == ' ') i++;
|
||||
strncpy(data->brand_str, brandstr + i, sizeof(data->brand_str));
|
||||
data->brand_str[48] = 0;
|
||||
}
|
||||
load_features_common(raw, data);
|
||||
data->total_logical_cpus = get_total_cpus();
|
||||
return set_error(ERR_OK);
|
||||
}
|
||||
|
||||
static void make_list_from_string(const char* csv, struct cpu_list_t* list)
|
||||
{
|
||||
int i, n, l, last;
|
||||
l = (int) strlen(csv);
|
||||
n = 0;
|
||||
for (i = 0; i < l; i++) if (csv[i] == ',') n++;
|
||||
n++;
|
||||
list->num_entries = n;
|
||||
list->names = (char**) malloc(sizeof(char*) * n);
|
||||
last = -1;
|
||||
n = 0;
|
||||
for (i = 0; i <= l; i++) if (i == l || csv[i] == ',') {
|
||||
list->names[n] = (char*) malloc(i - last);
|
||||
memcpy(list->names[n], &csv[last + 1], i - last - 1);
|
||||
list->names[n][i - last - 1] = '\0';
|
||||
n++;
|
||||
last = i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Interface: */
|
||||
|
||||
/* Public wrapper: forwards to the internal get_total_cpus(). */
int cpuid_get_total_cpus(void)
{
	const int total = get_total_cpus();
	return total;
}
|
||||
|
||||
/* Public wrapper: returns whatever cpuid_exists_by_eflags() reports. */
int cpuid_present(void)
{
	const int present = cpuid_exists_by_eflags();
	return present;
}
|
||||
|
||||
/*
 * Runs exec_cpuid() on `regs' after loading regs[0] with `eax' and
 * clearing the remaining three entries.
 */
void cpu_exec_cpuid(uint32_t eax, uint32_t* regs)
{
	regs[3] = 0;
	regs[2] = 0;
	regs[1] = 0;
	regs[0] = eax;
	exec_cpuid(regs);
}
|
||||
|
||||
/*
 * Runs exec_cpuid() on `regs' exactly as supplied; unlike cpu_exec_cpuid(),
 * nothing is initialized here, so the caller sets every entry beforehand.
 */
void cpu_exec_cpuid_ext(uint32_t* regs)
{
	exec_cpuid(regs);
}
|
||||
|
||||
int cpuid_get_raw_data(struct cpu_raw_data_t* data)
|
||||
{
|
||||
unsigned i;
|
||||
if (!cpuid_present())
|
||||
return set_error(ERR_NO_CPUID);
|
||||
for (i = 0; i < 32; i++)
|
||||
cpu_exec_cpuid(i, data->basic_cpuid[i]);
|
||||
for (i = 0; i < 32; i++)
|
||||
cpu_exec_cpuid(0x80000000 + i, data->ext_cpuid[i]);
|
||||
for (i = 0; i < MAX_INTELFN4_LEVEL; i++) {
|
||||
memset(data->intel_fn4[i], 0, sizeof(data->intel_fn4[i]));
|
||||
data->intel_fn4[i][0] = 4;
|
||||
data->intel_fn4[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn4[i]);
|
||||
}
|
||||
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
|
||||
memset(data->intel_fn11[i], 0, sizeof(data->intel_fn11[i]));
|
||||
data->intel_fn11[i][0] = 11;
|
||||
data->intel_fn11[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn11[i]);
|
||||
}
|
||||
for (i = 0; i < MAX_INTELFN12H_LEVEL; i++) {
|
||||
memset(data->intel_fn12h[i], 0, sizeof(data->intel_fn12h[i]));
|
||||
data->intel_fn12h[i][0] = 0x12;
|
||||
data->intel_fn12h[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn12h[i]);
|
||||
}
|
||||
for (i = 0; i < MAX_INTELFN14H_LEVEL; i++) {
|
||||
memset(data->intel_fn14h[i], 0, sizeof(data->intel_fn14h[i]));
|
||||
data->intel_fn14h[i][0] = 0x14;
|
||||
data->intel_fn14h[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn14h[i]);
|
||||
}
|
||||
return set_error(ERR_OK);
|
||||
}
|
||||
|
||||
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
int r;
|
||||
struct cpu_raw_data_t myraw;
|
||||
if (!raw) {
|
||||
if ((r = cpuid_get_raw_data(&myraw)) < 0)
|
||||
return set_error(r);
|
||||
raw = &myraw;
|
||||
}
|
||||
cpu_id_t_constructor(data);
|
||||
if ((r = cpuid_basic_identify(raw, data)) < 0)
|
||||
return set_error(r);
|
||||
switch (data->vendor) {
|
||||
case VENDOR_INTEL:
|
||||
r = cpuid_identify_intel(raw, data, internal);
|
||||
break;
|
||||
case VENDOR_AMD:
|
||||
r = cpuid_identify_amd(raw, data, internal);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return set_error(r);
|
||||
}
|
||||
|
||||
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
struct internal_id_info_t throwaway;
|
||||
return cpu_ident_internal(raw, data, &throwaway);
|
||||
}
|
||||
|
||||
const char* cpuid_lib_version(void)
|
||||
{
|
||||
return VERSION;
|
||||
}
|
||||
58
compat/libcpuid/intel_code_t.h
Normal file
58
compat/libcpuid/intel_code_t.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2016 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains a list of internal codes we use in detection. It is
|
||||
* of no external use and isn't a complete list of intel products.
|
||||
*/
|
||||
CODE2(PENTIUM, 2000),
|
||||
|
||||
CODE(IRWIN),
|
||||
CODE(POTOMAC),
|
||||
CODE(GAINESTOWN),
|
||||
CODE(WESTMERE),
|
||||
|
||||
CODE(PENTIUM_M),
|
||||
CODE(NOT_CELERON),
|
||||
|
||||
CODE(CORE_SOLO),
|
||||
CODE(MOBILE_CORE_SOLO),
|
||||
CODE(CORE_DUO),
|
||||
CODE(MOBILE_CORE_DUO),
|
||||
|
||||
CODE(WOLFDALE),
|
||||
CODE(MEROM),
|
||||
CODE(PENRYN),
|
||||
CODE(QUAD_CORE),
|
||||
CODE(DUAL_CORE_HT),
|
||||
CODE(QUAD_CORE_HT),
|
||||
CODE(MORE_THAN_QUADCORE),
|
||||
CODE(PENTIUM_D),
|
||||
|
||||
CODE(SILVERTHORNE),
|
||||
CODE(DIAMONDVILLE),
|
||||
CODE(PINEVIEW),
|
||||
CODE(CEDARVIEW),
|
||||
1150
compat/libcpuid/libcpuid.h
Normal file
1150
compat/libcpuid/libcpuid.h
Normal file
File diff suppressed because it is too large
Load Diff
47
compat/libcpuid/libcpuid_constants.h
Normal file
47
compat/libcpuid/libcpuid_constants.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**
|
||||
* @File libcpuid_constants.h
|
||||
* @Author Veselin Georgiev
|
||||
* @Brief Some limits and constants for libcpuid
|
||||
*/
|
||||
|
||||
#ifndef __LIBCPUID_CONSTANTS_H__
|
||||
#define __LIBCPUID_CONSTANTS_H__
|
||||
|
||||
#define VENDOR_STR_MAX 16
|
||||
#define BRAND_STR_MAX 64
|
||||
#define CPU_FLAGS_MAX 128
|
||||
#define MAX_CPUID_LEVEL 32
|
||||
#define MAX_EXT_CPUID_LEVEL 32
|
||||
#define MAX_INTELFN4_LEVEL 8
|
||||
#define MAX_INTELFN11_LEVEL 4
|
||||
#define MAX_INTELFN12H_LEVEL 4
|
||||
#define MAX_INTELFN14H_LEVEL 4
|
||||
#define CPU_HINTS_MAX 16
|
||||
#define SGX_FLAGS_MAX 14
|
||||
|
||||
#endif /* __LIBCPUID_CONSTANTS_H__ */
|
||||
64
compat/libcpuid/libcpuid_internal.h
Normal file
64
compat/libcpuid/libcpuid_internal.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright 2016 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __LIBCPUID_INTERNAL_H__
|
||||
#define __LIBCPUID_INTERNAL_H__
|
||||
/*
|
||||
* This file contains internal undocumented declarations and function prototypes
|
||||
* for the workings of the internal library infrastructure.
|
||||
*/
|
||||
|
||||
enum _common_codes_t {
|
||||
NA = 0,
|
||||
NC, /* No code */
|
||||
};
|
||||
|
||||
#define CODE(x) x
|
||||
#define CODE2(x, y) x = y
|
||||
enum _amd_code_t {
|
||||
#include "amd_code_t.h"
|
||||
};
|
||||
typedef enum _amd_code_t amd_code_t;
|
||||
|
||||
enum _intel_code_t {
|
||||
#include "intel_code_t.h"
|
||||
};
|
||||
typedef enum _intel_code_t intel_code_t;
|
||||
#undef CODE
|
||||
#undef CODE2
|
||||
|
||||
struct internal_id_info_t {
|
||||
union {
|
||||
amd_code_t amd;
|
||||
intel_code_t intel;
|
||||
} code;
|
||||
uint64_t bits;
|
||||
int score; // detection (matchtable) score
|
||||
};
|
||||
|
||||
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data,
|
||||
struct internal_id_info_t* internal);
|
||||
|
||||
#endif /* __LIBCPUID_INTERNAL_H__ */
|
||||
37
compat/libcpuid/libcpuid_types.h
Normal file
37
compat/libcpuid/libcpuid_types.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**
|
||||
* @File libcpuid_types.h
|
||||
* @Author Veselin Georgiev
|
||||
* @Brief Type specifications for libcpuid.
|
||||
*/
|
||||
|
||||
#ifndef __LIBCPUID_TYPES_H__
|
||||
#define __LIBCPUID_TYPES_H__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#endif /* __LIBCPUID_TYPES_H__ */
|
||||
218
compat/libcpuid/libcpuid_util.c
Normal file
218
compat/libcpuid/libcpuid_util.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
|
||||
int _current_verboselevel;
|
||||
|
||||
void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < count; i++)
|
||||
if (reg & (1u << matchtable[i].bit))
|
||||
data->flags[matchtable[i].feature] = 1;
|
||||
}
|
||||
|
||||
/* Default warning sink: writes `msg' verbatim to stderr. */
static void default_warn(const char *msg)
{
	FILE *sink = stderr;

	fprintf(sink, "%s", msg);
}
|
||||
|
||||
libcpuid_warn_fn_t _warn_fun = default_warn;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define vsnprintf _vsnprintf
|
||||
#endif
|
||||
void warnf(const char* format, ...)
|
||||
{
|
||||
char buff[1024];
|
||||
va_list va;
|
||||
if (!_warn_fun) return;
|
||||
va_start(va, format);
|
||||
vsnprintf(buff, sizeof(buff), format, va);
|
||||
va_end(va);
|
||||
_warn_fun(buff);
|
||||
}
|
||||
|
||||
void debugf(int verboselevel, const char* format, ...)
|
||||
{
|
||||
char buff[1024];
|
||||
va_list va;
|
||||
if (verboselevel > _current_verboselevel) return;
|
||||
va_start(va, format);
|
||||
vsnprintf(buff, sizeof(buff), format, va);
|
||||
va_end(va);
|
||||
_warn_fun(buff);
|
||||
}
|
||||
|
||||
/* Returns the number of bits set to 1 in `mask'. */
static int popcount64(uint64_t mask)
{
	int ones;

	/* Examine the lowest bit, then shift it out, until nothing is left. */
	for (ones = 0; mask != 0; mask >>= 1)
		ones += (int) (mask & 1u);

	return ones;
}
|
||||
|
||||
static int score(const struct match_entry_t* entry, const struct cpu_id_t* data,
|
||||
int brand_code, uint64_t bits, int model_code)
|
||||
{
|
||||
int res = 0;
|
||||
if (entry->family == data->family ) res += 2;
|
||||
if (entry->model == data->model ) res += 2;
|
||||
if (entry->stepping == data->stepping ) res += 2;
|
||||
if (entry->ext_family == data->ext_family) res += 2;
|
||||
if (entry->ext_model == data->ext_model ) res += 2;
|
||||
if (entry->ncores == data->num_cores ) res += 2;
|
||||
if (entry->l2cache == data->l2_cache ) res += 1;
|
||||
if (entry->l3cache == data->l3_cache ) res += 1;
|
||||
if (entry->brand_code == brand_code ) res += 2;
|
||||
if (entry->model_code == model_code ) res += 2;
|
||||
|
||||
res += popcount64(entry->model_bits & bits) * 2;
|
||||
return res;
|
||||
}
|
||||
|
||||
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_id_t* data, int brand_code, uint64_t bits,
|
||||
int model_code)
|
||||
{
|
||||
int bestscore = -1;
|
||||
int bestindex = 0;
|
||||
int i, t;
|
||||
|
||||
debugf(3, "Matching cpu f:%d, m:%d, s:%d, xf:%d, xm:%d, ncore:%d, l2:%d, bcode:%d, bits:%llu, code:%d\n",
|
||||
data->family, data->model, data->stepping, data->ext_family,
|
||||
data->ext_model, data->num_cores, data->l2_cache, brand_code, (unsigned long long) bits, model_code);
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
t = score(&matchtable[i], data, brand_code, bits, model_code);
|
||||
debugf(3, "Entry %d, `%s', score %d\n", i, matchtable[i].name, t);
|
||||
if (t > bestscore) {
|
||||
debugf(2, "Entry `%s' selected - best score so far (%d)\n", matchtable[i].name, t);
|
||||
bestscore = t;
|
||||
bestindex = i;
|
||||
}
|
||||
}
|
||||
strcpy(data->cpu_codename, matchtable[bestindex].name);
|
||||
return bestscore;
|
||||
}
|
||||
|
||||
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_list_t* list)
|
||||
{
|
||||
int i, j, n, good;
|
||||
n = 0;
|
||||
list->names = (char**) malloc(sizeof(char*) * count);
|
||||
for (i = 0; i < count; i++) {
|
||||
if (strstr(matchtable[i].name, "Unknown")) continue;
|
||||
good = 1;
|
||||
for (j = n - 1; j >= 0; j--)
|
||||
if (!strcmp(list->names[j], matchtable[i].name)) {
|
||||
good = 0;
|
||||
break;
|
||||
}
|
||||
if (!good) continue;
|
||||
#if defined(_MSC_VER)
|
||||
list->names[n++] = _strdup(matchtable[i].name);
|
||||
#else
|
||||
list->names[n++] = strdup(matchtable[i].name);
|
||||
#endif
|
||||
}
|
||||
list->num_entries = n;
|
||||
}
|
||||
|
||||
/*
 * Tries to match the single character `c' against the pattern element that
 * starts at `p'.
 *
 * Pattern elements: a literal character, '.' (any character), '#' (any
 * digit) or '[<chars>]' (any one of the listed characters).
 *
 * Returns the number of pattern characters consumed by the match (1, or the
 * full length of a '[...]' group), or -1 if there is no match. The string
 * terminator (c == 0) never matches, and an unterminated '[' group never
 * matches either.
 */
static int xmatch_entry(char c, const char* p)
{
	int i, j;
	if (c == 0) return -1;
	if (c == p[0]) return 1;
	if (p[0] == '.') return 1;
	/* isdigit() is only defined for unsigned char values (and EOF). */
	if (p[0] == '#' && isdigit((unsigned char) c)) return 1;
	if (p[0] == '[') {
		/* Find the closing bracket. */
		j = 1;
		while (p[j] && p[j] != ']') j++;
		if (!p[j]) return -1;
		for (i = 1; i < j; i++)
			if (p[i] == c) return j + 1;
	}
	return -1;
}
|
||||
|
||||
/*
 * Searches `s' for the first position at which the whole pattern `p'
 * matches (see xmatch_entry() for the pattern elements).
 *
 * Returns 0 if the pattern does not occur, otherwise the index of the match
 * plus one.
 */
int match_pattern(const char* s, const char* p)
{
	const int text_len = (int) strlen(s);
	const int pat_len = (int) strlen(p);
	int start;

	for (start = 0; start < text_len; start++) {
		int pat_pos = 0;   /* how much of the pattern has been matched */
		int consumed = 0;  /* how many characters of `s' were used */
		int step;

		/* Cheap rejection: the first pattern element must match here. */
		if (xmatch_entry(s[start], p) == -1)
			continue;

		while (pat_pos < pat_len) {
			step = xmatch_entry(s[start + consumed], p + pat_pos);
			if (step == -1)
				break;
			consumed++;
			pat_pos += step;
		}

		if (pat_pos == pat_len)
			return start + 1;
	}
	return 0;
}
|
||||
|
||||
struct cpu_id_t* get_cached_cpuid(void)
|
||||
{
|
||||
static int initialized = 0;
|
||||
static struct cpu_id_t id;
|
||||
if (initialized) return &id;
|
||||
if (cpu_identify(NULL, &id))
|
||||
memset(&id, 0, sizeof(id));
|
||||
initialized = 1;
|
||||
return &id;
|
||||
}
|
||||
|
||||
/* Returns 1 if every bit set in `mask' is also set in `bits', 0 otherwise. */
int match_all(uint64_t bits, uint64_t mask)
{
	/* Bits required by the mask but absent from `bits'. */
	const uint64_t missing = mask & ~bits;

	return missing == 0;
}
|
||||
|
||||
/*
 * Prints, via debugf() at verbosity `debuglevel', the bits set in `mask' as
 * "LBIT(a) + LBIT(b) + ...", followed by a newline.
 */
void debug_print_lbits(int debuglevel, uint64_t mask)
{
	/*
	 * `first' must start out true, otherwise the " + " separator is also
	 * printed in front of the very first item.
	 */
	int i, first = 1;
	for (i = 0; i < 64; i++) if (mask & (((uint64_t) 1) << i)) {
		if (first) first = 0;
		else debugf(debuglevel, " + ");
		debugf(debuglevel, "LBIT(%d)", i);
	}
	debugf(debuglevel, "\n");
}
|
||||
100
compat/libcpuid/libcpuid_util.h
Normal file
100
compat/libcpuid/libcpuid_util.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __LIBCPUID_UTIL_H__
|
||||
#define __LIBCPUID_UTIL_H__
|
||||
|
||||
/* Number of elements in an array (must be a real array, not a pointer). */
#define COUNT_OF(array) (sizeof(array) / sizeof((array)[0]))

/*
 * A long long with only bit number `x' set. The argument is parenthesized so
 * that expressions such as LBIT(a | b) shift by the whole expression.
 */
#define LBIT(x) (((long long) 1) << (x))
|
||||
|
||||
struct feature_map_t {
|
||||
unsigned bit;
|
||||
cpu_feature_t feature;
|
||||
};
|
||||
|
||||
void match_features(const struct feature_map_t* matchtable, int count,
|
||||
uint32_t reg, struct cpu_id_t* data);
|
||||
|
||||
struct match_entry_t {
|
||||
int family, model, stepping, ext_family, ext_model;
|
||||
int ncores, l2cache, l3cache, brand_code;
|
||||
uint64_t model_bits;
|
||||
int model_code;
|
||||
char name[32];
|
||||
};
|
||||
|
||||
// returns the match score:
|
||||
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_id_t* data, int brand_code, uint64_t bits,
|
||||
int model_code);
|
||||
|
||||
void warnf(const char* format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
#endif
|
||||
;
|
||||
void debugf(int verboselevel, const char* format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 2, 3)))
|
||||
#endif
|
||||
;
|
||||
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_list_t* list);
|
||||
|
||||
/*
|
||||
* Seek for a pattern in `haystack'.
|
||||
 * Pattern may be a fixed string, or contain the special metacharacters
|
||||
* '.' - match any single character
|
||||
* '#' - match any digit
|
||||
 * '[<chars>]' - match any of the given chars (regex-like ranges are not
|
||||
* supported)
|
||||
* Return val: 0 if the pattern is not found. Nonzero if it is found (actually,
|
||||
* x + 1 where x is the index where the match is found).
|
||||
*/
|
||||
int match_pattern(const char* haystack, const char* pattern);
|
||||
|
||||
/*
|
||||
* Gets an initialized cpu_id_t. It is cached, so that internal libcpuid
|
||||
* machinery doesn't need to issue cpu_identify more than once.
|
||||
*/
|
||||
struct cpu_id_t* get_cached_cpuid(void);
|
||||
|
||||
|
||||
/* returns true if all bits of mask are present in `bits'. */
|
||||
int match_all(uint64_t bits, uint64_t mask);
|
||||
|
||||
/* print what bits a mask consists of */
|
||||
void debug_print_lbits(int debuglevel, uint64_t mask);
|
||||
|
||||
/*
|
||||
* Sets the current errno
|
||||
*/
|
||||
int set_error(cpu_error_t err);
|
||||
|
||||
extern libcpuid_warn_fn_t _warn_fun;
|
||||
extern int _current_verboselevel;
|
||||
|
||||
#endif /* __LIBCPUID_UTIL_H__ */
|
||||
549
compat/libcpuid/recog_amd.c
Normal file
549
compat/libcpuid/recog_amd.c
Normal file
@@ -0,0 +1,549 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_amd.h"
|
||||
|
||||
/*
 * Table pairing every AMD internal code with its identifier spelled out as a
 * string. It is generated from amd_code_t.h by redefining the CODE/CODE2
 * macros used in that list; CODE2 entries are emitted like plain CODE ones.
 */
const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = {
	#define CODE(x) { x, #x }
	#define CODE2(x, y) CODE(x)
	#include "amd_code_t.h"
	#undef CODE
	/* Undefine both helper macros so that neither leaks past the table. */
	#undef CODE2
};
|
||||
|
||||
struct amd_code_and_bits_t {
|
||||
int code;
|
||||
uint64_t bits;
|
||||
};
|
||||
|
||||
enum _amd_bits_t {
|
||||
ATHLON_ = LBIT( 0 ),
|
||||
_XP_ = LBIT( 1 ),
|
||||
_M_ = LBIT( 2 ),
|
||||
_MP_ = LBIT( 3 ),
|
||||
MOBILE_ = LBIT( 4 ),
|
||||
DURON_ = LBIT( 5 ),
|
||||
SEMPRON_ = LBIT( 6 ),
|
||||
OPTERON_ = LBIT( 7 ),
|
||||
TURION_ = LBIT( 8 ),
|
||||
_LV_ = LBIT( 9 ),
|
||||
_64_ = LBIT( 10 ),
|
||||
_X2 = LBIT( 11 ),
|
||||
_X3 = LBIT( 12 ),
|
||||
_X4 = LBIT( 13 ),
|
||||
_X6 = LBIT( 14 ),
|
||||
_FX = LBIT( 15 ),
|
||||
};
|
||||
typedef enum _amd_bits_t amd_bits_t;
|
||||
|
||||
enum _amd_model_codes_t {
|
||||
// Only for Ryzen CPUs:
|
||||
_1400,
|
||||
_1500,
|
||||
_1600,
|
||||
};
|
||||
|
||||
|
||||
const struct match_entry_t cpudb_amd[] = {
|
||||
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD CPU" },
|
||||
|
||||
/* 486 and the likes */
|
||||
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 486" },
|
||||
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2" },
|
||||
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2WB" },
|
||||
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4" },
|
||||
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4WB" },
|
||||
|
||||
/* Pentia clones */
|
||||
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 586" },
|
||||
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
|
||||
/* The K6 */
|
||||
{ 5, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
|
||||
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
|
||||
|
||||
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2" },
|
||||
{ 5, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-III" },
|
||||
{ 5, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
|
||||
{ 5, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
|
||||
{ 5, 12, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
|
||||
{ 5, 13, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2+" },
|
||||
|
||||
/* Athlon et al. */
|
||||
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
|
||||
{ 6, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
|
||||
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Duron (Spitfire)" },
|
||||
{ 6, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (ThunderBird)" },
|
||||
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon (Palomino)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Palomino)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Palomino)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP" },
|
||||
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon XP" },
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Morgan)" },
|
||||
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon XP (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Applebred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (T-Bred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (T-Bred)" },
|
||||
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thorton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thorton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (Barton)" },
|
||||
|
||||
/* K8 Architecture */
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown K8" },
|
||||
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Unknown K9" },
|
||||
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown A64" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
|
||||
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
|
||||
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
|
||||
{ 15, 3, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_FX , 0, "Athlon FX" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX" },
|
||||
{ 15, 3, -1, 15, 35, 2, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX X2 (Toledo)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 128, -1, NC, SEMPRON_ , 0, "A64 Sempron (128K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 256, -1, NC, SEMPRON_ , 0, "A64 Sempron (256K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, SEMPRON_ , 0, "A64 Sempron (512K)" },
|
||||
{ 15, -1, -1, 15, 0x4f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
|
||||
{ 15, -1, -1, 15, 0x2f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
|
||||
{ 15, -1, -1, 15, 0x1f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Winchester/512K)" },
|
||||
{ 15, -1, -1, 15, 0x0c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Newcastle/512K)" },
|
||||
{ 15, -1, -1, 15, 0x27, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
|
||||
{ 15, -1, -1, 15, 0x37, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
|
||||
{ 15, -1, -1, 15, 0x04, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/512K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x27, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x04, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/1024K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x4b, 2, 256, -1, NC, SEMPRON_ , 0, "Athlon 64 X2 (Windsor/256K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x23, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/512K)" },
|
||||
{ 15, -1, -1, 15, 0x4b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
|
||||
{ 15, -1, -1, 15, 0x43, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
|
||||
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Brisbane/512K)" },
|
||||
{ 15, -1, -1, 15, 0x2b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Manchester/512K)"},
|
||||
|
||||
{ 15, -1, -1, 15, 0x23, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x43, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/1024K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x08, 1, 128, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/128K)"},
|
||||
{ 15, -1, -1, 15, 0x08, 1, 256, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/256K)"},
|
||||
{ 15, -1, -1, 15, 0x0c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Paris)" },
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/128K)"},
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/256K)"},
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/128K)"},
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/256K)"},
|
||||
{ 15, -1, -1, 15, 0x2f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
|
||||
{ 15, -1, -1, 15, 0x2f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
|
||||
{ 15, -1, -1, 15, 0x4f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
|
||||
{ 15, -1, -1, 15, 0x4f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
|
||||
{ 15, -1, -1, 15, 0x6b, 2, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/256K)"},
|
||||
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/512K)"},
|
||||
{ 15, -1, -1, 15, 0x7f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/256K)" },
|
||||
{ 15, -1, -1, 15, 0x7f, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/512K)" },
|
||||
{ 15, -1, -1, 15, 0x4c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/256K)"},
|
||||
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/512K)"},
|
||||
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, SEMPRON_ , 0, "Sempron Dual Core" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x24, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/512K)" },
|
||||
{ 15, -1, -1, 15, 0x24, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x48, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Taylor)" },
|
||||
{ 15, -1, -1, 15, 0x48, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Trinidad)" },
|
||||
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Richmond)" },
|
||||
{ 15, -1, -1, 15, 0x68, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/256K)" },
|
||||
{ 15, -1, -1, 15, 0x68, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/512K)" },
|
||||
{ 15, -1, -1, 17, 3, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/512K)" },
|
||||
{ 15, -1, -1, 17, 3, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/1024K)" },
|
||||
|
||||
/* K10 Architecture (2007) */
|
||||
{ 15, -1, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Unknown AMD Phenom" },
|
||||
{ 15, 2, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Phenom" },
|
||||
{ 15, 2, -1, 16, -1, 3, -1, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman)" },
|
||||
{ 15, 2, -1, 16, -1, 4, -1, -1, PHENOM, 0 , 0, "Phenom X4 (Agena)" },
|
||||
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/256K)" },
|
||||
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/512K)" },
|
||||
{ 15, 2, -1, 16, -1, 4, 128, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/128K)" },
|
||||
{ 15, 2, -1, 16, -1, 4, 256, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/256K)" },
|
||||
{ 15, 2, -1, 16, -1, 4, 512, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/512K)" },
|
||||
{ 15, 2, -1, 16, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon X2 (Kuma)" },
|
||||
/* Phenom II derivatives: */
|
||||
{ 15, 4, -1, 16, -1, 4, -1, -1, NC, 0 , 0, "Phenom (Deneb-based)" },
|
||||
{ 15, 4, -1, 16, -1, 1, 1024, -1, NC, SEMPRON_ , 0, "Sempron (Sargas)" },
|
||||
{ 15, 4, -1, 16, -1, 2, 512, -1, PHENOM2, 0 , 0, "Phenom II X2 (Callisto)" },
|
||||
{ 15, 4, -1, 16, -1, 3, 512, -1, PHENOM2, 0 , 0, "Phenom II X3 (Heka)" },
|
||||
{ 15, 4, -1, 16, -1, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4" },
|
||||
{ 15, 4, -1, 16, 4, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
|
||||
{ 15, 5, -1, 16, 5, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
|
||||
{ 15, 4, -1, 16, 10, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Zosma)" },
|
||||
{ 15, 4, -1, 16, 10, 6, 512, -1, PHENOM2, 0 , 0, "Phenom II X6 (Thuban)" },
|
||||
/* Athlon II derivatives: */
|
||||
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_X2 , 0, "Athlon II (Champlain)" },
|
||||
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
|
||||
{ 15, 6, -1, 16, 6, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
|
||||
{ 15, 5, -1, 16, 5, 3, 512, -1, NC, ATHLON_|_64_|_X3 , 0, "Athlon II X3 (Rana)" },
|
||||
{ 15, 5, -1, 16, 5, 4, 512, -1, NC, ATHLON_|_64_|_X4 , 0, "Athlon II X4 (Propus)" },
|
||||
/* Llano APUs (2011): */
|
||||
{ 15, 1, -1, 18, 1, 2, -1, -1, FUSION_EA, 0 , 0, "Llano X2" },
|
||||
{ 15, 1, -1, 18, 1, 3, -1, -1, FUSION_EA, 0 , 0, "Llano X3" },
|
||||
{ 15, 1, -1, 18, 1, 4, -1, -1, FUSION_EA, 0 , 0, "Llano X4" },
|
||||
|
||||
/* Family 14h: Bobcat Architecture (2011) */
|
||||
{ 15, 2, -1, 20, -1, 1, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario" },
|
||||
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario (Dual-core)" },
|
||||
{ 15, 1, -1, 20, -1, 1, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate" },
|
||||
{ 15, 1, -1, 20, -1, 2, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate (Dual-core)" },
|
||||
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_Z, 0 , 0, "Brazos Desna (Dual-core)" },
|
||||
|
||||
/* Family 15h: Bulldozer Architecture (2011) */
|
||||
{ 15, -1, -1, 21, 0, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
|
||||
{ 15, -1, -1, 21, 1, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
|
||||
{ 15, -1, -1, 21, 1, 6, -1, -1, NC, 0 , 0, "Bulldozer X3" },
|
||||
{ 15, -1, -1, 21, 1, 8, -1, -1, NC, 0 , 0, "Bulldozer X4" },
|
||||
/* 2nd-gen, Piledriver core (2012): */
|
||||
{ 15, -1, -1, 21, 2, 4, -1, -1, NC, 0 , 0, "Vishera X2" },
|
||||
{ 15, -1, -1, 21, 2, 6, -1, -1, NC, 0 , 0, "Vishera X3" },
|
||||
{ 15, -1, -1, 21, 2, 8, -1, -1, NC, 0 , 0, "Vishera X4" },
|
||||
{ 15, 0, -1, 21, 16, 2, -1, -1, FUSION_A, 0 , 0, "Trinity X2" },
|
||||
{ 15, 0, -1, 21, 16, 4, -1, -1, FUSION_A, 0 , 0, "Trinity X4" },
|
||||
{ 15, 3, -1, 21, 19, 2, -1, -1, FUSION_A, 0 , 0, "Richland X2" },
|
||||
{ 15, 3, -1, 21, 19, 4, -1, -1, FUSION_A, 0 , 0, "Richland X4" },
|
||||
/* 3rd-gen, Steamroller core (2014): */
|
||||
{ 15, 0, -1, 21, 48, 2, -1, -1, FUSION_A, 0 , 0, "Kaveri X2" },
|
||||
{ 15, 0, -1, 21, 48, 4, -1, -1, FUSION_A, 0 , 0, "Kaveri X4" },
|
||||
{ 15, 8, -1, 21, 56, 4, -1, -1, FUSION_A, 0 , 0, "Godavari X4" },
|
||||
/* 4th-gen, Excavator core (2015): */
|
||||
{ 15, 1, -1, 21, 96, 2, -1, -1, FUSION_A, 0 , 0, "Carrizo X2" },
|
||||
{ 15, 1, -1, 21, 96, 4, -1, -1, FUSION_A, 0 , 0, "Carrizo X4" },
|
||||
{ 15, 5, -1, 21, 101, 2, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X2" },
|
||||
{ 15, 5, -1, 21, 101, 4, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X4" },
|
||||
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_A, 0 , 0, "Stoney Ridge X2" },
|
||||
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_E, 0 , 0, "Stoney Ridge X2" },
|
||||
|
||||
/* Family 16h: Jaguar Architecture (2013) */
|
||||
{ 15, 0, -1, 22, 0, 2, -1, -1, FUSION_A, 0 , 0, "Kabini X2" },
|
||||
{ 15, 0, -1, 22, 0, 4, -1, -1, FUSION_A, 0 , 0, "Kabini X4" },
|
||||
/* 2nd-gen, Puma core (2013): */
|
||||
{ 15, 0, -1, 22, 48, 2, -1, -1, FUSION_E, 0 , 0, "Mullins X2" },
|
||||
{ 15, 0, -1, 22, 48, 4, -1, -1, FUSION_A, 0 , 0, "Mullins X4" },
|
||||
|
||||
/* Family 17h: Zen Architecture (2017) */
|
||||
{ 15, -1, -1, 23, 1, 8, -1, -1, NC, 0 , 0, "Ryzen 7" },
|
||||
{ 15, -1, -1, 23, 1, 6, -1, -1, NC, 0 , _1600, "Ryzen 5" },
|
||||
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1500, "Ryzen 5" },
|
||||
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1400, "Ryzen 5" },
|
||||
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Ryzen 3" },
|
||||
//{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Raven Ridge" }, //TBA
|
||||
|
||||
/* Newer Opterons: */
|
||||
{ 15, 9, -1, 22, 9, 8, -1, -1, NC, OPTERON_ , 0, "Magny-Cours Opteron" },
|
||||
};
|
||||
|
||||
|
||||
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 20, CPU_FEATURE_NX },
|
||||
{ 22, CPU_FEATURE_MMXEXT },
|
||||
{ 25, CPU_FEATURE_FXSR_OPT },
|
||||
{ 30, CPU_FEATURE_3DNOWEXT },
|
||||
{ 31, CPU_FEATURE_3DNOW },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx81[] = {
|
||||
{ 1, CPU_FEATURE_CMP_LEGACY },
|
||||
{ 2, CPU_FEATURE_SVM },
|
||||
{ 5, CPU_FEATURE_ABM },
|
||||
{ 6, CPU_FEATURE_SSE4A },
|
||||
{ 7, CPU_FEATURE_MISALIGNSSE },
|
||||
{ 8, CPU_FEATURE_3DNOWPREFETCH },
|
||||
{ 9, CPU_FEATURE_OSVW },
|
||||
{ 10, CPU_FEATURE_IBS },
|
||||
{ 11, CPU_FEATURE_XOP },
|
||||
{ 12, CPU_FEATURE_SKINIT },
|
||||
{ 13, CPU_FEATURE_WDT },
|
||||
{ 16, CPU_FEATURE_FMA4 },
|
||||
{ 21, CPU_FEATURE_TBM },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx87[] = {
|
||||
{ 0, CPU_FEATURE_TS },
|
||||
{ 1, CPU_FEATURE_FID },
|
||||
{ 2, CPU_FEATURE_VID },
|
||||
{ 3, CPU_FEATURE_TTP },
|
||||
{ 4, CPU_FEATURE_TM_AMD },
|
||||
{ 5, CPU_FEATURE_STC },
|
||||
{ 6, CPU_FEATURE_100MHZSTEPS },
|
||||
{ 7, CPU_FEATURE_HWPSTATE },
|
||||
/* id 8 is handled in common */
|
||||
{ 9, CPU_FEATURE_CPB },
|
||||
{ 10, CPU_FEATURE_APERFMPERF },
|
||||
{ 11, CPU_FEATURE_PFI },
|
||||
{ 12, CPU_FEATURE_PA },
|
||||
};
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000001) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000007)
|
||||
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
||||
if (raw->ext_cpuid[0][0] >= 0x8000001a) {
|
||||
/* We have the extended info about SSE unit size */
|
||||
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
|
||||
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int l3_result;
|
||||
const int assoc_table[16] = {
|
||||
0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255
|
||||
};
|
||||
unsigned n = raw->ext_cpuid[0][0];
|
||||
|
||||
if (n >= 0x80000005) {
|
||||
data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff;
|
||||
data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff;
|
||||
data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff;
|
||||
data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff;
|
||||
}
|
||||
if (n >= 0x80000006) {
|
||||
data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff;
|
||||
data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf];
|
||||
data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff;
|
||||
|
||||
l3_result = (raw->ext_cpuid[6][3] >> 18);
|
||||
if (l3_result > 0) {
|
||||
l3_result = 512 * l3_result; /* AMD spec says it's a range,
|
||||
but we take the lower bound */
|
||||
data->l3_cache = l3_result;
|
||||
data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf];
|
||||
data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff;
|
||||
} else {
|
||||
data->l3_cache = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int logical_cpus = -1, num_cores = -1;
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
|
||||
if (raw->ext_cpuid[0][0] >= 8) {
|
||||
num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff);
|
||||
}
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_HT]) {
|
||||
if (num_cores > 1) {
|
||||
if (data->ext_family >= 23)
|
||||
num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8.
|
||||
data->num_cores = num_cores;
|
||||
data->num_logical_cpus = logical_cpus;
|
||||
} else {
|
||||
data->num_cores = 1;
|
||||
data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2);
|
||||
}
|
||||
} else {
|
||||
data->num_cores = data->num_logical_cpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns 1 when the brand string `bs` contains a Turion-style model name
 * such as "TL-60", and 0 otherwise.
 *
 * A match is a '-' that is preceded by exactly two uppercase letters (the
 * character before them must not be uppercase) and followed by exactly two
 * digits (the character after them must not be a digit), where the two
 * letters form one of the known series codes. Because one character of
 * context is needed on the left, a model name at the very start of the string
 * is not recognised.
 */
static int amd_has_turion_modelname(const char *bs)
{
	/* We search for something like TL-60. Ahh, I miss regexes...*/
	int i, l, k;
	char code[3] = {0};
	const char* codes[] = { "ML", "MT", "MK", "TK", "TL", "RM", "ZM", "" };
	/* The <ctype.h> classifiers require values representable as unsigned char;
	 * passing a negative plain char is undefined behaviour. */
	const unsigned char *ubs = (const unsigned char *) bs;

	l = (int) strlen(bs);
	/* i + 3 never exceeds l, so the right-hand context check reads the terminator at worst. */
	for (i = 3; i < l - 2; i++) {
		if (ubs[i] == '-' &&
		    isupper(ubs[i-1]) && isupper(ubs[i-2]) && !isupper(ubs[i-3]) &&
		    isdigit(ubs[i+1]) && isdigit(ubs[i+2]) && !isdigit(ubs[i+3]))
		{
			code[0] = bs[i-2];
			code[1] = bs[i-1];
			/* The list is terminated by an empty string. */
			for (k = 0; codes[k][0]; k++)
				if (!strcmp(codes[k], code)) return 1;
		}
	}
	return 0;
}
|
||||
|
||||
static struct amd_code_and_bits_t decode_amd_codename_part1(const char *bs)
|
||||
{
|
||||
amd_code_t code = NC;
|
||||
uint64_t bits = 0;
|
||||
struct amd_code_and_bits_t result;
|
||||
|
||||
if (strstr(bs, "Dual Core") ||
|
||||
strstr(bs, "Dual-Core") ||
|
||||
strstr(bs, " X2 "))
|
||||
bits |= _X2;
|
||||
if (strstr(bs, " X4 ")) bits |= _X4;
|
||||
if (strstr(bs, " X3 ")) bits |= _X3;
|
||||
if (strstr(bs, "Opteron")) bits |= OPTERON_;
|
||||
if (strstr(bs, "Phenom")) {
|
||||
code = (strstr(bs, "II")) ? PHENOM2 : PHENOM;
|
||||
}
|
||||
if (amd_has_turion_modelname(bs)) {
|
||||
bits |= TURION_;
|
||||
}
|
||||
if (strstr(bs, "Athlon(tm)")) bits |= ATHLON_;
|
||||
if (strstr(bs, "Sempron(tm)")) bits |= SEMPRON_;
|
||||
if (strstr(bs, "Duron")) bits |= DURON_;
|
||||
if (strstr(bs, " 64 ")) bits |= _64_;
|
||||
if (strstr(bs, " FX")) bits |= _FX;
|
||||
if (strstr(bs, " MP")) bits |= _MP_;
|
||||
if (strstr(bs, "Athlon(tm) 64") || strstr(bs, "Athlon(tm) II X") || match_pattern(bs, "Athlon(tm) X#")) {
|
||||
bits |= ATHLON_ | _64_;
|
||||
}
|
||||
if (strstr(bs, "Turion")) bits |= TURION_;
|
||||
|
||||
if (strstr(bs, "mobile") || strstr(bs, "Mobile")) {
|
||||
bits |= MOBILE_;
|
||||
}
|
||||
|
||||
if (strstr(bs, "XP")) bits |= _XP_;
|
||||
if (strstr(bs, "XP-M")) bits |= _M_;
|
||||
if (strstr(bs, "(LV)")) bits |= _LV_;
|
||||
|
||||
if (match_pattern(bs, "C-##")) code = FUSION_C;
|
||||
if (match_pattern(bs, "E-###")) code = FUSION_E;
|
||||
if (match_pattern(bs, "Z-##")) code = FUSION_Z;
|
||||
if (match_pattern(bs, "E#-####") || match_pattern(bs, "A#-####")) code = FUSION_EA;
|
||||
|
||||
result.code = code;
|
||||
result.bits = bits;
|
||||
return result;
|
||||
}
|
||||
|
||||
static int decode_amd_ryzen_model_code(const char* bs)
|
||||
{
|
||||
const struct {
|
||||
int model_code;
|
||||
const char* match_str;
|
||||
} patterns[] = {
|
||||
{ _1600, "1600" },
|
||||
{ _1500, "1500" },
|
||||
{ _1400, "1400" },
|
||||
};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < COUNT_OF(patterns); i++)
|
||||
if (strstr(bs, patterns[i].match_str))
|
||||
return patterns[i].model_code;
|
||||
//
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void decode_amd_codename(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
struct amd_code_and_bits_t code_and_bits = decode_amd_codename_part1(data->brand_str);
|
||||
int i = 0;
|
||||
char* code_str = NULL;
|
||||
int model_code;
|
||||
|
||||
for (i = 0; i < COUNT_OF(amd_code_str); i++) {
|
||||
if (code_and_bits.code == amd_code_str[i].code) {
|
||||
code_str = amd_code_str[i].str;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (/*code == ATHLON_64_X2*/ match_all(code_and_bits.bits, ATHLON_|_64_|_X2) && data->l2_cache < 512) {
|
||||
code_and_bits.bits &= ~(ATHLON_ | _64_);
|
||||
code_and_bits.bits |= SEMPRON_;
|
||||
}
|
||||
if (code_str)
|
||||
debugf(2, "Detected AMD brand code: %d (%s)\n", code_and_bits.code, code_str);
|
||||
else
|
||||
debugf(2, "Detected AMD brand code: %d\n", code_and_bits.code);
|
||||
|
||||
if (code_and_bits.bits) {
|
||||
debugf(2, "Detected AMD bits: ");
|
||||
debug_print_lbits(2, code_and_bits.bits);
|
||||
}
|
||||
// is it Ryzen? if so, we need to detect discern between the four-core 1400/1500 (Ryzen 5) and the four-core Ryzen 3:
|
||||
model_code = (data->ext_family == 23) ? decode_amd_ryzen_model_code(data->brand_str) : 0;
|
||||
|
||||
internal->code.amd = code_and_bits.code;
|
||||
internal->bits = code_and_bits.bits;
|
||||
internal->score = match_cpu_codename(cpudb_amd, COUNT_OF(cpudb_amd), data, code_and_bits.code,
|
||||
code_and_bits.bits, model_code);
|
||||
}
|
||||
|
||||
/*
 * Entry point of the AMD recognizer: runs the feature, cache, core-count and
 * codename decoders on the given raw data, in that order, and returns 0.
 *
 * The codename step comes last because it reads data->l2_cache, which the
 * cache step fills in.
 */
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
	load_amd_features(raw, data);           /* feature flags */
	decode_amd_cache_info(raw, data);       /* L1/L2/L3 fields */
	decode_amd_number_of_cores(raw, data);  /* num_cores, num_logical_cpus */
	decode_amd_codename(raw, data, internal);

	return 0;
}
|
||||
|
||||
void cpuid_get_list_amd(struct cpu_list_t* list)
|
||||
{
|
||||
generic_get_cpu_list(cpudb_amd, COUNT_OF(cpudb_amd), list);
|
||||
}
|
||||
32
compat/libcpuid/recog_amd.h
Normal file
32
compat/libcpuid/recog_amd.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __RECOG_AMD_H__
#define __RECOG_AMD_H__

/*
 * Forward declarations of the struct tags used below. Without them, a
 * translation unit that includes this header before the structs are declared
 * would give the tags prototype scope, making the prototypes incompatible
 * with the real definitions.
 */
struct cpu_raw_data_t;
struct cpu_id_t;
struct internal_id_info_t;
struct cpu_list_t;

/* Runs AMD-specific identification on `raw`, filling in `data` and `internal`. */
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);

/* Fills `list` from the AMD CPU database. */
void cpuid_get_list_amd(struct cpu_list_t* list);

#endif /* __RECOG_AMD_H__ */
|
||||
935
compat/libcpuid/recog_intel.c
Normal file
935
compat/libcpuid/recog_intel.c
Normal file
@@ -0,0 +1,935 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_intel.h"
|
||||
|
||||
/*
 * Table pairing Intel brand codes with their identifiers spelled as strings.
 * The rows come from including intel_code_t.h while CODE()/CODE2() are
 * defined (presumably that header is a list of CODE()/CODE2() invocations -
 * confirm). CODE(x) expands to { x, "x" }; CODE2(x, y) ignores its second
 * argument. Only CODE is undefined again afterwards.
 */
const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = {
	#define CODE(x) { x, #x }
	#define CODE2(x, y) CODE(x)
	#include "intel_code_t.h"
	#undef CODE
};
|
||||
|
||||
/* A brand code together with a mask of name-fragment flags. */
typedef struct {
	int code;      /* brand code */
	uint64_t bits; /* bit mask; presumably built from intel_bits_t flags - confirm against users */
} intel_code_and_bits_t;
|
||||
|
||||
/*
 * Model codes for Intel CPUs. UNKNOWN is -1 and the real codes count up from
 * 100, so none of them is 0.
 */
enum _intel_model_t {
	UNKNOWN = -1,
	_3000 = 100,
	_3100,
	_3200,
	X3200,
	_3300,
	X3300,
	_5100,
	_5200,
	_5300,
	_5400,
	_2xxx, /* Core i[357] 2xxx */
	_3xxx, /* Core i[357] 3xxx */
};
typedef enum _intel_model_t intel_model_t;
|
||||
|
||||
enum _intel_bits_t {
|
||||
PENTIUM_ = LBIT( 0 ),
|
||||
CELERON_ = LBIT( 1 ),
|
||||
MOBILE_ = LBIT( 2 ),
|
||||
CORE_ = LBIT( 3 ),
|
||||
_I_ = LBIT( 4 ),
|
||||
_M_ = LBIT( 5 ),
|
||||
_3 = LBIT( 6 ),
|
||||
_5 = LBIT( 7 ),
|
||||
_7 = LBIT( 8 ),
|
||||
XEON_ = LBIT( 9 ),
|
||||
_MP = LBIT( 10 ),
|
||||
ATOM_ = LBIT( 11 ),
|
||||
|
||||
};
|
||||
typedef enum _intel_bits_t intel_bits_t;
|
||||
|
||||
/*
 * Database of known Intel CPUs used to turn raw identification data into a
 * human-readable code name (the last column).
 *
 * NOTE(review): struct match_entry_t is declared outside this file. Judging by
 * the values used below, the columns are presumably
 *   family, model, stepping, ext_family, ext_model, ncores, l2cache (KB),
 *   l3cache (KB), brand code (intel_code_t), model bits (intel_bits_t mask),
 *   model code (intel_model_t), name
 * with -1 acting as a wildcard -- confirm against libcpuid_internal.h.
 */
const struct match_entry_t cpudb_intel[] = {
	{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Intel CPU" },

	/* i486 */
	{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown i486" },
	{ 4, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-25/33" },
	{ 4, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-50" },
	{ 4, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX" },
	{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2" },
	{ 4, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SL" },
	{ 4, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX2" },
	{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2 WriteBack" },
	{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4" },
	{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4 WriteBack" },

	/* All Pentia:
	Pentium 1 */
	{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium" },
	{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium A-Step" },
	{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.8u)" },
	{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
	{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium OverDrive" },
	{ 5, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
	{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
	{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium MMX (0.25u)" },

	/* Pentium 2 / 3 / M / Conroe / whatsnext - all P6 based. */
	{ 6, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown P6" },
	{ 6, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
	{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
	{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Klamath)" },
	{ 6, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Deschutes)" },
	{ 6, 5, -1, -1, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile Pentium II (Tonga)"},
	{ 6, 6, -1, -1, -1, 1, -1, -1, NC,0 , 0, "Pentium II (Dixon)" },

	{ 6, 3, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Klamath)" },
	{ 6, 5, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Drake)" },
	{ 6, 6, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Dixon)" },

	{ 6, 5, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Covington)" },
	{ 6, 6, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Mendocino)" },

	/* -------------------------------------------------- */

	{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Katmai)" },
	{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
	{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
	{ 6, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Tualatin)" },

	{ 6, 7, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tanner)" },
	{ 6, 8, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
	{ 6, 10, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
	{ 6, 11, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tualatin)" },

	{ 6, 7, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Katmai)" },
	{ 6, 8, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
	{ 6, 10, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
	{ 6, 11, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Tualatin)" },

	/* Netburst based (Pentium 4 and later)
	classic P4s */
	{ 15, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium 4" },
	{ 15, -1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "Unknown P-4 Celeron" },
	{ 15, -1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Unknown Xeon" },

	{ 15, 0, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
	{ 15, 1, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
	{ 15, 2, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Northwood)" },
	{ 15, 3, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
	{ 15, 6, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Cedar Mill)" },
	{ 15, 0, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
	{ 15, 1, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
	{ 15, 2, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Northwood)" },
	{ 15, 3, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
	{ 15, 6, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Cedar Mill)" },

	/* server CPUs */
	{ 15, 0, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
	{ 15, 1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
	{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Prestonia)" },
	{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Gallatin)" },
	{ 15, 3, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, IRWIN, XEON_ , 0, "Xeon (Irwindale)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Cranford)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, POTOMAC, XEON_ , 0, "Xeon (Potomac)" },
	{ 15, 6, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Dempsey)" },

	/* Pentium Ds */
	{ 15, 4, 4, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (SmithField)" },
	{ 15, 4, 7, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
	{ 15, 6, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (Presler)" },

	/* Celeron and Celeron Ds */
	{ 15, 1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Willamette)" },
	{ 15, 2, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Northwood)" },
	{ 15, 3, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
	{ 15, 4, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
	{ 15, 6, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Cedar Mill)" },

	/* -------------------------------------------------- */
	/* Intel Core microarchitecture - P6-based */

	{ 6, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium M" },
	{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Unknown Pentium M" },
	{ 6, 9, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Banias)" },
	{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Banias)" },
	{ 6, 9, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
	{ 6, 13, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Dothan)" },
	{ 6, 13, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Dothan)" },
	{ 6, 13, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },

	{ 6, 12, -1, -1, -1, -1, -1, -1, NC, ATOM_ , 0, "Unknown Atom" },
	{ 6, 12, -1, -1, -1, -1, -1, -1, DIAMONDVILLE,ATOM_, 0, "Atom (Diamondville)" },
	{ 6, 12, -1, -1, -1, -1, -1, -1, SILVERTHORNE,ATOM_, 0, "Atom (Silverthorne)" },
	{ 6, 12, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
	{ 6, 6, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
	{ 6, 12, -1, -1, -1, -1, -1, -1, PINEVIEW, ATOM_ , 0, "Atom (Pineview)" },

	/* -------------------------------------------------- */

	{ 6, 14, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Yonah" },
	{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
	{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO, 0 , 0, "Yonah (Core Duo)" },
	{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, MOBILE_, 0, "Yonah (Core Solo)" },
	{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO , MOBILE_, 0, "Yonah (Core Duo)" },
	{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },

	{ 6, 15, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Core 2" },
	{ 6, 15, -1, -1, -1, 2, 4096, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo)" },
	{ 6, 15, -1, -1, -1, 2, 1024, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 1024K" },
	{ 6, 15, -1, -1, -1, 2, 512, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 512K" },
	{ 6, 15, -1, -1, -1, 4, -1, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
	{ 6, 15, -1, -1, -1, 4, 4096, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
	{ 6, 15, -1, -1, -1, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
	{ 6, 15, -1, -1, -1, 2, 2048, -1, CORE_DUO, 0 , 0, "Allendale (Core 2 Duo)" },
	{ 6, 15, -1, -1, -1, 2, -1, -1, MOBILE_CORE_DUO, 0, 0, "Merom (Core 2 Duo)" },
	{ 6, 15, -1, -1, -1, 2, 2048, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 2048K" },
	{ 6, 15, -1, -1, -1, 2, 4096, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 4096K" },

	{ 6, 15, -1, -1, 15, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
	{ 6, 6, -1, -1, 22, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
	{ 6, 15, -1, -1, 15, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
	{ 6, 6, -1, -1, 22, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },


	{ 6, 6, -1, -1, 22, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
	{ 6, 7, -1, -1, 23, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
	{ 6, 6, -1, -1, 22, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
	{ 6, 7, -1, -1, 23, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },

	{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_SOLO , 0, 0, "Unknown Core 45nm" },
	{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_DUO , 0, 0, "Unknown Core 45nm" },
	{ 6, 7, -1, -1, 23, 2, 1024, -1, WOLFDALE , 0, 0, "Celeron Wolfdale 1M" },
	{ 6, 7, -1, -1, 23, 2, 2048, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 2M" },
	{ 6, 7, -1, -1, 23, 2, 3072, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 3M" },
	{ 6, 7, -1, -1, 23, 2, 6144, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 6M" },
	{ 6, 7, -1, -1, 23, 1, -1, -1, MOBILE_CORE_DUO , 0, 0, "Penryn (Core 2 Duo)" },
	{ 6, 7, -1, -1, 23, 2, 1024, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo)" },
	{ 6, 7, -1, -1, 23, 2, 3072, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 3M" },
	{ 6, 7, -1, -1, 23, 2, 6144, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 6M" },
	{ 6, 7, -1, -1, 23, 4, 2048, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 2M"},
	{ 6, 7, -1, -1, 23, 4, 3072, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 3M"},
	{ 6, 7, -1, -1, 23, 4, 6144, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 6M"},

	/* Core microarchitecture-based Xeons: */
	{ 6, 14, -1, -1, 14, 1, -1, -1, NC, XEON_ , 0, "Xeon LV" },
	{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _5100, "Xeon (Woodcrest)" },
	{ 6, 15, -1, -1, 15, 2, 2048, -1, NC, XEON_ , _3000, "Xeon (Conroe/2M)" },
	{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _3000, "Xeon (Conroe/4M)" },
	{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , X3200, "Xeon (Kentsfield)" },
	{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , _5300, "Xeon (Clovertown)" },
	{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _3100, "Xeon (Wolfdale)" },
	{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _5200, "Xeon (Wolfdale DP)" },
	{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , _5400, "Xeon (Harpertown)" },
	{ 6, 7, -1, -1, 23, 4, 3072, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/3M)" },
	{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/6M)" },

	/* Nehalem CPUs (45nm): */
	{ 6, 10, -1, -1, 26, 4, -1, -1, GAINESTOWN, XEON_ , 0, "Gainestown (Xeon)" },
	{ 6, 10, -1, -1, 26, 4, -1, 4096, GAINESTOWN, XEON_ , 0, "Gainestown 4M (Xeon)" },
	{ 6, 10, -1, -1, 26, 4, -1, 8192, GAINESTOWN, XEON_ , 0, "Gainestown 8M (Xeon)" },
	{ 6, 10, -1, -1, 26, 4, -1, -1, NC, XEON_|_7 , 0, "Bloomfield (Xeon)" },
	{ 6, 10, -1, -1, 26, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Bloomfield (Core i7)" },
	{ 6, 10, -1, -1, 30, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Lynnfield (Core i7)" },
	{ 6, 5, -1, -1, 37, 4, -1, 8192, NC, CORE_|_I_|_5 , 0, "Lynnfield (Core i5)" },

	/* Westmere CPUs (32nm): */
	{ 6, 5, -1, -1, 37, 2, -1, -1, NC, 0 , 0, "Unknown Core i3/i5" },
	{ 6, 12, -1, -1, 44, -1, -1, -1, WESTMERE, XEON_ , 0, "Westmere (Xeon)" },
	{ 6, 12, -1, -1, 44, -1, -1, 12288, WESTMERE, XEON_ , 0, "Gulftown (Xeon)" },
	{ 6, 12, -1, -1, 44, 4, -1, 12288, NC, CORE_|_I_|_7 , 0, "Gulftown (Core i7)" },
	{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_5 , 0, "Clarkdale (Core i5)" },
	{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_3 , 0, "Clarkdale (Core i3)" },
	{ 6, 5, -1, -1, 37, 2, -1, -1, NC, PENTIUM_ , 0, "Arrandale" },
	{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_7 , 0, "Arrandale (Core i7)" },
	{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_5 , 0, "Arrandale (Core i5)" },
	{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_3 , 0, "Arrandale (Core i3)" },

	/* Sandy Bridge CPUs (32nm): */
	{ 6, 10, -1, -1, 42, -1, -1, -1, NC, 0 , 0, "Unknown Sandy Bridge" },
	{ 6, 10, -1, -1, 42, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge (Xeon)" },
	{ 6, 10, -1, -1, 42, -1, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
	{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
	{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Sandy Bridge (Core i5)" },
	{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge (Core i3)" },
	{ 6, 10, -1, -1, 42, 2, -1, -1, NC, PENTIUM_ , 0, "Sandy Bridge (Pentium)" },
	{ 6, 10, -1, -1, 42, 1, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
	{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
	{ 6, 13, -1, -1, 45, -1, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge-E" },
	{ 6, 13, -1, -1, 45, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge-E (Xeon)" },

	/* Ivy Bridge CPUs (22nm): */
	{ 6, 10, -1, -1, 58, -1, -1, -1, NC, XEON_ , 0, "Ivy Bridge (Xeon)" },
	{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Ivy Bridge (Core i7)" },
	{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Ivy Bridge (Core i5)" },
	{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Ivy Bridge (Core i3)" },
	{ 6, 10, -1, -1, 58, 2, -1, -1, NC, PENTIUM_ , 0, "Ivy Bridge (Pentium)" },
	{ 6, 10, -1, -1, 58, 1, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
	{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
	{ 6, 14, -1, -1, 62, -1, -1, -1, NC, 0 , 0, "Ivy Bridge-E" },

	/* Haswell CPUs (22nm): */
	{ 6, 12, -1, -1, 60, -1, -1, -1, NC, XEON_ , 0, "Haswell (Xeon)" },
	{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
	{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
	{ 6, 6, -1, -1, 70, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
	{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
	{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
	{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
	{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
	{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
	{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
	{ 6, 12, -1, -1, 60, 2, -1, -1, NC, PENTIUM_ , 0, "Haswell (Pentium)" },
	{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
	{ 6, 12, -1, -1, 60, 1, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
	{ 6, 15, -1, -1, 63, -1, -1, -1, NC, 0 , 0, "Haswell-E" },

	/* Broadwell CPUs (14nm): */
	{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell (Core i7)" },
	{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell (Core i5)" },
	{ 6, 13, -1, -1, 61, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
	{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
	{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-U (Core i5)" },
	{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-U (Core i3)" },
	{ 6, 13, -1, -1, 61, 2, -1, -1, NC, PENTIUM_ , 0, "Broadwell-U (Pentium)" },
	{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CELERON_ , 0, "Broadwell-U (Celeron)" },
	{ 6, 13, -1, -1, 61, 2, -1, -1, NA, 0 , 0, "Broadwell-U (Core M)" },
	{ 6, 15, -1, -1, 79, -1, -1, -1, NC, XEON_ , 0, "Broadwell-E (Xeon)" },
	{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-E (Core i3)" },
	{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
	{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
	{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
	{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },

	/* Skylake CPUs (14nm): */
	{ 6, 14, -1, -1, 94, -1, -1, -1, NC, XEON_ , 0, "Skylake (Xeon)" },
	{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Skylake (Core i7)" },
	{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Skylake (Core i5)" },
	{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Skylake (Core i3)" },
	{ 6, 14, -1, -1, 94, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
	{ 6, 14, -1, -1, 78, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
	{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
	{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
	{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_7 , 0, "Skylake (Core m7)" },
	{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_5 , 0, "Skylake (Core m5)" },
	{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Skylake (Core m3)" },

	/* Kaby Lake CPUs (14nm): */
	{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Kaby Lake (Core i7)" },
	{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Kaby Lake (Core i5)" },
	{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Kaby Lake (Core i3)" },
	{ 6, 14, -1, -1, 158, 2, -1, -1, NC, PENTIUM_ , 0, "Kaby Lake (Pentium)" },
	{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CELERON_ , 0, "Kaby Lake (Celeron)" },
	{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Kaby Lake (Core m3)" },

	/* Itaniums */
	{ 7, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Itanium" },
	{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Itanium 2" },
};
|
||||
|
||||
|
||||
static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx1[] = {
|
||||
{ 18, CPU_FEATURE_PN },
|
||||
{ 21, CPU_FEATURE_DTS },
|
||||
{ 22, CPU_FEATURE_ACPI },
|
||||
{ 27, CPU_FEATURE_SS },
|
||||
{ 29, CPU_FEATURE_TM },
|
||||
{ 30, CPU_FEATURE_IA64 },
|
||||
{ 31, CPU_FEATURE_PBE },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx1[] = {
|
||||
{ 2, CPU_FEATURE_DTS64 },
|
||||
{ 4, CPU_FEATURE_DS_CPL },
|
||||
{ 5, CPU_FEATURE_VMX },
|
||||
{ 6, CPU_FEATURE_SMX },
|
||||
{ 7, CPU_FEATURE_EST },
|
||||
{ 8, CPU_FEATURE_TM2 },
|
||||
{ 10, CPU_FEATURE_CID },
|
||||
{ 14, CPU_FEATURE_XTPR },
|
||||
{ 15, CPU_FEATURE_PDCM },
|
||||
{ 18, CPU_FEATURE_DCA },
|
||||
{ 21, CPU_FEATURE_X2APIC },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 20, CPU_FEATURE_XD },
|
||||
};
|
||||
const struct feature_map_t matchtable_ebx7[] = {
|
||||
{ 2, CPU_FEATURE_SGX },
|
||||
{ 4, CPU_FEATURE_HLE },
|
||||
{ 11, CPU_FEATURE_RTM },
|
||||
{ 16, CPU_FEATURE_AVX512F },
|
||||
{ 17, CPU_FEATURE_AVX512DQ },
|
||||
{ 18, CPU_FEATURE_RDSEED },
|
||||
{ 19, CPU_FEATURE_ADX },
|
||||
{ 26, CPU_FEATURE_AVX512PF },
|
||||
{ 27, CPU_FEATURE_AVX512ER },
|
||||
{ 28, CPU_FEATURE_AVX512CD },
|
||||
{ 29, CPU_FEATURE_SHA_NI },
|
||||
{ 30, CPU_FEATURE_AVX512BW },
|
||||
{ 31, CPU_FEATURE_AVX512VL },
|
||||
};
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
}
|
||||
// detect TSX/AVX512:
|
||||
if (raw->basic_cpuid[0][0] >= 7) {
|
||||
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Cache kinds that check_case() knows how to store into struct cpu_id_t.
 */
enum _cache_type_t {
	L1I, /* level-1 instruction cache */
	L1D, /* level-1 data cache */
	L2,  /* level-2 cache */
	L3,  /* level-3 cache */
	L4   /* level-4 cache */
};
typedef enum _cache_type_t cache_type_t;
|
||||
|
||||
/*
 * Store the properties of one cache into `data`, if the entry is enabled.
 *
 * on       - nonzero to apply the entry; zero makes the call a no-op
 * cache    - which cache the values describe
 * size     - cache size (the callers pass kilobytes)
 * assoc    - associativity (number of ways)
 * linesize - cache line size in bytes
 * data     - destination CPU description
 *
 * For L1I only the size is recorded; assoc and linesize are ignored.
 */
static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data)
{
	if (on == 0)
		return;

	if (cache == L1I) {
		data->l1_instruction_cache = size;
	} else if (cache == L1D) {
		data->l1_data_cache = size;
		data->l1_assoc      = assoc;
		data->l1_cacheline  = linesize;
	} else if (cache == L2) {
		data->l2_cache     = size;
		data->l2_assoc     = assoc;
		data->l2_cacheline = linesize;
	} else if (cache == L3) {
		data->l3_cache     = size;
		data->l3_assoc     = assoc;
		data->l3_cacheline = linesize;
	} else if (cache == L4) {
		data->l4_cache     = size;
		data->l4_assoc     = assoc;
		data->l4_cacheline = linesize;
	}
	/* any other value: nothing to record */
}
|
||||
|
||||
static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
uint8_t f[256] = {0};
|
||||
int reg, off;
|
||||
uint32_t x;
|
||||
for (reg = 0; reg < 4; reg++) {
|
||||
x = raw->basic_cpuid[2][reg];
|
||||
if (x & 0x80000000) continue;
|
||||
for (off = 0; off < 4; off++) {
|
||||
f[x & 0xff] = 1;
|
||||
x >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
check_case(f[0x06], L1I, 8, 4, 32, data);
|
||||
check_case(f[0x08], L1I, 16, 4, 32, data);
|
||||
check_case(f[0x0A], L1D, 8, 2, 32, data);
|
||||
check_case(f[0x0C], L1D, 16, 4, 32, data);
|
||||
check_case(f[0x22], L3, 512, 4, 64, data);
|
||||
check_case(f[0x23], L3, 1024, 8, 64, data);
|
||||
check_case(f[0x25], L3, 2048, 8, 64, data);
|
||||
check_case(f[0x29], L3, 4096, 8, 64, data);
|
||||
check_case(f[0x2C], L1D, 32, 8, 64, data);
|
||||
check_case(f[0x30], L1I, 32, 8, 64, data);
|
||||
check_case(f[0x39], L2, 128, 4, 64, data);
|
||||
check_case(f[0x3A], L2, 192, 6, 64, data);
|
||||
check_case(f[0x3B], L2, 128, 2, 64, data);
|
||||
check_case(f[0x3C], L2, 256, 4, 64, data);
|
||||
check_case(f[0x3D], L2, 384, 6, 64, data);
|
||||
check_case(f[0x3E], L2, 512, 4, 64, data);
|
||||
check_case(f[0x41], L2, 128, 4, 32, data);
|
||||
check_case(f[0x42], L2, 256, 4, 32, data);
|
||||
check_case(f[0x43], L2, 512, 4, 32, data);
|
||||
check_case(f[0x44], L2, 1024, 4, 32, data);
|
||||
check_case(f[0x45], L2, 2048, 4, 32, data);
|
||||
check_case(f[0x46], L3, 4096, 4, 64, data);
|
||||
check_case(f[0x47], L3, 8192, 8, 64, data);
|
||||
check_case(f[0x4A], L3, 6144, 12, 64, data);
|
||||
check_case(f[0x4B], L3, 8192, 16, 64, data);
|
||||
check_case(f[0x4C], L3, 12288, 12, 64, data);
|
||||
check_case(f[0x4D], L3, 16384, 16, 64, data);
|
||||
check_case(f[0x4E], L2, 6144, 24, 64, data);
|
||||
check_case(f[0x60], L1D, 16, 8, 64, data);
|
||||
check_case(f[0x66], L1D, 8, 4, 64, data);
|
||||
check_case(f[0x67], L1D, 16, 4, 64, data);
|
||||
check_case(f[0x68], L1D, 32, 4, 64, data);
|
||||
/* The following four entries are trace cache. Intel does not
|
||||
* specify a cache-line size, so we use -1 instead
|
||||
*/
|
||||
check_case(f[0x70], L1I, 12, 8, -1, data);
|
||||
check_case(f[0x71], L1I, 16, 8, -1, data);
|
||||
check_case(f[0x72], L1I, 32, 8, -1, data);
|
||||
check_case(f[0x73], L1I, 64, 8, -1, data);
|
||||
|
||||
check_case(f[0x78], L2, 1024, 4, 64, data);
|
||||
check_case(f[0x79], L2, 128, 8, 64, data);
|
||||
check_case(f[0x7A], L2, 256, 8, 64, data);
|
||||
check_case(f[0x7B], L2, 512, 8, 64, data);
|
||||
check_case(f[0x7C], L2, 1024, 8, 64, data);
|
||||
check_case(f[0x7D], L2, 2048, 8, 64, data);
|
||||
check_case(f[0x7F], L2, 512, 2, 64, data);
|
||||
check_case(f[0x82], L2, 256, 8, 32, data);
|
||||
check_case(f[0x83], L2, 512, 8, 32, data);
|
||||
check_case(f[0x84], L2, 1024, 8, 32, data);
|
||||
check_case(f[0x85], L2, 2048, 8, 32, data);
|
||||
check_case(f[0x86], L2, 512, 4, 64, data);
|
||||
check_case(f[0x87], L2, 1024, 8, 64, data);
|
||||
|
||||
if (f[0x49]) {
|
||||
/* This flag is overloaded with two meanings. On Xeon MP
|
||||
* (family 0xf, model 0x6) this means L3 cache. On all other
|
||||
* CPUs (notably Conroe et al), this is L2 cache. In both cases
|
||||
* it means 4MB, 16-way associative, 64-byte line size.
|
||||
*/
|
||||
if (data->family == 0xf && data->model == 0x6) {
|
||||
data->l3_cache = 4096;
|
||||
data->l3_assoc = 16;
|
||||
data->l3_cacheline = 64;
|
||||
} else {
|
||||
data->l2_cache = 4096;
|
||||
data->l2_assoc = 16;
|
||||
data->l2_cacheline = 64;
|
||||
}
|
||||
}
|
||||
if (f[0x40]) {
|
||||
/* Again, a special flag. It means:
|
||||
* 1) If no L2 is specified, then CPU is w/o L2 (0 KB)
|
||||
* 2) If L2 is specified by other flags, then, CPU is w/o L3.
|
||||
*/
|
||||
if (data->l2_cache == -1) {
|
||||
data->l2_cache = 0;
|
||||
} else {
|
||||
data->l3_cache = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int ecx;
|
||||
int ways, partitions, linesize, sets, size, level, typenumber;
|
||||
cache_type_t type;
|
||||
for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) {
|
||||
typenumber = raw->intel_fn4[ecx][0] & 0x1f;
|
||||
if (typenumber == 0) break;
|
||||
level = (raw->intel_fn4[ecx][0] >> 5) & 0x7;
|
||||
if (level == 1 && typenumber == 1)
|
||||
type = L1D;
|
||||
else if (level == 1 && typenumber == 2)
|
||||
type = L1I;
|
||||
else if (level == 2 && typenumber == 3)
|
||||
type = L2;
|
||||
else if (level == 3 && typenumber == 3)
|
||||
type = L3;
|
||||
else if (level == 4 && typenumber == 3)
|
||||
type = L4;
|
||||
else {
|
||||
warnf("deterministic_cache: unknown level/typenumber combo (%d/%d), cannot\n", level, typenumber);
|
||||
warnf("deterministic_cache: recognize cache type\n");
|
||||
continue;
|
||||
}
|
||||
ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1;
|
||||
partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1;
|
||||
linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1;
|
||||
sets = raw->intel_fn4[ecx][2] + 1;
|
||||
size = ways * partitions * linesize * sets / 1024;
|
||||
check_case(1, type, size, ways, linesize, data);
|
||||
}
|
||||
}
|
||||
|
||||
static int decode_intel_extended_topology(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int i, level_type, num_smt = -1, num_core = -1;
|
||||
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
|
||||
level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8;
|
||||
switch (level_type) {
|
||||
case 0x01:
|
||||
num_smt = raw->intel_fn11[i][1] & 0xffff;
|
||||
break;
|
||||
case 0x02:
|
||||
num_core = raw->intel_fn11[i][1] & 0xffff;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (num_smt == -1 || num_core == -1) return 0;
|
||||
data->num_logical_cpus = num_core;
|
||||
data->num_cores = num_core / num_smt;
|
||||
// make sure num_cores is at least 1. In VMs, the CPUID instruction
|
||||
// is rigged and may give nonsensical results, but we should at least
|
||||
// avoid outputs like data->num_cores == 0.
|
||||
if (data->num_cores <= 0) data->num_cores = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int logical_cpus = -1, num_cores = -1;
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 11) {
|
||||
if (decode_intel_extended_topology(raw, data)) return;
|
||||
}
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
|
||||
if (raw->basic_cpuid[0][0] >= 4) {
|
||||
num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f);
|
||||
}
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_HT]) {
|
||||
if (num_cores > 1) {
|
||||
data->num_cores = num_cores;
|
||||
data->num_logical_cpus = logical_cpus;
|
||||
} else {
|
||||
data->num_cores = 1;
|
||||
data->num_logical_cpus = (logical_cpus >= 1 ? logical_cpus : 1);
|
||||
if (data->num_logical_cpus == 1)
|
||||
data->flags[CPU_FEATURE_HT] = 0;
|
||||
}
|
||||
} else {
|
||||
data->num_cores = data->num_logical_cpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
|
||||
{
|
||||
intel_code_t code = (intel_code_t) NC;
|
||||
intel_code_and_bits_t result;
|
||||
uint64_t bits = 0;
|
||||
int i = 0;
|
||||
const char* bs = data->brand_str;
|
||||
const char* s;
|
||||
const struct { intel_code_t c; const char *search; } matchtable[] = {
|
||||
{ PENTIUM_M, "Pentium(R) M" },
|
||||
{ CORE_SOLO, "Pentium(R) Dual CPU" },
|
||||
{ CORE_SOLO, "Pentium(R) Dual-Core" },
|
||||
{ PENTIUM_D, "Pentium(R) D" },
|
||||
{ CORE_SOLO, "Genuine Intel(R) CPU" },
|
||||
{ CORE_SOLO, "Intel(R) Core(TM)" },
|
||||
{ DIAMONDVILLE, "CPU [N ][23]## " },
|
||||
{ SILVERTHORNE, "CPU Z" },
|
||||
{ PINEVIEW, "CPU [ND][45]## " },
|
||||
{ CEDARVIEW, "CPU [ND]#### " },
|
||||
};
|
||||
|
||||
const struct { uint64_t bit; const char* search; } bit_matchtable[] = {
|
||||
{ XEON_, "Xeon" },
|
||||
{ _MP, " MP" },
|
||||
{ ATOM_, "Atom(TM) CPU" },
|
||||
{ MOBILE_, "Mobile" },
|
||||
{ CELERON_, "Celeron" },
|
||||
{ PENTIUM_, "Pentium" },
|
||||
};
|
||||
|
||||
for (i = 0; i < COUNT_OF(bit_matchtable); i++) {
|
||||
if (match_pattern(bs, bit_matchtable[i].search))
|
||||
bits |= bit_matchtable[i].bit;
|
||||
}
|
||||
|
||||
if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) {
|
||||
bits |= CORE_;
|
||||
i--;
|
||||
switch (bs[i + 9]) {
|
||||
case 'i': bits |= _I_; break;
|
||||
case 'm': bits |= _M_; break;
|
||||
}
|
||||
switch (bs[i + 10]) {
|
||||
case '3': bits |= _3; break;
|
||||
case '5': bits |= _5; break;
|
||||
case '7': bits |= _7; break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < COUNT_OF(matchtable); i++)
|
||||
if (match_pattern(bs, matchtable[i].search)) {
|
||||
code = matchtable[i].c;
|
||||
break;
|
||||
}
|
||||
debugf(2, "intel matchtable result is %d\n", code);
|
||||
if (bits & XEON_) {
|
||||
if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##"))
|
||||
bits |= _7;
|
||||
else if (match_pattern(bs, "[ELXW]55##"))
|
||||
code = GAINESTOWN;
|
||||
else if (match_pattern(bs, "[ELXW]56##"))
|
||||
code = WESTMERE;
|
||||
else if (data->l3_cache > 0 && data->family == 16)
|
||||
/* restrict by family, since later Xeons also have L3 ... */
|
||||
code = IRWIN;
|
||||
}
|
||||
if (match_all(bits, XEON_ + _MP) && data->l3_cache > 0)
|
||||
code = POTOMAC;
|
||||
if (code == CORE_SOLO) {
|
||||
s = strstr(bs, "CPU");
|
||||
if (s) {
|
||||
s += 3;
|
||||
while (*s == ' ') s++;
|
||||
if (*s == 'T')
|
||||
bits |= MOBILE_;
|
||||
}
|
||||
}
|
||||
if (code == CORE_SOLO) {
|
||||
switch (data->num_cores) {
|
||||
case 1: break;
|
||||
case 2:
|
||||
{
|
||||
code = CORE_DUO;
|
||||
if (data->num_logical_cpus > 2)
|
||||
code = DUAL_CORE_HT;
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
code = QUAD_CORE;
|
||||
if (data->num_logical_cpus > 4)
|
||||
code = QUAD_CORE_HT;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
code = MORE_THAN_QUADCORE; break;
|
||||
}
|
||||
}
|
||||
|
||||
if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) {
|
||||
if (data->ext_model < 23) {
|
||||
code = MEROM;
|
||||
} else {
|
||||
code = PENRYN;
|
||||
}
|
||||
}
|
||||
if (data->ext_model == 23 &&
|
||||
(code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) {
|
||||
code = WOLFDALE;
|
||||
}
|
||||
|
||||
result.code = code;
|
||||
result.bits = bits;
|
||||
return result;
|
||||
}
|
||||
|
||||
static intel_model_t get_model_code(struct cpu_id_t* data)
|
||||
{
|
||||
int i = 0;
|
||||
int l = (int) strlen(data->brand_str);
|
||||
const char *bs = data->brand_str;
|
||||
int mod_flags = 0, model_no = 0, ndigs = 0;
|
||||
/* If the CPU is a Core ix, then just return the model number generation: */
|
||||
if ((i = match_pattern(bs, "Core(TM) i[357]")) != 0) {
|
||||
i += 11;
|
||||
if (i + 4 >= l) return UNKNOWN;
|
||||
if (bs[i] == '2') return _2xxx;
|
||||
if (bs[i] == '3') return _3xxx;
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
/* For Core2-based Xeons: */
|
||||
while (i < l - 3) {
|
||||
if (bs[i] == 'C' && bs[i+1] == 'P' && bs[i+2] == 'U')
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
if (i >= l - 3) return UNKNOWN;
|
||||
i += 3;
|
||||
while (i < l - 4 && bs[i] == ' ') i++;
|
||||
if (i >= l - 4) return UNKNOWN;
|
||||
while (i < l - 4 && !isdigit(bs[i])) {
|
||||
if (bs[i] >= 'A' && bs[i] <= 'Z')
|
||||
mod_flags |= (1 << (bs[i] - 'A'));
|
||||
i++;
|
||||
}
|
||||
if (i >= l - 4) return UNKNOWN;
|
||||
while (isdigit(bs[i])) {
|
||||
ndigs++;
|
||||
model_no = model_no * 10 + (int) (bs[i] - '0');
|
||||
i++;
|
||||
}
|
||||
if (ndigs != 4) return UNKNOWN;
|
||||
#define HAVE(ch, flags) ((flags & (1 << ((int)(ch-'A')))) != 0)
|
||||
switch (model_no / 100) {
|
||||
case 30: return _3000;
|
||||
case 31: return _3100;
|
||||
case 32:
|
||||
{
|
||||
return (HAVE('X', mod_flags)) ? X3200 : _3200;
|
||||
}
|
||||
case 33:
|
||||
{
|
||||
return (HAVE('X', mod_flags)) ? X3300 : _3300;
|
||||
}
|
||||
case 51: return _5100;
|
||||
case 52: return _5200;
|
||||
case 53: return _5300;
|
||||
case 54: return _5400;
|
||||
default:
|
||||
return UNKNOWN;
|
||||
}
|
||||
#undef HAVE
|
||||
}
|
||||
|
||||
static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
struct cpu_epc_t epc;
|
||||
int i;
|
||||
|
||||
if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf
|
||||
if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS
|
||||
|
||||
// decode sub-leaf 0:
|
||||
if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1;
|
||||
if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1;
|
||||
if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2])
|
||||
data->sgx.present = 1;
|
||||
data->sgx.misc_select = raw->basic_cpuid[0x12][1];
|
||||
data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff;
|
||||
data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff;
|
||||
|
||||
// decode sub-leaf 1:
|
||||
data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32);
|
||||
data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32);
|
||||
|
||||
// decode higher-order subleafs, whenever present:
|
||||
data->sgx.num_epc_sections = -1;
|
||||
for (i = 0; i < 1000000; i++) {
|
||||
epc = cpuid_get_epc(i, raw);
|
||||
if (epc.length == 0) {
|
||||
debugf(2, "SGX: epc section request for %d returned null, no more EPC sections.\n", i);
|
||||
data->sgx.num_epc_sections = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (data->sgx.num_epc_sections == -1) {
|
||||
debugf(1, "SGX: warning: seems to be infinitude of EPC sections.\n");
|
||||
data->sgx.num_epc_sections = 1000000;
|
||||
}
|
||||
}
|
||||
|
||||
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw)
|
||||
{
|
||||
uint32_t regs[4];
|
||||
struct cpu_epc_t retval = {0, 0};
|
||||
if (raw && index < MAX_INTELFN12H_LEVEL - 2) {
|
||||
// this was queried already, use the data:
|
||||
memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs));
|
||||
} else {
|
||||
// query this ourselves:
|
||||
regs[0] = 0x12;
|
||||
regs[2] = 2 + index;
|
||||
regs[1] = regs[3] = 0;
|
||||
cpu_exec_cpuid_ext(regs);
|
||||
}
|
||||
|
||||
// decode values:
|
||||
if ((regs[0] & 0xf) == 0x1) {
|
||||
retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
|
||||
retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
|
||||
retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
|
||||
retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
intel_code_and_bits_t brand;
|
||||
intel_model_t model_code;
|
||||
int i;
|
||||
char* brand_code_str = NULL;
|
||||
|
||||
load_intel_features(raw, data);
|
||||
if (raw->basic_cpuid[0][0] >= 4) {
|
||||
/* Deterministic way is preferred, being more generic */
|
||||
decode_intel_deterministic_cache_info(raw, data);
|
||||
} else if (raw->basic_cpuid[0][0] >= 2) {
|
||||
decode_intel_oldstyle_cache_info(raw, data);
|
||||
}
|
||||
decode_intel_number_of_cores(raw, data);
|
||||
|
||||
brand = get_brand_code_and_bits(data);
|
||||
model_code = get_model_code(data);
|
||||
for (i = 0; i < COUNT_OF(intel_bcode_str); i++) {
|
||||
if (brand.code == intel_bcode_str[i].code) {
|
||||
brand_code_str = intel_bcode_str[i].str;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (brand_code_str)
|
||||
debugf(2, "Detected Intel brand code: %d (%s)\n", brand.code, brand_code_str);
|
||||
else
|
||||
debugf(2, "Detected Intel brand code: %d\n", brand.code);
|
||||
if (brand.bits) {
|
||||
debugf(2, "Detected Intel bits: ");
|
||||
debug_print_lbits(2, brand.bits);
|
||||
}
|
||||
debugf(2, "Detected Intel model code: %d\n", model_code);
|
||||
|
||||
internal->code.intel = brand.code;
|
||||
internal->bits = brand.bits;
|
||||
|
||||
if (data->flags[CPU_FEATURE_SGX]) {
|
||||
debugf(2, "SGX seems to be present, decoding...\n");
|
||||
// if SGX is indicated by the CPU, verify its presence:
|
||||
decode_intel_sgx_features(raw, data);
|
||||
}
|
||||
|
||||
internal->score = match_cpu_codename(cpudb_intel, COUNT_OF(cpudb_intel), data,
|
||||
brand.code, brand.bits, model_code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void cpuid_get_list_intel(struct cpu_list_t* list)
|
||||
{
|
||||
generic_get_cpu_list(cpudb_intel, COUNT_OF(cpudb_intel), list);
|
||||
}
|
||||
32
compat/libcpuid/recog_intel.h
Normal file
32
compat/libcpuid/recog_intel.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __RECOG_INTEL_H__
|
||||
#define __RECOG_INTEL_H__
|
||||
|
||||
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
|
||||
void cpuid_get_list_intel(struct cpu_list_t* list);
|
||||
|
||||
#endif /*__RECOG_INTEL_H__*/
|
||||
111
cpu.c
111
cpu.c
@@ -24,77 +24,70 @@
|
||||
#include <cpuid.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
# include <libcpuid.h>
|
||||
#endif
|
||||
|
||||
#include "cpu.h"
|
||||
#include "utils/applog.h"
|
||||
|
||||
|
||||
#define VENDOR_ID (0)
|
||||
#define PROCESSOR_INFO (1)
|
||||
#define CACHE_TLB_DESCRIPTOR (2)
|
||||
#define EXTENDED_FEATURES (7)
|
||||
#define PROCESSOR_BRAND_STRING_1 (0x80000002)
|
||||
#define PROCESSOR_BRAND_STRING_2 (0x80000003)
|
||||
#define PROCESSOR_BRAND_STRING_3 (0x80000004)
|
||||
|
||||
#define EAX_Reg (0)
|
||||
#define EBX_Reg (1)
|
||||
#define ECX_Reg (2)
|
||||
#define EDX_Reg (3)
|
||||
|
||||
|
||||
static inline void cpuid(int level, int output[4]) {
|
||||
int a, b, c, d;
|
||||
__cpuid_count(level, 0, a, b, c, d);
|
||||
|
||||
output[0] = a;
|
||||
output[1] = b;
|
||||
output[2] = c;
|
||||
output[3] = d;
|
||||
}
|
||||
|
||||
|
||||
static void cpu_brand_string(char* s) {
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid(VENDOR_ID, cpu_info);
|
||||
|
||||
if (cpu_info[EAX_Reg] >= 4) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
cpuid(0x80000002 + i, cpu_info);
|
||||
memcpy(s, cpu_info, sizeof(cpu_info));
|
||||
s += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool has_aes_ni()
|
||||
{
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid(PROCESSOR_INFO, cpu_info);
|
||||
|
||||
return cpu_info[ECX_Reg] & bit_AES;
|
||||
}
|
||||
|
||||
|
||||
static bool has_bmi2() {
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid(EXTENDED_FEATURES, cpu_info);
|
||||
|
||||
return cpu_info[EBX_Reg] & bit_BMI2;
|
||||
}
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
void cpu_init_common() {
|
||||
cpu_brand_string(cpu_info.brand);
|
||||
struct cpu_raw_data_t raw = { 0 };
|
||||
struct cpu_id_t data = { 0 };
|
||||
|
||||
cpuid_get_raw_data(&raw);
|
||||
cpu_identify(&raw, &data);
|
||||
|
||||
strncpy(cpu_info.brand, data.brand_str, sizeof(cpu_info.brand) - 1);
|
||||
|
||||
cpu_info.total_logical_cpus = data.total_logical_cpus;
|
||||
cpu_info.sockets = data.total_logical_cpus / data.num_logical_cpus;
|
||||
cpu_info.total_cores = data.num_cores * cpu_info.sockets;
|
||||
cpu_info.l2_cache = data.l2_cache > 0 ? data.l2_cache * cpu_info.total_cores * cpu_info.sockets : 0;
|
||||
cpu_info.l3_cache = data.l3_cache > 0 ? data.l3_cache * cpu_info.sockets : 0;
|
||||
|
||||
# ifdef __x86_64__
|
||||
cpu_info.flags |= CPU_FLAG_X86_64;
|
||||
# endif
|
||||
|
||||
if (has_aes_ni()) {
|
||||
if (data.flags[CPU_FEATURE_AES]) {
|
||||
cpu_info.flags |= CPU_FLAG_AES;
|
||||
}
|
||||
|
||||
if (has_bmi2()) {
|
||||
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
|
||||
if (cpu_info.total_logical_cpus == 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cache = cpu_info.l3_cache ? cpu_info.l3_cache : cpu_info.l2_cache;
|
||||
int count = 0;
|
||||
const int size = (algo ? 1024 : 2048) * (double_hash ? 2 : 1);
|
||||
|
||||
if (cache) {
|
||||
count = cache / size;
|
||||
}
|
||||
else {
|
||||
count = cpu_info.total_logical_cpus / 2;
|
||||
}
|
||||
|
||||
if (count > cpu_info.total_logical_cpus) {
|
||||
count = cpu_info.total_logical_cpus;
|
||||
}
|
||||
|
||||
if (((float) count / cpu_info.total_logical_cpus * 100) > max_cpu_usage) {
|
||||
count = ceil((float) cpu_info.total_logical_cpus * (max_cpu_usage / 100.0));
|
||||
}
|
||||
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
|
||||
13
cpu.h
13
cpu.h
@@ -24,10 +24,16 @@
|
||||
#ifndef __CPU_H__
|
||||
#define __CPU_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
struct cpu_info {
|
||||
int count;
|
||||
int total_cores;
|
||||
int total_logical_cpus;
|
||||
int flags;
|
||||
char brand[48];
|
||||
int sockets;
|
||||
int l2_cache;
|
||||
int l3_cache;
|
||||
char brand[64];
|
||||
};
|
||||
|
||||
extern struct cpu_info cpu_info;
|
||||
@@ -40,9 +46,8 @@ enum cpu_flags {
|
||||
};
|
||||
|
||||
|
||||
|
||||
void cpu_init();
|
||||
int get_optimal_threads_count();
|
||||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage);
|
||||
int affine_to_cpu_mask(int id, unsigned long mask);
|
||||
|
||||
#endif /* __CPU_H__ */
|
||||
|
||||
107
cpu_stub.c
Normal file
107
cpu_stub.c
Normal file
@@ -0,0 +1,107 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cpuid.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include "cpu.h"
|
||||
|
||||
|
||||
#define VENDOR_ID (0)
|
||||
#define PROCESSOR_INFO (1)
|
||||
#define CACHE_TLB_DESCRIPTOR (2)
|
||||
#define EXTENDED_FEATURES (7)
|
||||
#define PROCESSOR_BRAND_STRING_1 (0x80000002)
|
||||
#define PROCESSOR_BRAND_STRING_2 (0x80000003)
|
||||
#define PROCESSOR_BRAND_STRING_3 (0x80000004)
|
||||
|
||||
#define EAX_Reg (0)
|
||||
#define EBX_Reg (1)
|
||||
#define ECX_Reg (2)
|
||||
#define EDX_Reg (3)
|
||||
|
||||
|
||||
static inline void cpuid(int level, int output[4]) {
|
||||
int a, b, c, d;
|
||||
__cpuid_count(level, 0, a, b, c, d);
|
||||
|
||||
output[0] = a;
|
||||
output[1] = b;
|
||||
output[2] = c;
|
||||
output[3] = d;
|
||||
}
|
||||
|
||||
|
||||
static void cpu_brand_string(char* s) {
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid(VENDOR_ID, cpu_info);
|
||||
|
||||
if (cpu_info[EAX_Reg] >= 4) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
cpuid(0x80000002 + i, cpu_info);
|
||||
memcpy(s, cpu_info, sizeof(cpu_info));
|
||||
s += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool has_aes_ni()
|
||||
{
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid(PROCESSOR_INFO, cpu_info);
|
||||
|
||||
return cpu_info[ECX_Reg] & bit_AES;
|
||||
}
|
||||
|
||||
|
||||
static bool has_bmi2() {
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid(EXTENDED_FEATURES, cpu_info);
|
||||
|
||||
return cpu_info[EBX_Reg] & bit_BMI2;
|
||||
}
|
||||
|
||||
|
||||
void cpu_init_common() {
|
||||
cpu_info.sockets = 1;
|
||||
cpu_brand_string(cpu_info.brand);
|
||||
|
||||
# ifdef __x86_64__
|
||||
cpu_info.flags |= CPU_FLAG_X86_64;
|
||||
# endif
|
||||
|
||||
if (has_aes_ni()) {
|
||||
cpu_info.flags |= CPU_FLAG_AES;
|
||||
}
|
||||
|
||||
if (has_bmi2()) {
|
||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
|
||||
int count = cpu_info.total_logical_cpus / 2;
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
#ifndef __AESB_H__
|
||||
#define __AESB_H__
|
||||
|
||||
void aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
void aesb_pseudo_round_mut(uint8_t *val, const uint8_t *expandedKey);
|
||||
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
|
||||
#endif /* __AESB_H__ */
|
||||
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
* OpenAES License
|
||||
* ---------------------------------------------------------------------------
|
||||
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _OAES_CONFIG_H
|
||||
#define _OAES_CONFIG_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//#ifndef OAES_HAVE_ISAAC
|
||||
//#define OAES_HAVE_ISAAC 1
|
||||
//#endif // OAES_HAVE_ISAAC
|
||||
|
||||
//#ifndef OAES_DEBUG
|
||||
//#define OAES_DEBUG 0
|
||||
//#endif // OAES_DEBUG
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _OAES_CONFIG_H
|
||||
1417
crypto/oaes_lib.c
1417
crypto/oaes_lib.c
File diff suppressed because it is too large
Load Diff
@@ -1,214 +0,0 @@
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
* OpenAES License
|
||||
* ---------------------------------------------------------------------------
|
||||
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _OAES_LIB_H
|
||||
#define _OAES_LIB_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef OAES_SHARED
|
||||
# ifdef oaes_lib_EXPORTS
|
||||
# define OAES_API __declspec(dllexport)
|
||||
# else
|
||||
# define OAES_API __declspec(dllimport)
|
||||
# endif
|
||||
# else
|
||||
# define OAES_API
|
||||
# endif
|
||||
#else
|
||||
# define OAES_API
|
||||
#endif // WIN32
|
||||
|
||||
#define OAES_VERSION "0.8.1"
|
||||
#define OAES_BLOCK_SIZE 16
|
||||
|
||||
typedef void OAES_CTX;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
OAES_RET_FIRST = 0,
|
||||
OAES_RET_SUCCESS = 0,
|
||||
OAES_RET_UNKNOWN,
|
||||
OAES_RET_ARG1,
|
||||
OAES_RET_ARG2,
|
||||
OAES_RET_ARG3,
|
||||
OAES_RET_ARG4,
|
||||
OAES_RET_ARG5,
|
||||
OAES_RET_NOKEY,
|
||||
OAES_RET_MEM,
|
||||
OAES_RET_BUF,
|
||||
OAES_RET_HEADER,
|
||||
OAES_RET_COUNT
|
||||
} OAES_RET;
|
||||
|
||||
/*
|
||||
* oaes_set_option() takes one of these values for its [option] parameter
|
||||
* some options accept either an optional or a required [value] parameter
|
||||
*/
|
||||
// no option
|
||||
#define OAES_OPTION_NONE 0
|
||||
// enable ECB mode, disable CBC mode
|
||||
#define OAES_OPTION_ECB 1
|
||||
// enable CBC mode, disable ECB mode
|
||||
// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
|
||||
// the value of the initialization vector, iv
|
||||
#define OAES_OPTION_CBC 2
|
||||
|
||||
#ifdef OAES_DEBUG
|
||||
typedef int ( * oaes_step_cb ) (
|
||||
const uint8_t state[OAES_BLOCK_SIZE],
|
||||
const char * step_name,
|
||||
int step_count,
|
||||
void * user_data );
|
||||
// enable state stepping mode
|
||||
// value is required, must pass oaes_step_cb to receive the state at each step
|
||||
#define OAES_OPTION_STEP_ON 4
|
||||
// disable state stepping mode
|
||||
#define OAES_OPTION_STEP_OFF 8
|
||||
#endif // OAES_DEBUG
|
||||
|
||||
typedef uint16_t OAES_OPTION;
|
||||
|
||||
typedef struct _oaes_key
|
||||
{
|
||||
size_t data_len;
|
||||
uint8_t *data;
|
||||
size_t exp_data_len;
|
||||
uint8_t *exp_data;
|
||||
size_t num_keys;
|
||||
size_t key_base;
|
||||
} oaes_key;
|
||||
|
||||
typedef struct _oaes_ctx
|
||||
{
|
||||
#ifdef OAES_HAVE_ISAAC
|
||||
randctx * rctx;
|
||||
#endif // OAES_HAVE_ISAAC
|
||||
|
||||
#ifdef OAES_DEBUG
|
||||
oaes_step_cb step_cb;
|
||||
#endif // OAES_DEBUG
|
||||
|
||||
oaes_key * key;
|
||||
OAES_OPTION options;
|
||||
uint8_t iv[OAES_BLOCK_SIZE];
|
||||
} oaes_ctx;
|
||||
/*
|
||||
* // usage:
|
||||
*
|
||||
* OAES_CTX * ctx = oaes_alloc();
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* {
|
||||
* oaes_key_gen_xxx( ctx );
|
||||
* {
|
||||
* oaes_key_export( ctx, _buf, &_buf_len );
|
||||
* // or
|
||||
* oaes_key_export_data( ctx, _buf, &_buf_len );\
|
||||
* }
|
||||
* }
|
||||
* // or
|
||||
* {
|
||||
* oaes_key_import( ctx, _buf, _buf_len );
|
||||
* // or
|
||||
* oaes_key_import_data( ctx, _buf, _buf_len );
|
||||
* }
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* oaes_encrypt( ctx, m, m_len, c, &c_len );
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* oaes_decrypt( ctx, c, c_len, m, &m_len );
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* oaes_free( &ctx );
|
||||
*/
|
||||
|
||||
OAES_API OAES_CTX * oaes_alloc(void);
|
||||
|
||||
OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
|
||||
|
||||
OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
|
||||
OAES_OPTION option, const void * value );
|
||||
|
||||
OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
|
||||
|
||||
OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
|
||||
|
||||
OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
|
||||
|
||||
// export key with header information
|
||||
// set data == NULL to get the required data_len
|
||||
OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
|
||||
uint8_t * data, size_t * data_len );
|
||||
|
||||
// directly export the data from key
|
||||
// set data == NULL to get the required data_len
|
||||
OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
|
||||
uint8_t * data, size_t * data_len );
|
||||
|
||||
// import key with header information
|
||||
OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
|
||||
const uint8_t * data, size_t data_len );
|
||||
|
||||
// directly import data into key
|
||||
OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
|
||||
const uint8_t * data, size_t data_len );
|
||||
|
||||
// set c == NULL to get the required c_len
|
||||
OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
|
||||
const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
|
||||
|
||||
// set m == NULL to get the required m_len
|
||||
OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
|
||||
const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
|
||||
|
||||
// set buf == NULL to get the required buf_len
|
||||
OAES_API OAES_RET oaes_sprintf(
|
||||
char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
|
||||
|
||||
OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
|
||||
|
||||
OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _OAES_LIB_H
|
||||
@@ -1,31 +1,37 @@
|
||||
/*
|
||||
---------------------------------------------------------------------------
|
||||
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Additional permission under GNU GPL version 3 section 7
|
||||
*
|
||||
* If you modify this Program, or any covered work, by linking or combining
|
||||
* it with OpenSSL (or a modified version of that library), containing parts
|
||||
* covered by the terms of OpenSSL License and SSLeay License, the licensors
|
||||
* of this Program grant you additional permission to convey the resulting work.
|
||||
*
|
||||
*/
|
||||
|
||||
The redistribution and use of this software (with or without changes)
|
||||
is allowed without the payment of fees or royalties provided that:
|
||||
/*
|
||||
* The original author of this AES implementation is Karl Malbrain.
|
||||
*/
|
||||
|
||||
source code distributions include the above copyright notice, this
|
||||
list of conditions and the following disclaimer;
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif // __GNUC__
|
||||
|
||||
binary distributions include the above copyright notice, this list
|
||||
of conditions and the following disclaimer in their documentation.
|
||||
|
||||
This software is provided 'as is' with no explicit or implied warranties
|
||||
in respect of its operation, including, but not limited to, correctness
|
||||
and fitness for purpose.
|
||||
---------------------------------------------------------------------------
|
||||
Issue Date: 20/12/2007
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "aesb.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
|
||||
#define TABLE_ALIGN 32
|
||||
#define WPOLY 0x011b
|
||||
@@ -146,25 +152,61 @@ y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(
|
||||
|
||||
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
|
||||
|
||||
inline void aesb_single_round(const uint8_t *restrict in, uint8_t *out, const uint8_t *restrict expandedKey) {
|
||||
round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey));
|
||||
__m128i soft_aesenc(__m128i in, __m128i key)
|
||||
{
|
||||
uint32_t x0, x1, x2, x3;
|
||||
x0 = _mm_cvtsi128_si32(in);
|
||||
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
|
||||
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
|
||||
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
|
||||
|
||||
__m128i out = _mm_set_epi32(
|
||||
(t_fn[0][x3 & 0xff] ^ t_fn[1][(x0 >> 8) & 0xff] ^ t_fn[2][(x1 >> 16) & 0xff] ^ t_fn[3][x2 >> 24]),
|
||||
(t_fn[0][x2 & 0xff] ^ t_fn[1][(x3 >> 8) & 0xff] ^ t_fn[2][(x0 >> 16) & 0xff] ^ t_fn[3][x1 >> 24]),
|
||||
(t_fn[0][x1 & 0xff] ^ t_fn[1][(x2 >> 8) & 0xff] ^ t_fn[2][(x3 >> 16) & 0xff] ^ t_fn[3][x0 >> 24]),
|
||||
(t_fn[0][x0 & 0xff] ^ t_fn[1][(x1 >> 8) & 0xff] ^ t_fn[2][(x2 >> 16) & 0xff] ^ t_fn[3][x3 >> 24]));
|
||||
|
||||
return _mm_xor_si128(out, key);
|
||||
}
|
||||
|
||||
inline void aesb_pseudo_round_mut(uint8_t *restrict val, const uint8_t *restrict expandedKey) {
|
||||
uint32_t b1[4];
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey));
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS);
|
||||
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS);
|
||||
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS);
|
||||
uint8_t Sbox[256] = { // forward s-box
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
|
||||
|
||||
static inline void sub_word(uint8_t* key)
|
||||
{
|
||||
key[0] = Sbox[key[0]];
|
||||
key[1] = Sbox[key[1]];
|
||||
key[2] = Sbox[key[2]];
|
||||
key[3] = Sbox[key[3]];
|
||||
}
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#ifdef __clang__
|
||||
/*
 * Rotate the 32-bit `value` right by `amount` bits and return the result.
 *
 * Both shift counts are reduced modulo 32, so no shift by 32 or more bits is
 * ever performed (that would be undefined behaviour). As a consequence an
 * `amount` of 0 or 32 returns `value` unchanged, and larger amounts rotate by
 * `amount % 32`.
 */
uint32_t _rotr(uint32_t value, uint32_t amount)
{
    return (value >> (amount & 31)) | (value << ((32 - amount) & 31));
}
|
||||
#endif
|
||||
|
||||
/*
 * Software key-generation assist step.
 *
 * Takes 32-bit lanes 1 and 3 of `key`, runs each through sub_word(), and
 * returns a vector laid out (from lane 0 upwards) as:
 *   lane1, rotr(lane1, 8) ^ rcon, lane3, rotr(lane3, 8) ^ rcon
 * where lane1/lane3 are the substituted words.
 */
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
{
    /* Broadcast the wanted lane to position 0, then read it out. */
    uint32_t lane1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55));
    uint32_t lane3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF));

    sub_word((uint8_t*)&lane1);
    sub_word((uint8_t*)&lane3);

    const uint32_t mixed1 = _rotr(lane1, 8) ^ rcon;
    const uint32_t mixed3 = _rotr(lane3, 8) ^ rcon;

    /* _mm_set_epi32 takes its arguments from the highest lane down. */
    return _mm_set_epi32(mixed3, lane3, mixed1, lane1);
}
|
||||
38
memory.c
38
memory.c
@@ -24,10 +24,32 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "persistent_memory.h"
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "options.h"
|
||||
|
||||
static size_t offset = 0;
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
static void * create_persistent_ctx_lite(int thr_id) {
|
||||
struct cryptonight_ctx *ctx = NULL;
|
||||
|
||||
if (!opt_double_hash) {
|
||||
const size_t offset = MEMORY * (thr_id + 1);
|
||||
|
||||
ctx = (struct cryptonight_ctx *) &persistent_memory[offset + MEMORY_LITE];
|
||||
ctx->memory = &persistent_memory[offset];
|
||||
return ctx;
|
||||
}
|
||||
|
||||
ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
|
||||
ctx->memory = &persistent_memory[MEMORY * (thr_id + 1)];
|
||||
|
||||
return ctx;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void * persistent_calloc(size_t num, size_t size) {
|
||||
void *mem = &persistent_memory[offset];
|
||||
offset += (num * size);
|
||||
@@ -36,3 +58,19 @@ void * persistent_calloc(size_t num, size_t size) {
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
|
||||
void * create_persistent_ctx(int thr_id) {
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return create_persistent_ctx_lite(thr_id);
|
||||
}
|
||||
# endif
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
|
||||
|
||||
const int ratio = opt_double_hash ? 2 : 1;
|
||||
ctx->memory = &persistent_memory[MEMORY * (thr_id * ratio + 1)];
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
209
options.c
209
options.c
@@ -36,25 +36,32 @@
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
|
||||
|
||||
int64_t opt_affinity = -1L;
|
||||
int opt_n_threads = 0;
|
||||
int opt_algo_variant = 0;
|
||||
int opt_retries = 5;
|
||||
int opt_retry_pause = 5;
|
||||
int opt_donate_level = DONATE_LEVEL;
|
||||
bool opt_colors = true;
|
||||
bool opt_keepalive = false;
|
||||
bool opt_background = false;
|
||||
char *opt_url = NULL;
|
||||
char *opt_backup_url = NULL;
|
||||
char *opt_userpass = NULL;
|
||||
char *opt_user = NULL;
|
||||
char *opt_pass = NULL;
|
||||
int64_t opt_affinity = -1L;
|
||||
int opt_n_threads = 0;
|
||||
int opt_algo_variant = 0;
|
||||
int opt_retries = 5;
|
||||
int opt_retry_pause = 5;
|
||||
int opt_donate_level = DONATE_LEVEL;
|
||||
int opt_max_cpu_usage = 75;
|
||||
bool opt_colors = true;
|
||||
bool opt_keepalive = false;
|
||||
bool opt_background = false;
|
||||
bool opt_double_hash = false;
|
||||
bool opt_safe = false;
|
||||
bool opt_nicehash = false;
|
||||
char *opt_url = NULL;
|
||||
char *opt_backup_url = NULL;
|
||||
char *opt_userpass = NULL;
|
||||
char *opt_user = NULL;
|
||||
char *opt_pass = NULL;
|
||||
|
||||
enum mining_algo opt_algo = ALGO_CRYPTONIGHT;
|
||||
|
||||
|
||||
static char const usage[] = "\
|
||||
Usage: " APP_ID " [OPTIONS]\n\
|
||||
Options:\n\
|
||||
-a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\
|
||||
-o, --url=URL URL of mining server\n\
|
||||
-b, --backup-url=URL URL of backup mining server\n\
|
||||
-O, --userpass=U:P username:password pair for mining server\n\
|
||||
@@ -65,11 +72,14 @@ Options:\n\
|
||||
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
||||
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
||||
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1\n\
|
||||
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
|
||||
--no-color disable colored output\n\
|
||||
--donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\
|
||||
-B, --background run the miner in the background\n\
|
||||
-c, --config=FILE load a JSON-format configuration file\n\
|
||||
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
|
||||
--safe safe adjust threads and av settings for current CPU\n\
|
||||
--nicehash enable nicehash support\n\
|
||||
-h, --help display this help and exit\n\
|
||||
-V, --version output version information and exit\n\
|
||||
";
|
||||
@@ -79,42 +89,70 @@ static char const short_options[] = "a:c:khBp:Px:r:R:s:t:T:o:u:O:v:Vb:";
|
||||
|
||||
|
||||
static struct option const options[] = {
|
||||
{ "algo", 1, NULL, 'a' },
|
||||
{ "av", 1, NULL, 'v' },
|
||||
{ "background", 0, NULL, 'B' },
|
||||
{ "backup-url", 1, NULL, 'b' },
|
||||
{ "config", 1, NULL, 'c' },
|
||||
{ "cpu-affinity", 1, NULL, 1020 },
|
||||
{ "donate-level", 1, NULL, 1003 },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ "keepalive", 0, NULL ,'k' },
|
||||
{ "no-color", 0, NULL, 1002 },
|
||||
{ "pass", 1, NULL, 'p' },
|
||||
{ "retries", 1, NULL, 'r' },
|
||||
{ "retry-pause", 1, NULL, 'R' },
|
||||
{ "threads", 1, NULL, 't' },
|
||||
{ "url", 1, NULL, 'o' },
|
||||
{ "user", 1, NULL, 'u' },
|
||||
{ "userpass", 1, NULL, 'O' },
|
||||
{ "version", 0, NULL, 'V' },
|
||||
{ "algo", 1, NULL, 'a' },
|
||||
{ "av", 1, NULL, 'v' },
|
||||
{ "background", 0, NULL, 'B' },
|
||||
{ "backup-url", 1, NULL, 'b' },
|
||||
{ "config", 1, NULL, 'c' },
|
||||
{ "cpu-affinity", 1, NULL, 1020 },
|
||||
{ "donate-level", 1, NULL, 1003 },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ "keepalive", 0, NULL ,'k' },
|
||||
{ "max-cpu-usage", 1, NULL, 1004 },
|
||||
{ "nicehash", 0, NULL, 1006 },
|
||||
{ "no-color", 0, NULL, 1002 },
|
||||
{ "pass", 1, NULL, 'p' },
|
||||
{ "retries", 1, NULL, 'r' },
|
||||
{ "retry-pause", 1, NULL, 'R' },
|
||||
{ "safe", 0, NULL, 1005 },
|
||||
{ "threads", 1, NULL, 't' },
|
||||
{ "url", 1, NULL, 'o' },
|
||||
{ "user", 1, NULL, 'u' },
|
||||
{ "userpass", 1, NULL, 'O' },
|
||||
{ "version", 0, NULL, 'V' },
|
||||
{ 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
|
||||
static int get_algo_variant(int variant) {
|
||||
if (variant > XMR_VARIANT_AUTO && variant < XMR_VARIANT_MAX) {
|
||||
return variant;
|
||||
}
|
||||
static const char *algo_names[] = {
|
||||
[ALGO_CRYPTONIGHT] = "cryptonight",
|
||||
# ifndef XMRIG_NO_AEON
|
||||
[ALGO_CRYPTONIGHT_LITE] = "cryptonight-lite"
|
||||
# endif
|
||||
};
|
||||
|
||||
if (cpu_info.flags & CPU_FLAG_AES) {
|
||||
if (cpu_info.flags & CPU_FLAG_BMI2) {
|
||||
return XMR_VARIANT_AESNI_BMI2;
|
||||
}
|
||||
|
||||
return XMR_VARIANT_AESNI;
|
||||
}
|
||||
#ifndef XMRIG_NO_AEON
|
||||
static int get_cryptonight_lite_variant(int variant) {
|
||||
if (variant <= AEON_AV0_AUTO || variant >= AEON_AV_MAX) {
|
||||
return (cpu_info.flags & CPU_FLAG_AES) ? AEON_AV2_AESNI_DOUBLE : AEON_AV4_SOFT_AES_DOUBLE;
|
||||
}
|
||||
|
||||
return XMR_VARIANT_LEGACY;
|
||||
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AEON_AV2_AESNI_DOUBLE) {
|
||||
return variant + 2;
|
||||
}
|
||||
|
||||
return variant;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static int get_algo_variant(int algo, int variant) {
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return get_cryptonight_lite_variant(variant);
|
||||
}
|
||||
# endif
|
||||
|
||||
if (variant <= XMR_AV0_AUTO || variant >= XMR_AV_MAX) {
|
||||
return (cpu_info.flags & CPU_FLAG_AES) ? XMR_AV1_AESNI : XMR_AV3_SOFT_AES;
|
||||
}
|
||||
|
||||
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= XMR_AV2_AESNI_DOUBLE) {
|
||||
return variant + 2;
|
||||
}
|
||||
|
||||
return variant;
|
||||
}
|
||||
|
||||
|
||||
@@ -130,6 +168,22 @@ static void parse_arg(int key, char *arg) {
|
||||
switch (key)
|
||||
{
|
||||
case 'a':
|
||||
for (int i = 0; i < ARRAY_SIZE(algo_names); i++) {
|
||||
if (algo_names[i] && !strcmp(arg, algo_names[i])) {
|
||||
opt_algo = i;
|
||||
break;
|
||||
}
|
||||
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (i == ARRAY_SIZE(algo_names) && !strcmp(arg, "cryptonight-light")) {
|
||||
opt_algo = i = ALGO_CRYPTONIGHT_LITE;
|
||||
}
|
||||
# endif
|
||||
|
||||
if (i == ARRAY_SIZE(algo_names)) {
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 'O': /* --userpass */
|
||||
@@ -200,7 +254,20 @@ static void parse_arg(int key, char *arg) {
|
||||
opt_n_threads = v;
|
||||
break;
|
||||
|
||||
case 'k':
|
||||
case 1004: /* --max-cpu-usage */
|
||||
v = atoi(arg);
|
||||
if (v < 1 || v > 100) {
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
|
||||
opt_max_cpu_usage = v;
|
||||
break;
|
||||
|
||||
case 1005: /* --safe */
|
||||
opt_safe = true;
|
||||
break;
|
||||
|
||||
case 'k': /* --keepalive */
|
||||
opt_keepalive = true;
|
||||
break;
|
||||
|
||||
@@ -230,14 +297,14 @@ static void parse_arg(int key, char *arg) {
|
||||
break;
|
||||
}
|
||||
|
||||
case 'B':
|
||||
case 'B': /* --background */
|
||||
opt_background = true;
|
||||
opt_colors = false;
|
||||
break;
|
||||
|
||||
case 'v': /* --av */
|
||||
v = atoi(arg);
|
||||
if (v < 0 || v > XMR_VARIANT_MAX) {
|
||||
if (v < 0 || v > 1000) {
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
|
||||
@@ -247,7 +314,7 @@ static void parse_arg(int key, char *arg) {
|
||||
case 1020: /* --cpu-affinity */
|
||||
p = strstr(arg, "0x");
|
||||
ul = p ? strtoul(p, NULL, 16) : atol(arg);
|
||||
if (ul > (1UL << cpu_info.count) -1) {
|
||||
if (ul > (1UL << cpu_info.total_logical_cpus) -1) {
|
||||
ul = -1;
|
||||
}
|
||||
|
||||
@@ -258,7 +325,7 @@ static void parse_arg(int key, char *arg) {
|
||||
opt_colors = false;
|
||||
break;
|
||||
|
||||
case 1003:
|
||||
case 1003: /* --donate-level */
|
||||
v = atoi(arg);
|
||||
if (v < 1 || v > 99) {
|
||||
show_usage_and_exit(1);
|
||||
@@ -267,6 +334,10 @@ static void parse_arg(int key, char *arg) {
|
||||
opt_donate_level = v;
|
||||
break;
|
||||
|
||||
case 1006: /* --nicehash */
|
||||
opt_nicehash = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
@@ -336,7 +407,7 @@ static char *parse_url(const char *arg)
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
|
||||
char *dest = malloc(strlen(arg) + 14);
|
||||
char *dest = malloc(strlen(arg) + 16);
|
||||
sprintf(dest, "stratum+tcp://%s", arg);
|
||||
|
||||
return dest;
|
||||
@@ -367,12 +438,12 @@ void parse_cmdline(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
if (!opt_url) {
|
||||
opt_url = strdup("stratum+tcp://proxy.xmrig.com:443");
|
||||
opt_keepalive = true;
|
||||
applog_notime(LOG_ERR, "No pool URL supplied. Exiting.\n", argv[0]);
|
||||
proper_exit(1);
|
||||
}
|
||||
|
||||
if (!opt_backup_url) {
|
||||
opt_backup_url = strdup("stratum+tcp://failover.xmrig.com:80");
|
||||
}
|
||||
if (strstr(opt_url, ".nicehash.com:") != NULL) {
|
||||
opt_nicehash = true;
|
||||
}
|
||||
|
||||
if (!opt_userpass) {
|
||||
@@ -384,20 +455,23 @@ void parse_cmdline(int argc, char *argv[]) {
|
||||
sprintf(opt_userpass, "%s:%s", opt_user, opt_pass);
|
||||
}
|
||||
|
||||
opt_algo_variant = get_algo_variant(opt_algo, opt_algo_variant);
|
||||
|
||||
if (!cryptonight_init(opt_algo_variant)) {
|
||||
applog(LOG_ERR, "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");
|
||||
proper_exit(1);
|
||||
}
|
||||
|
||||
if (!opt_n_threads) {
|
||||
opt_n_threads = get_optimal_threads_count();
|
||||
opt_n_threads = get_optimal_threads_count(opt_algo, opt_double_hash, opt_max_cpu_usage);
|
||||
}
|
||||
|
||||
opt_algo_variant = get_algo_variant(opt_algo_variant);
|
||||
if (!opt_algo_variant) {
|
||||
opt_algo_variant = get_algo_variant(0);
|
||||
if (opt_safe) {
|
||||
const int count = get_optimal_threads_count(opt_algo, opt_double_hash, opt_max_cpu_usage);
|
||||
if (opt_n_threads > count) {
|
||||
opt_n_threads = count;
|
||||
}
|
||||
}
|
||||
|
||||
if (opt_donate_level < 1 || opt_donate_level > 99) {
|
||||
opt_donate_level = 1;
|
||||
}
|
||||
|
||||
cryptonight_init(opt_algo_variant);
|
||||
}
|
||||
|
||||
|
||||
@@ -439,3 +513,8 @@ void show_version_and_exit(void) {
|
||||
#endif
|
||||
proper_exit(0);
|
||||
}
|
||||
|
||||
|
||||
const char* get_current_algo_name(void) {
|
||||
return algo_names[opt_algo];
|
||||
}
|
||||
|
||||
39
options.h
39
options.h
@@ -32,20 +32,40 @@
|
||||
#endif
|
||||
|
||||
|
||||
enum xmr_algo_variant {
|
||||
XMR_VARIANT_AUTO,
|
||||
XMR_VARIANT_AESNI,
|
||||
XMR_VARIANT_AESNI_WOLF,
|
||||
XMR_VARIANT_AESNI_BMI2,
|
||||
XMR_VARIANT_LEGACY,
|
||||
XMR_VARIANT_EXPERIMENTAL,
|
||||
XMR_VARIANT_MAX
|
||||
enum mining_algo {
|
||||
ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */
|
||||
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
|
||||
};
|
||||
|
||||
|
||||
/*
 * Algorithm variants for CryptoNight, as numbered by the --av option.
 * XMR_AV0_AUTO lets the miner choose: get_algo_variant() resolves it to
 * XMR_AV1_AESNI when the CPU reports AES support and to XMR_AV3_SOFT_AES
 * otherwise. XMR_AV_MAX is not a variant; it marks the end of the valid range.
 */
enum xmr_algo_variant {
|
||||
XMR_AV0_AUTO,
|
||||
XMR_AV1_AESNI,
|
||||
XMR_AV2_AESNI_DOUBLE,
|
||||
XMR_AV3_SOFT_AES,
|
||||
XMR_AV4_SOFT_AES_DOUBLE,
|
||||
XMR_AV_MAX
|
||||
};
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
/*
 * Algorithm variants for CryptoNight-Lite, as numbered by the --av option.
 * AEON_AV0_AUTO lets the miner choose: get_cryptonight_lite_variant()
 * resolves it to AEON_AV2_AESNI_DOUBLE when the CPU reports AES support and
 * to AEON_AV4_SOFT_AES_DOUBLE otherwise. AEON_AV_MAX is not a variant; it
 * marks the end of the valid range.
 */
enum aeon_algo_variant {
|
||||
AEON_AV0_AUTO,
|
||||
AEON_AV1_AESNI,
|
||||
AEON_AV2_AESNI_DOUBLE,
|
||||
AEON_AV3_SOFT_AES,
|
||||
AEON_AV4_SOFT_AES_DOUBLE,
|
||||
AEON_AV_MAX
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
extern bool opt_colors;
|
||||
extern bool opt_keepalive;
|
||||
extern bool opt_background;
|
||||
extern bool opt_double_hash;
|
||||
extern bool opt_safe;
|
||||
extern bool opt_nicehash;
|
||||
extern char *opt_url;
|
||||
extern char *opt_backup_url;
|
||||
extern char *opt_userpass;
|
||||
@@ -56,11 +76,14 @@ extern int opt_algo_variant;
|
||||
extern int opt_retry_pause;
|
||||
extern int opt_retries;
|
||||
extern int opt_donate_level;
|
||||
extern int opt_max_cpu_usage;
|
||||
extern int64_t opt_affinity;
|
||||
extern enum mining_algo opt_algo;
|
||||
|
||||
void parse_cmdline(int argc, char *argv[]);
|
||||
void show_usage_and_exit(int status);
|
||||
void show_version_and_exit(void);
|
||||
const char* get_current_algo_name(void);
|
||||
|
||||
extern void proper_exit(int reason);
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ enum memory_flags {
|
||||
};
|
||||
|
||||
|
||||
#define TWO_MB_PAGE 2097152
|
||||
#define MEMORY 2097152
|
||||
|
||||
|
||||
extern char *persistent_memory;
|
||||
@@ -44,6 +44,7 @@ extern int persistent_memory_flags;
|
||||
const char * persistent_memory_allocate();
|
||||
void persistent_memory_free();
|
||||
void * persistent_calloc(size_t num, size_t size);
|
||||
void * create_persistent_ctx(int thr_id);
|
||||
|
||||
|
||||
#endif /* __PERSISTENT_MEMORY_H__ */
|
||||
|
||||
120
stratum.c
120
stratum.c
@@ -58,6 +58,9 @@
|
||||
#define unlikely(expr) (__builtin_expect(!!(expr), 0))
|
||||
|
||||
|
||||
static struct work work;
|
||||
|
||||
|
||||
static bool send_line(curl_socket_t sock, char *s);
|
||||
static bool socket_full(curl_socket_t sock, int timeout);
|
||||
static void buffer_append(struct stratum_ctx *sctx, const char *s);
|
||||
@@ -66,7 +69,7 @@ static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd, curlsocktype p
|
||||
static curl_socket_t opensocket_grab_cb(void *clientp, curlsocktype purpose, struct curl_sockaddr *addr);
|
||||
static int closesocket_cb(void *clientp, curl_socket_t item);
|
||||
static bool login_decode(struct stratum_ctx *sctx, const json_t *val);
|
||||
static bool job_decode(struct stratum_ctx *sctx, const json_t *job);
|
||||
static bool job_decode(const json_t *job);
|
||||
static bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen);
|
||||
|
||||
|
||||
@@ -235,13 +238,19 @@ bool stratum_handle_response(char *buf) {
|
||||
json_t *id_val = json_object_get(val, "id");
|
||||
|
||||
if (!id_val || json_is_null(id_val) || !res_val) {
|
||||
json_decref(val);
|
||||
return false;
|
||||
const char* message;
|
||||
|
||||
if (json_is_object(err_val) && (message = json_string_value(json_object_get(err_val, "message")))) {
|
||||
applog(LOG_ERR, "error: \"%s\"", message);
|
||||
}
|
||||
|
||||
json_decref(val);
|
||||
return false;
|
||||
}
|
||||
|
||||
json_t *status = json_object_get(res_val, "status");
|
||||
|
||||
if (!strcmp(json_string_value(status), "KEEPALIVED") ) {
|
||||
if (status && !strcmp(json_string_value(status), "KEEPALIVED") ) {
|
||||
applog(LOG_DEBUG, "Keepalived receveid");
|
||||
json_decref(val);
|
||||
return true;
|
||||
@@ -285,7 +294,6 @@ bool stratum_keepalived(struct stratum_ctx *sctx)
|
||||
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass)
|
||||
{
|
||||
char *sret;
|
||||
json_error_t err;
|
||||
|
||||
char *req = malloc(128 + strlen(user) + strlen(pass));
|
||||
sprintf(req, "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\",\"agent\":\"%s/%s\"},\"id\":1}", user, pass, APP_NAME, APP_VERSION);
|
||||
@@ -321,19 +329,24 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
json_t *error = json_object_get(val, "error");
|
||||
|
||||
if (!result || json_is_false(result) || (error && !json_is_null(error))) {
|
||||
applog(LOG_ERR, "Stratum authentication failed");
|
||||
const char* message;
|
||||
|
||||
if (json_is_object(error) && (message = json_string_value(json_object_get(error, "message")))) {
|
||||
applog(LOG_ERR, "Stratum authentication failed: \"%s\"", message);
|
||||
}
|
||||
else {
|
||||
applog(LOG_ERR, "Stratum authentication failed");
|
||||
}
|
||||
|
||||
json_decref(val);
|
||||
return false;
|
||||
}
|
||||
|
||||
login_decode(sctx, val);
|
||||
json_t *job = json_object_get(result, "job");
|
||||
|
||||
pthread_mutex_lock(&sctx->work_lock);
|
||||
if (job) {
|
||||
job_decode(sctx, job);
|
||||
if (login_decode(sctx, val) && job(sctx, json_object_get(result, "job"))) {
|
||||
pthread_mutex_lock(&sctx->sock_lock);
|
||||
sctx->ready = true;
|
||||
pthread_mutex_unlock(&sctx->sock_lock);
|
||||
}
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
|
||||
json_decref(val);
|
||||
return true;
|
||||
@@ -492,11 +505,20 @@ static void buffer_append(struct stratum_ctx *sctx, const char *s)
|
||||
*/
|
||||
static bool job(struct stratum_ctx *sctx, json_t *params)
|
||||
{
|
||||
bool ret = false;
|
||||
if (!job_decode(params)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&sctx->work_lock);
|
||||
ret = job_decode(sctx, params);
|
||||
|
||||
if (sctx->work.target != work.target) {
|
||||
stats_set_target(work.target);
|
||||
}
|
||||
|
||||
memcpy(&sctx->work, &work, sizeof(struct work));
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
return ret;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -584,33 +606,18 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
|
||||
return false;
|
||||
}
|
||||
|
||||
json_t *tmp = json_object_get(res, "id");
|
||||
if (!tmp) {
|
||||
const char *id = json_string_value(json_object_get(res, "id"));
|
||||
if (!id || strlen(id) >= (sizeof(sctx->id))) {
|
||||
applog(LOG_ERR, "JSON invalid id");
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *id = json_string_value(tmp);
|
||||
if (!id) {
|
||||
applog(LOG_ERR, "JSON id is not a string");
|
||||
return false;
|
||||
}
|
||||
memset(&sctx->id, 0, sizeof(sctx->id));
|
||||
memcpy(&sctx->id, id, strlen(id));
|
||||
|
||||
memcpy(&sctx->id, id, 64);
|
||||
|
||||
pthread_mutex_lock(&sctx->sock_lock);
|
||||
sctx->ready = true;
|
||||
pthread_mutex_unlock(&sctx->sock_lock);
|
||||
|
||||
tmp = json_object_get(res, "status");
|
||||
if (!tmp) {
|
||||
applog(LOG_ERR, "JSON invalid status");
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *s = json_string_value(tmp);
|
||||
const char *s = json_string_value(json_object_get(res, "status"));
|
||||
if (!s) {
|
||||
applog(LOG_ERR, "JSON status is not a string");
|
||||
applog(LOG_ERR, "JSON invalid status");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -630,46 +637,40 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
|
||||
* @param work
|
||||
* @return
|
||||
*/
|
||||
static bool job_decode(struct stratum_ctx *sctx, const json_t *job) {
|
||||
json_t *tmp = json_object_get(job, "job_id");
|
||||
if (!tmp) {
|
||||
static bool job_decode(const json_t *job) {
|
||||
const char *job_id = json_string_value(json_object_get(job, "job_id"));
|
||||
if (!job_id || strlen(job_id) >= sizeof(work.job_id)) {
|
||||
applog(LOG_ERR, "JSON invalid job id");
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *job_id = json_string_value(tmp);
|
||||
tmp = json_object_get(job, "blob");
|
||||
if (!tmp) {
|
||||
const char *blob = json_string_value(json_object_get(job, "blob"));
|
||||
if (!blob) {
|
||||
applog(LOG_ERR, "JSON invalid blob");
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *hexblob = json_string_value(tmp);
|
||||
if (!hexblob || strlen(hexblob) != 152) {
|
||||
work.blob_size = strlen(blob);
|
||||
if (work.blob_size % 2 != 0) {
|
||||
applog(LOG_ERR, "JSON invalid blob length");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hex2bin(sctx->blob, hexblob, 76)) {
|
||||
applog(LOG_ERR, "JSON inval blob");
|
||||
work.blob_size /= 2;
|
||||
if (work.blob_size < 76 || work.blob_size > (sizeof(work.blob))) {
|
||||
applog(LOG_ERR, "JSON invalid blob length");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t target;
|
||||
jobj_binary(job, "target", &target, 4);
|
||||
|
||||
if (sctx->target != target) {
|
||||
stats_set_target(target);
|
||||
sctx->target = target;
|
||||
if (!hex2bin((unsigned char *) work.blob, blob, work.blob_size)) {
|
||||
applog(LOG_ERR, "JSON invalid blob");
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(sctx->work.data, sctx->blob, 76);
|
||||
memset(sctx->work.target, 0xff, sizeof(sctx->work.target));
|
||||
jobj_binary(job, "target", &work.target, 4);
|
||||
|
||||
sctx->work.target[7] = sctx->target;
|
||||
|
||||
free(sctx->work.job_id);
|
||||
sctx->work.job_id = strdup(job_id);
|
||||
memset(work.job_id, 0, sizeof(work.job_id));
|
||||
memcpy(work.job_id, job_id, strlen(job_id));
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -699,6 +700,7 @@ static bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t bu
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if (!hex2bin(buf, hexstr, buflen)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
20
stratum.h
20
stratum.h
@@ -29,14 +29,18 @@
|
||||
#include <curl/curl.h>
|
||||
|
||||
|
||||
/**
|
||||
* 128tx exploit.
|
||||
*
|
||||
* Max blob size is 84 (75 fixed + 9 variable), aligned to 96.
|
||||
* https://github.com/xmrig/xmrig/issues/1 Thanks fireice-uk.
|
||||
*/
|
||||
struct work {
|
||||
uint32_t data[19];
|
||||
uint32_t target[8];
|
||||
uint32_t hash[8];
|
||||
|
||||
char *job_id;
|
||||
size_t xnonce2_len;
|
||||
unsigned char *xnonce2;
|
||||
uint32_t blob[21] __attribute__((aligned(16)));
|
||||
size_t blob_size __attribute__((aligned(16)));
|
||||
uint32_t target __attribute__((aligned(16)));
|
||||
uint32_t hash[8] __attribute__((aligned(16)));
|
||||
char job_id[64] __attribute__((aligned(16)));
|
||||
};
|
||||
|
||||
|
||||
@@ -53,8 +57,6 @@ struct stratum_ctx {
|
||||
bool ready;
|
||||
|
||||
char id[64];
|
||||
char blob[76];
|
||||
uint32_t target;
|
||||
|
||||
struct work work;
|
||||
struct work g_work;
|
||||
|
||||
@@ -4,4 +4,6 @@ cmake_minimum_required(VERSION 3.0)
|
||||
include(CTest)
|
||||
|
||||
add_subdirectory(unity)
|
||||
add_subdirectory(cryptonight)
|
||||
add_subdirectory(cryptonight)
|
||||
add_subdirectory(cryptonight_lite)
|
||||
add_subdirectory(autoconf)
|
||||
16
test/autoconf/CMakeLists.txt
Normal file
16
test/autoconf/CMakeLists.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
# Unit-test executable for the thread auto-configuration code: builds
# autoconf.c together with the project's cpu.c and registers it with CTest.
set(SOURCES
    autoconf.c
    ../../cpu.h
    ../../cpu.c
)

add_executable(autoconf_app ${SOURCES})

# Everything below is attached to the target itself, so nothing leaks into
# other targets that may later be added to this directory.
target_link_libraries(autoconf_app PRIVATE unity)

# Project headers (cpu.h, options.h) live two levels up.
target_include_directories(autoconf_app PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../..")

target_compile_definitions(autoconf_app PRIVATE BUILD_TEST)

# Target options are placed after CMAKE_C_FLAGS_<CONFIG> on the compile line,
# so -O2 still takes precedence over the default Release optimisation level.
target_compile_options(autoconf_app PRIVATE
    -fno-strict-aliasing
    $<$<CONFIG:Release>:-O2>
)

add_test(NAME autoconf_test COMMAND autoconf_app)
|
||||
152
test/autoconf/autoconf.c
Normal file
152
test/autoconf/autoconf.c
Normal file
@@ -0,0 +1,152 @@
|
||||
#include <unity.h>
|
||||
|
||||
#include "cpu.h"
|
||||
#include "options.h"
|
||||
|
||||
struct cpu_info cpu_info = { 0 };
|
||||
|
||||
|
||||
static void set_cpu_info(int total_logical_cpus, int l2_cache, int l3_cache) {
|
||||
cpu_info.total_cores = total_logical_cpus;
|
||||
cpu_info.total_logical_cpus = total_logical_cpus;
|
||||
cpu_info.l2_cache = l2_cache;
|
||||
cpu_info.l3_cache = l3_cache;
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCounti7(void) {
|
||||
set_cpu_info(8, 1024, 8192); // 4C/8T 8 MB (Generic i7 CPU)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(5, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 60));
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 50));
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 35));
|
||||
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 20));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 5));
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCounti5(void) {
|
||||
set_cpu_info(4, 1024, 6144); // 2C/4T 6 MB (Generic i5 CPU)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCounti3(void) {
|
||||
set_cpu_info(4, 512, 3072); // 2C/4T 3 MB (Generic i3 CPU)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCountR7(void) {
|
||||
set_cpu_info(16, 4096, 16384); // 8C/16T 16 MB (AMD Ryzen 7)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(16, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCountTwoE5620(void) {
|
||||
set_cpu_info(16, 2048, 24576); // 8C/16T 24 MB (Two E5620)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(16, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(12, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCountVCPU(void) {
|
||||
set_cpu_info(1, 1024, 15360); // 1C/1T 15 MB (Single core Virtual CPU)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 75));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 75));
|
||||
}
|
||||
|
||||
|
||||
void test_autoconf_should_GetOptimalThreadsCountNoL3(void) {
|
||||
set_cpu_info(8, 8192, 0); // 4C/8T (Multi core Virtual CPU without L3 cache)
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(8, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 100));
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, true, 100));
|
||||
|
||||
TEST_ASSERT_EQUAL_INT(6, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 75));
|
||||
TEST_ASSERT_EQUAL_INT(5, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 60));
|
||||
TEST_ASSERT_EQUAL_INT(4, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 50));
|
||||
TEST_ASSERT_EQUAL_INT(3, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 35));
|
||||
TEST_ASSERT_EQUAL_INT(2, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 20));
|
||||
TEST_ASSERT_EQUAL_INT(1, get_optimal_threads_count(ALGO_CRYPTONIGHT_LITE, false, 5));
|
||||
}
|
||||
|
||||
|
||||
// Unity test runner for the auto-configuration tests: registers each CPU
// profile test and returns the value of UNITY_END() as the process exit code.
int main(void)
|
||||
{
|
||||
UNITY_BEGIN();
|
||||
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCounti7);
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCounti5);
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCounti3);
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountR7);
|
||||
// NOTE(review): the R7 test is registered a second time here; this looks like
// a copy-paste slip for another CPU profile test - confirm which one was meant.
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountR7);
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountTwoE5620);
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountVCPU);
|
||||
RUN_TEST(test_autoconf_should_GetOptimalThreadsCountNoL3);
|
||||
|
||||
return UNITY_END();
|
||||
}
|
||||
@@ -1,44 +1,28 @@
|
||||
set(SOURCES
|
||||
cryptonight.c
|
||||
../../options.h
|
||||
../../algo/cryptonight/cryptonight.h
|
||||
../../algo/cryptonight/cryptonight_common.c
|
||||
../../algo/cryptonight/cryptonight_av4_legacy.c
|
||||
../../algo/cryptonight/cryptonight.c
|
||||
../../algo/cryptonight/cryptonight_av1_aesni.c
|
||||
../../algo/cryptonight/cryptonight_av2_aesni_double.c
|
||||
../../algo/cryptonight/cryptonight_av3_softaes.c
|
||||
../../algo/cryptonight/cryptonight_av4_softaes_double.c
|
||||
../../crypto/c_keccak.c
|
||||
../../crypto/c_blake256.c
|
||||
../../crypto/c_groestl.c
|
||||
../../crypto/c_jh.c
|
||||
../../crypto/c_skein.c
|
||||
../../crypto/oaes_config.h
|
||||
../../crypto/oaes_lib.h
|
||||
../../crypto/oaes_lib.c
|
||||
../../crypto/aesb.c
|
||||
../../crypto/soft_aes.c
|
||||
)
|
||||
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_subdirectory(bmi2)
|
||||
|
||||
add_executable(cryptonight_app ${SOURCES}
|
||||
cryptonight.c
|
||||
../../algo/cryptonight/cryptonight_av1_aesni.c
|
||||
../../algo/cryptonight/cryptonight_av2_aesni_wolf.c
|
||||
../../algo/cryptonight/cryptonight_av5_aesni_experimental.c
|
||||
)
|
||||
|
||||
target_link_libraries(cryptonight_app unity cryptonight_av3_aesni_bmi2)
|
||||
else()
|
||||
add_executable(cryptonight_app ${SOURCES}
|
||||
cryptonight32.c
|
||||
../../algo/cryptonight/cryptonight_av1_aesni32.c
|
||||
)
|
||||
|
||||
target_link_libraries(cryptonight_app unity)
|
||||
endif()
|
||||
|
||||
|
||||
add_executable(cryptonight_app ${SOURCES})
|
||||
target_link_libraries(cryptonight_app unity)
|
||||
|
||||
include_directories(../..)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -fno-strict-aliasing")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")
|
||||
add_definitions(-DBUILD_TEST)
|
||||
add_definitions(-DXMRIG_NO_AEON)
|
||||
|
||||
add_test(cryptonight_test cryptonight_app)
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -mbmi2")
|
||||
include_directories(../../..)
|
||||
add_library(cryptonight_av3_aesni_bmi2 STATIC ../../../algo/cryptonight/cryptonight_av3_aesni_bmi2.c)
|
||||
@@ -1,146 +1,125 @@
|
||||
#include <unity.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <algo/cryptonight/cryptonight.h>
|
||||
#include <string.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#include "options.h"
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
|
||||
bool opt_double_hash = false;
|
||||
|
||||
const static char input1[152] = {
|
||||
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
|
||||
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
||||
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
|
||||
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
|
||||
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
|
||||
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
|
||||
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
|
||||
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
|
||||
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
|
||||
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
|
||||
};
|
||||
|
||||
const static char input2[] = "This is a test";
|
||||
const static char input3[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus pellentesque metus.";
|
||||
|
||||
|
||||
void cryptonight_av1_aesni(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av2_aesni_wolf(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av3_aesni_bmi2(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av5_aesni_experimental(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
|
||||
|
||||
char *bin2hex(const unsigned char *p, size_t len)
|
||||
static char hash[64];
|
||||
#define RESULT1 "1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100"
|
||||
#define RESULT1_DOUBLE "1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd0801001b606a3f4a07d6489a1bcd07697bd16696b61c8ae982f61a90160f4e52828a7f"
|
||||
#define RESULT2 "a084f01d1437a09c6985401b60d43554ae105802c5f5d8a9b3253649c0be6605"
|
||||
#define RESULT3 "0bbe54bd26caa92a1d436eec71cbef02560062fa689fe14d7efcf42566b411cf"
|
||||
|
||||
|
||||
static char *bin2hex(const unsigned char *p, size_t len)
|
||||
{
|
||||
int i;
|
||||
char *s = malloc((len * 2) + 1);
|
||||
if (!s)
|
||||
if (!s) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
for (int i = 0; i < len; i++) {
|
||||
sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
|
||||
{
|
||||
char hex_byte[3];
|
||||
char *ep;
|
||||
|
||||
hex_byte[2] = '\0';
|
||||
static void * create_ctx(int ratio) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * ratio, 16);
|
||||
|
||||
while (*hexstr && len) {
|
||||
if (!hexstr[1]) {
|
||||
return false;
|
||||
}
|
||||
hex_byte[0] = hexstr[0];
|
||||
hex_byte[1] = hexstr[1];
|
||||
*p = (unsigned char) strtol(hex_byte, &ep, 16);
|
||||
if (*ep) {
|
||||
return false;
|
||||
}
|
||||
p++;
|
||||
hexstr += 2;
|
||||
len--;
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
return (len == 0 && *hexstr == 0) ? true : false;
|
||||
|
||||
static void free_ctx(struct cryptonight_ctx *ctx) {
|
||||
_mm_free(ctx->memory);
|
||||
_mm_free(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av1_should_CalcHash(void) {
|
||||
char hash[32];
|
||||
char data[76];
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
cryptonight_av1_aesni(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
cryptonight_av1_aesni(input2, strlen(input2), &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT2, bin2hex(hash, 32));
|
||||
|
||||
cryptonight_av1_aesni(&hash, data, memory, ctx);
|
||||
cryptonight_av1_aesni(input3, strlen(input3), &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT3, bin2hex(hash, 32));
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av2_should_CalcHash(void)
|
||||
{
|
||||
char hash[32];
|
||||
char data[76];
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
cryptonight_av2_aesni_double(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
cryptonight_av2_aesni_wolf(&hash, data, memory, ctx);
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av3_should_CalcHash(void)
|
||||
{
|
||||
char hash[32];
|
||||
char data[76];
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
cryptonight_av3_softaes(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
cryptonight_av3_softaes(input2, strlen(input2), &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT2, bin2hex(hash, 32));
|
||||
|
||||
cryptonight_av3_aesni_bmi2(&hash, data, memory, ctx);
|
||||
cryptonight_av3_softaes(input3, strlen(input3), &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT3, bin2hex(hash, 32));
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av4_should_CalcHash(void)
|
||||
{
|
||||
char hash[32];
|
||||
char data[76];
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
cryptonight_av4_softaes_double(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
cryptonight_av4_legacy(&hash, data, memory, ctx);
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av5_should_CalcHash(void)
|
||||
{
|
||||
char hash[32];
|
||||
char data[76];
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
cryptonight_av5_aesni_experimental(&hash, data, memory, ctx);
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
@@ -152,7 +131,6 @@ int main(void)
|
||||
RUN_TEST(test_cryptonight_av2_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_av3_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_av4_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_av5_should_CalcHash);
|
||||
|
||||
return UNITY_END();
|
||||
}
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
#include <unity.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <algo/cryptonight/cryptonight.h>
|
||||
|
||||
|
||||
void cryptonight_av1_aesni32(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
|
||||
|
||||
|
||||
char *bin2hex(const unsigned char *p, size_t len)
|
||||
{
|
||||
int i;
|
||||
char *s = malloc((len * 2) + 1);
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
|
||||
{
|
||||
char hex_byte[3];
|
||||
char *ep;
|
||||
|
||||
hex_byte[2] = '\0';
|
||||
|
||||
while (*hexstr && len) {
|
||||
if (!hexstr[1]) {
|
||||
return false;
|
||||
}
|
||||
hex_byte[0] = hexstr[0];
|
||||
hex_byte[1] = hexstr[1];
|
||||
*p = (unsigned char) strtol(hex_byte, &ep, 16);
|
||||
if (*ep) {
|
||||
return false;
|
||||
}
|
||||
p++;
|
||||
hexstr += 2;
|
||||
len--;
|
||||
}
|
||||
|
||||
return (len == 0 && *hexstr == 0) ? true : false;
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av1_32_should_CalcHash(void) {
|
||||
char hash[32];
|
||||
char data[76];
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
cryptonight_av1_aesni32(&hash, data, memory, ctx);
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_av4_should_CalcHash(void)
|
||||
{
|
||||
char hash[32];
|
||||
char data[76];
|
||||
|
||||
hex2bin((unsigned char *) &data, "0305a0dbd6bf05cf16e503f3a66f78007cbf34144332ecbfc22ed95c8700383b309ace1923a0964b00000008ba939a62724c0d7581fce5761e9d8a0e6a1c3f924fdd8493d1115649c05eb601", 76);
|
||||
|
||||
uint8_t *memory = (uint8_t *) malloc(MEMORY);
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
cryptonight_av4_legacy(&hash, data, memory, ctx);
|
||||
|
||||
free(memory);
|
||||
free(ctx);
|
||||
|
||||
TEST_ASSERT_EQUAL_STRING("1a3ffbee909b420d91f7be6e5fb56db71b3110d886011e877ee5786afd080100", bin2hex(hash, 32));
|
||||
}
|
||||
|
||||
|
||||
int main(void)
|
||||
{
|
||||
UNITY_BEGIN();
|
||||
|
||||
RUN_TEST(test_cryptonight_av1_32_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_av4_should_CalcHash);
|
||||
|
||||
return UNITY_END();
|
||||
}
|
||||
27
test/cryptonight_lite/CMakeLists.txt
Normal file
27
test/cryptonight_lite/CMakeLists.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
# Unit-test executable for the CryptoNight-Lite hash variants
# (av1..av4), built against the Unity test framework target `unity`.
add_executable(cryptonight_lite_app
    cryptonight_lite.c
    ../../options.h
    ../../algo/cryptonight/cryptonight.h
    ../../algo/cryptonight/cryptonight.c
    ../../algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
    ../../algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
    ../../algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
    ../../algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
    ../../crypto/c_keccak.c
    ../../crypto/c_blake256.c
    ../../crypto/c_groestl.c
    ../../crypto/c_jh.c
    ../../crypto/c_skein.c
    ../../crypto/soft_aes.c
)

# Project root, so that "options.h" and "algo/..." includes resolve.
target_include_directories(cryptonight_lite_app
    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../.."
)

target_compile_definitions(cryptonight_lite_app PRIVATE BUILD_TEST)

# Target-scoped flags instead of mutating CMAKE_C_FLAGS for the directory.
# -O2 is appended after the default Release flags, as it was before.
target_compile_options(cryptonight_lite_app PRIVATE
    -maes
    -fno-strict-aliasing
    $<$<CONFIG:Release>:-O2>
)

target_link_libraries(cryptonight_lite_app PRIVATE unity)

# The NAME/COMMAND signature resolves the target name to the built
# executable's location; the legacy add_test(<name> <command>) form does not.
add_test(NAME cryptonight_lite_test COMMAND cryptonight_lite_app)
|
||||
124
test/cryptonight_lite/cryptonight_lite.c
Normal file
124
test/cryptonight_lite/cryptonight_lite.c
Normal file
@@ -0,0 +1,124 @@
|
||||
#include <unity.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#include "options.h"
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
|
||||
bool opt_double_hash = false;
|
||||
enum mining_algo opt_algo = ALGO_CRYPTONIGHT_LITE;
|
||||
|
||||
const static char input1[152] = {
|
||||
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
|
||||
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
||||
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
|
||||
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
|
||||
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
|
||||
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
|
||||
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
|
||||
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
|
||||
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
|
||||
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
|
||||
};
|
||||
|
||||
|
||||
// Empty stand-ins for the four regular CryptoNight implementations; they do
// nothing with their arguments.
// NOTE(review): presumably these exist only so that cryptonight.c, which is
// compiled into this test, links without the non-lite implementation files -
// confirm against cryptonight.c.
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
|
||||
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
|
||||
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
|
||||
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) {}
|
||||
|
||||
void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
|
||||
|
||||
static char hash[64];
|
||||
#define RESULT1 "3695b4b53bb00358b0ad38dc160feb9e004eece09b83a72ef6ba9864d3510c88"
|
||||
#define RESULT1_DOUBLE "3695b4b53bb00358b0ad38dc160feb9e004eece09b83a72ef6ba9864d3510c8828a22bad3f93d1408fca472eb5ad1cbe75f21d053c8ce5b3af105a57713e21dd"
|
||||
|
||||
|
||||
/**
 * Convert a byte buffer to a lowercase hexadecimal string.
 *
 * @param p   bytes to convert (may be any pointer when len is 0).
 * @param len number of bytes to read from p.
 * @return    newly malloc()ed, NUL-terminated string of 2 * len characters,
 *            or NULL if the allocation fails. The caller owns the result and
 *            must free() it.
 */
static char *bin2hex(const unsigned char *p, size_t len)
{
    static const char digits[] = "0123456789abcdef";

    char *s = malloc((len * 2) + 1);
    if (!s) {
        return NULL;
    }

    /* size_t index: avoids the signed/unsigned comparison against len. */
    for (size_t i = 0; i < len; i++) {
        s[i * 2]     = digits[p[i] >> 4];
        s[i * 2 + 1] = digits[p[i] & 0x0F];
    }

    /* Terminate explicitly so that len == 0 yields "" rather than an
       uninitialized buffer. */
    s[len * 2] = '\0';

    return s;
}
|
||||
|
||||
|
||||
static void * create_ctx(int ratio) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||
ctx->memory = (uint8_t *) _mm_malloc(MEMORY_LITE * ratio, 16);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
|
||||
// Release a context obtained from create_ctx(): the scratchpad is freed
// first, then the context structure itself. ctx must not be NULL because
// ctx->memory is read unconditionally.
static void free_ctx(struct cryptonight_ctx *ctx) {
|
||||
_mm_free(ctx->memory);
|
||||
_mm_free(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_lite_av1_should_CalcHash(void) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
|
||||
|
||||
cryptonight_lite_av1_aesni(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
|
||||
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_lite_av2_should_CalcHash(void)
|
||||
{
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
|
||||
|
||||
cryptonight_lite_av2_aesni_double(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
|
||||
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_lite_av3_should_CalcHash(void) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(1);
|
||||
|
||||
cryptonight_lite_av3_softaes(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1, bin2hex(hash, 32));
|
||||
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
void test_cryptonight_lite_av4_should_CalcHash(void)
|
||||
{
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) create_ctx(2);
|
||||
|
||||
cryptonight_lite_av4_softaes_double(input1, 76, &hash, ctx);
|
||||
TEST_ASSERT_EQUAL_STRING(RESULT1_DOUBLE, bin2hex(hash, 64));
|
||||
|
||||
free_ctx(ctx);
|
||||
}
|
||||
|
||||
|
||||
// Unity test runner for the CryptoNight-Lite tests: runs the av1..av4 hash
// tests in order and returns the value of UNITY_END() as the exit code.
int main(void)
|
||||
{
|
||||
UNITY_BEGIN();
|
||||
|
||||
RUN_TEST(test_cryptonight_lite_av1_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_lite_av2_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_lite_av3_should_CalcHash);
|
||||
RUN_TEST(test_cryptonight_lite_av4_should_CalcHash);
|
||||
|
||||
return UNITY_END();
|
||||
}
|
||||
@@ -33,24 +33,20 @@ void cpu_init_common();
|
||||
|
||||
|
||||
void cpu_init() {
|
||||
cpu_info.count = sysconf(_SC_NPROCESSORS_CONF);
|
||||
# ifdef XMRIG_NO_LIBCPUID
|
||||
cpu_info.total_logical_cpus = sysconf(_SC_NPROCESSORS_CONF);
|
||||
# endif
|
||||
|
||||
cpu_init_common();
|
||||
}
|
||||
|
||||
|
||||
int get_optimal_threads_count() {
|
||||
int count = cpu_info.count / 2;
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
|
||||
|
||||
int affine_to_cpu_mask(int id, unsigned long mask)
|
||||
{
|
||||
cpu_set_t set;
|
||||
CPU_ZERO(&set);
|
||||
|
||||
for (unsigned i = 0; i < cpu_info.count; i++) {
|
||||
for (unsigned i = 0; i < cpu_info.total_logical_cpus; i++) {
|
||||
if (mask & (1UL << i)) {
|
||||
CPU_SET(i, &set);
|
||||
}
|
||||
|
||||
@@ -38,7 +38,8 @@ int persistent_memory_flags = 0;
|
||||
|
||||
|
||||
const char * persistent_memory_allocate() {
|
||||
const int size = TWO_MB_PAGE * (opt_n_threads + 1);
|
||||
const int ratio = (opt_double_hash && opt_algo != ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
|
||||
const int size = MEMORY * (opt_n_threads * ratio + 1);
|
||||
persistent_memory_flags |= MEMORY_HUGEPAGES_AVAILABLE;
|
||||
|
||||
persistent_memory = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0);
|
||||
@@ -63,7 +64,7 @@ const char * persistent_memory_allocate() {
|
||||
|
||||
|
||||
void persistent_memory_free() {
|
||||
const int size = TWO_MB_PAGE * (opt_n_threads + 1);
|
||||
const int size = MEMORY * (opt_n_threads + 1);
|
||||
|
||||
if (persistent_memory_flags & MEMORY_HUGEPAGES_ENABLED) {
|
||||
if (persistent_memory_flags & MEMORY_LOCK) {
|
||||
|
||||
@@ -75,6 +75,11 @@ void applog(int prio, const char *fmt, ...)
|
||||
prio = LOG_NOTICE;
|
||||
color = CL_CYN;
|
||||
break;
|
||||
|
||||
case LOG_GREEN:
|
||||
prio = LOG_NOTICE;
|
||||
color = CL_LGR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,7 +121,7 @@ void applog_notime(int prio, const char *fmt, ...)
|
||||
if (opt_colors) {
|
||||
switch (prio) {
|
||||
case LOG_ERR: color = CL_RED; break;
|
||||
case LOG_WARNING: color = CL_YLW; break;
|
||||
case LOG_WARNING: color = CL_LYL; break;
|
||||
case LOG_NOTICE: color = CL_WHT; break;
|
||||
case LOG_INFO: color = ""; break;
|
||||
case LOG_DEBUG: color = CL_GRY; break;
|
||||
|
||||
@@ -30,7 +30,8 @@ enum {
|
||||
LOG_NOTICE,
|
||||
LOG_INFO,
|
||||
LOG_DEBUG,
|
||||
LOG_BLUE = 0x10
|
||||
LOG_BLUE = 0x10,
|
||||
LOG_GREEN
|
||||
};
|
||||
|
||||
#define CL_N "\x1B[0m"
|
||||
@@ -57,7 +58,7 @@ enum {
|
||||
#endif
|
||||
#define CL_LRD "\x1B[01;31m" /* light red */
|
||||
#define CL_LGR "\x1B[01;32m" /* light green */
|
||||
#define CL_YL2 "\x1B[01;33m" /* yellow */
|
||||
#define CL_LYL "\x1B[01;33m" /* light yellow */
|
||||
#define CL_LBL "\x1B[01;34m" /* light blue */
|
||||
#define CL_LMA "\x1B[01;35m" /* light magenta */
|
||||
#define CL_LCY "\x1B[01;36m" /* light cyan */
|
||||
|
||||
@@ -44,25 +44,43 @@ static void print_memory() {
|
||||
static void print_cpu() {
|
||||
const char *t1 = (cpu_info.flags & CPU_FLAG_X86_64) ? OPT_COLOR(CL_LGR, "x86_64") : OPT_COLOR(CL_LRD, "-x86_64");
|
||||
const char *t2 = (cpu_info.flags & CPU_FLAG_AES) ? OPT_COLOR(CL_LGR, "AES-NI") : OPT_COLOR(CL_LRD, "-AES-NI");
|
||||
const char *t3 = (cpu_info.flags & CPU_FLAG_BMI2) ? OPT_COLOR(CL_LGR, "BMI2") : OPT_COLOR(CL_LRD, "-BMI2");
|
||||
|
||||
if (opt_colors) {
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU: %s", cpu_info.brand);
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU FEATURES: %s %s %s", t1, t2, t3);
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU: %s (%d)", cpu_info.brand, cpu_info.sockets);
|
||||
}
|
||||
else {
|
||||
applog_notime(LOG_INFO, " * CPU: %s", cpu_info.brand);
|
||||
applog_notime(LOG_INFO, " * CPU FEATURES: %s %s %s", t1, t2, t3);
|
||||
applog_notime(LOG_INFO, " * CPU: %s (%d)", cpu_info.brand, cpu_info.sockets);
|
||||
}
|
||||
|
||||
# ifndef XMRIG_NO_LIBCPUID
|
||||
if (opt_colors) {
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU L2/L3: %.1f MB/%.1f MB", cpu_info.l2_cache / 1024.0, cpu_info.l3_cache / 1024.0);
|
||||
}
|
||||
else {
|
||||
applog_notime(LOG_INFO, " * CPU L2/L3: %.1f MB/%.1f MB", cpu_info.l2_cache / 1024.0, cpu_info.l3_cache / 1024.0);
|
||||
}
|
||||
# endif
|
||||
|
||||
if (opt_colors) {
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "CPU FEATURES: %s %s", t1, t2);
|
||||
}
|
||||
else {
|
||||
applog_notime(LOG_INFO, " * CPU FEATURES: %s %s", t1, t2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void print_threads() {
|
||||
const char *extra = "";
|
||||
if (opt_nicehash) {
|
||||
extra = ", nicehash";
|
||||
}
|
||||
|
||||
if (opt_colors) {
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, donate=%d%%", opt_n_threads, opt_algo_variant, opt_donate_level);
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra);
|
||||
}
|
||||
else {
|
||||
applog_notime(LOG_INFO, " * THREADS: %d, av=%d, donate=%d%%", opt_n_threads, opt_algo_variant, opt_donate_level);
|
||||
applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,14 +26,15 @@
|
||||
|
||||
#define APP_ID "xmrig"
|
||||
#define APP_NAME "XMRig"
|
||||
#define APP_VERSION "0.5.0"
|
||||
#define APP_DESC "Monero (XMR) CPU miner"
|
||||
#define APP_VERSION "0.8.1"
|
||||
#define APP_DOMAIN "xmrig.com"
|
||||
#define APP_SITE "www.xmrig.com"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2016-2017 xmrig.com"
|
||||
|
||||
#define APP_VER_MAJOR 0
|
||||
#define APP_VER_MINOR 5
|
||||
#define APP_VER_BUILD 0
|
||||
#define APP_VER_MINOR 8
|
||||
#define APP_VER_BUILD 1
|
||||
#define APP_VER_REV 0
|
||||
|
||||
#endif /* __VERSION_H__ */
|
||||
|
||||
BIN
win/app.ico
Normal file
BIN
win/app.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
37
win/app.rc
Normal file
37
win/app.rc
Normal file
@@ -0,0 +1,37 @@
|
||||
#include <windows.h>
|
||||
#include "../version.h"
|
||||
|
||||
IDI_ICON1 ICON DISCARDABLE "app.ico"
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION APP_VER_MAJOR,APP_VER_MINOR,APP_VER_BUILD,APP_VER_REV
|
||||
PRODUCTVERSION APP_VER_MAJOR,APP_VER_MINOR,APP_VER_BUILD,APP_VER_REV
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS VS_FF_DEBUG
|
||||
#else
|
||||
FILEFLAGS 0x0L
|
||||
#endif
|
||||
FILEOS VOS__WINDOWS32
|
||||
FILETYPE VFT_APP
|
||||
FILESUBTYPE 0x0L
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "000004b0"
|
||||
BEGIN
|
||||
VALUE "CompanyName", APP_SITE
|
||||
VALUE "FileDescription", APP_DESC
|
||||
VALUE "FileVersion", APP_VERSION
|
||||
VALUE "LegalCopyright", APP_COPYRIGHT
|
||||
VALUE "OriginalFilename", "xmrig.exe"
|
||||
VALUE "ProductName", APP_NAME
|
||||
VALUE "ProductVersion", APP_VERSION
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x0, 1200
|
||||
END
|
||||
END
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
@@ -32,21 +32,17 @@ void cpu_init_common();
|
||||
|
||||
|
||||
void cpu_init() {
|
||||
# ifdef XMRIG_NO_LIBCPUID
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo(&sysinfo);
|
||||
|
||||
cpu_info.count = sysinfo.dwNumberOfProcessors;
|
||||
cpu_info.total_logical_cpus = sysinfo.dwNumberOfProcessors;
|
||||
# endif
|
||||
|
||||
cpu_init_common();
|
||||
}
|
||||
|
||||
|
||||
int get_optimal_threads_count(int mining_algo) {
|
||||
int count = cpu_info.count / 2;
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
|
||||
|
||||
int affine_to_cpu_mask(int id, unsigned long mask)
|
||||
{
|
||||
if (id == -1) {
|
||||
|
||||
105
win/memory_win.c
105
win/memory_win.c
@@ -20,13 +20,17 @@
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __MEMORY_H__
|
||||
#define __MEMORY_H__
|
||||
|
||||
#include <windows.h>
|
||||
#include <ntsecapi.h>
|
||||
#include <tchar.h>
|
||||
|
||||
#include "options.h"
|
||||
#include "persistent_memory.h"
|
||||
#include "utils/applog.h"
|
||||
|
||||
|
||||
char *persistent_memory;
|
||||
@@ -51,50 +55,105 @@ Return value: TRUE indicates success, FALSE failure.
|
||||
* AWE Example: https://msdn.microsoft.com/en-us/library/windows/desktop/aa366531(v=vs.85).aspx
|
||||
* Creating a File Mapping Using Large Pages: https://msdn.microsoft.com/en-us/library/aa366543(VS.85).aspx
|
||||
*/
|
||||
static BOOL SetLockPagesPrivilege(HANDLE hProcess, BOOL bEnable) {
|
||||
struct {
|
||||
DWORD Count;
|
||||
LUID_AND_ATTRIBUTES Privilege[1];
|
||||
} Info;
|
||||
static BOOL SetLockPagesPrivilege() {
|
||||
HANDLE token;
|
||||
|
||||
HANDLE Token;
|
||||
BOOL result;
|
||||
|
||||
if (OpenProcessToken(hProcess, TOKEN_ADJUST_PRIVILEGES, &Token) != TRUE) {
|
||||
if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != TRUE) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
Info.Count = 1;
|
||||
Info.Privilege[0].Attributes = bEnable ? SE_PRIVILEGE_ENABLED : 0;
|
||||
TOKEN_PRIVILEGES tp;
|
||||
tp.PrivilegeCount = 1;
|
||||
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||
|
||||
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &(Info.Privilege[0].Luid)) != TRUE) {
|
||||
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid)) != TRUE) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (AdjustTokenPrivileges(Token, FALSE, (PTOKEN_PRIVILEGES) &Info, 0, NULL, NULL) != TRUE) {
|
||||
BOOL rc = AdjustTokenPrivileges(token, FALSE, (PTOKEN_PRIVILEGES) &tp, 0, NULL, NULL);
|
||||
if (rc != TRUE || GetLastError() != ERROR_SUCCESS) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (GetLastError() != ERROR_SUCCESS) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
CloseHandle(Token);
|
||||
CloseHandle(token);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
const char * persistent_memory_allocate() {
|
||||
const int size = TWO_MB_PAGE * (opt_n_threads + 1);
|
||||
static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) {
|
||||
LSA_UNICODE_STRING lsaString;
|
||||
DWORD dwLen = 0;
|
||||
|
||||
if (SetLockPagesPrivilege(GetCurrentProcess(), TRUE)) {
|
||||
dwLen = wcslen(string);
|
||||
lsaString.Buffer = (LPWSTR) string;
|
||||
lsaString.Length = (USHORT)((dwLen) * sizeof(WCHAR));
|
||||
lsaString.MaximumLength = (USHORT)((dwLen + 1) * sizeof(WCHAR));
|
||||
return lsaString;
|
||||
}
|
||||
|
||||
|
||||
static BOOL ObtainLockPagesPrivilege() {
|
||||
HANDLE token;
|
||||
PTOKEN_USER user = NULL;
|
||||
|
||||
if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token) == TRUE) {
|
||||
DWORD size = 0;
|
||||
|
||||
GetTokenInformation(token, TokenUser, NULL, 0, &size);
|
||||
if (size) {
|
||||
user = (PTOKEN_USER) LocalAlloc(LPTR, size);
|
||||
}
|
||||
|
||||
GetTokenInformation(token, TokenUser, user, size, &size);
|
||||
CloseHandle(token);
|
||||
}
|
||||
|
||||
if (!user) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
LSA_HANDLE handle;
|
||||
LSA_OBJECT_ATTRIBUTES attributes;
|
||||
ZeroMemory(&attributes, sizeof(attributes));
|
||||
|
||||
BOOL result = FALSE;
|
||||
if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) {
|
||||
LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME));
|
||||
|
||||
if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) {
|
||||
applog_notime(LOG_WARNING, "Huge pages support was successfully enabled, but reboot required to use it");
|
||||
result = TRUE;
|
||||
}
|
||||
|
||||
LsaClose(handle);
|
||||
}
|
||||
|
||||
LocalFree(user);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static BOOL TrySetLockPagesPrivilege() {
|
||||
if (SetLockPagesPrivilege()) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return ObtainLockPagesPrivilege() && SetLockPagesPrivilege();
|
||||
}
|
||||
|
||||
|
||||
const char * persistent_memory_allocate() {
|
||||
const int ratio = (opt_double_hash && opt_algo != ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
|
||||
const int size = MEMORY * (opt_n_threads * ratio + 1);
|
||||
|
||||
if (TrySetLockPagesPrivilege()) {
|
||||
persistent_memory_flags |= MEMORY_HUGEPAGES_AVAILABLE;
|
||||
}
|
||||
|
||||
persistent_memory = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (!persistent_memory) {
|
||||
persistent_memory = _mm_malloc(size, 4096);
|
||||
persistent_memory = _mm_malloc(size, 16);
|
||||
}
|
||||
else {
|
||||
persistent_memory_flags |= MEMORY_HUGEPAGES_ENABLED;
|
||||
|
||||
185
xmrig.c
185
xmrig.c
@@ -70,16 +70,6 @@ static bool g_want_donate = false;
|
||||
static void workio_cmd_free(struct workio_cmd *wc);
|
||||
|
||||
|
||||
/**
|
||||
* @brief work_free
|
||||
* @param w
|
||||
*/
|
||||
static inline void work_free(struct work *w) {
|
||||
free(w->job_id);
|
||||
free(w->xnonce2);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief work_copy
|
||||
* @param dest
|
||||
@@ -87,14 +77,6 @@ static inline void work_free(struct work *w) {
|
||||
*/
|
||||
static inline void work_copy(struct work *dest, const struct work *src) {
|
||||
memcpy(dest, src, sizeof(struct work));
|
||||
if (src->job_id) {
|
||||
dest->job_id = strdup(src->job_id);
|
||||
}
|
||||
|
||||
if (src->xnonce2) {
|
||||
dest->xnonce2 = malloc(src->xnonce2_len);
|
||||
memcpy(dest->xnonce2, src->xnonce2, src->xnonce2_len);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -117,9 +99,7 @@ static inline void gen_workify(struct stratum_ctx *sctx) {
|
||||
pthread_mutex_lock(&stratum_ctx->work_lock);
|
||||
|
||||
if (stratum_ctx->work.job_id && (!stratum_ctx->g_work_time || strcmp(stratum_ctx->work.job_id, stratum_ctx->g_work.job_id))) {
|
||||
free(sctx->g_work.job_id);
|
||||
memcpy(&sctx->g_work, &sctx->work, sizeof(struct work));
|
||||
sctx->work.job_id = strdup(sctx->work.job_id);
|
||||
time(&stratum_ctx->g_work_time);
|
||||
|
||||
pthread_mutex_unlock(&stratum_ctx->work_lock);
|
||||
@@ -143,11 +123,11 @@ static bool submit_upstream_work(struct work *work) {
|
||||
char s[JSON_BUF_LEN];
|
||||
|
||||
/* pass if the previous hash is not the current previous hash */
|
||||
if (memcmp(work->data + 1, stratum_ctx->g_work.data + 1, 32)) {
|
||||
if (memcmp(work->blob + 1, stratum_ctx->g_work.blob + 1, 32)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
char *noncestr = bin2hex(((const unsigned char*) work->data) + 39, 4);
|
||||
char *noncestr = bin2hex(((const unsigned char*) work->blob) + 39, 4);
|
||||
char *hashhex = bin2hex((const unsigned char *) work->hash, 32);
|
||||
|
||||
snprintf(s, JSON_BUF_LEN,
|
||||
@@ -161,7 +141,6 @@ static bool submit_upstream_work(struct work *work) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -176,7 +155,6 @@ static void workio_cmd_free(struct workio_cmd *wc) {
|
||||
return;
|
||||
}
|
||||
|
||||
work_free(wc->work);
|
||||
free(wc->work);
|
||||
|
||||
memset(wc, 0, sizeof(*wc)); /* poison */
|
||||
@@ -277,26 +255,21 @@ static bool should_pause(int thr_id) {
|
||||
*/
|
||||
static void *miner_thread(void *userdata) {
|
||||
struct thr_info *mythr = userdata;
|
||||
int thr_id = mythr->id;
|
||||
const int thr_id = mythr->id;
|
||||
struct work work = { { 0 } };
|
||||
uint32_t max_nonce;
|
||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
||||
|
||||
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) &persistent_memory[TWO_MB_PAGE - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
|
||||
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id);
|
||||
|
||||
if (cpu_info.count > 1 && opt_n_threads > 1 && opt_affinity != -1L) {
|
||||
if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) {
|
||||
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);
|
||||
}
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + 39);
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
uint32_t *nonceptr = NULL;
|
||||
uint32_t hash[8] __attribute__((aligned(32)));
|
||||
|
||||
while (1) {
|
||||
unsigned long hashes_done;
|
||||
struct timeval tv_start;
|
||||
int64_t max64;
|
||||
int rc;
|
||||
|
||||
if (should_pause(thr_id)) {
|
||||
sleep(1);
|
||||
continue;
|
||||
@@ -304,46 +277,140 @@ static void *miner_thread(void *userdata) {
|
||||
|
||||
pthread_mutex_lock(&stratum_ctx->work_lock);
|
||||
|
||||
if (memcmp(work.data, stratum_ctx->g_work.data, 39) || memcmp(((uint8_t*) work.data) + 43, ((uint8_t*) stratum_ctx->g_work.data) + 43, 33)) {
|
||||
work_free(&work);
|
||||
if (memcmp(work.job_id, stratum_ctx->g_work.job_id, 64)) {
|
||||
work_copy(&work, &stratum_ctx->g_work);
|
||||
nonceptr = (uint32_t*) (((char*)work.data) + 39);
|
||||
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
|
||||
} else {
|
||||
++(*nonceptr);
|
||||
nonceptr = (uint32_t*) (((char*) work.blob) + 39);
|
||||
|
||||
if (opt_nicehash) {
|
||||
end_nonce = (*nonceptr & 0xff000000U) + (0xffffffU / opt_n_threads * (thr_id + 1) - 0x20);
|
||||
*nonceptr = (*nonceptr & 0xff000000U) + (0xffffffU / opt_n_threads * thr_id);
|
||||
}
|
||||
else {
|
||||
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&stratum_ctx->work_lock);
|
||||
|
||||
work_restart[thr_id].restart = 0;
|
||||
|
||||
/* adjust max_nonce to meet target scan time */
|
||||
max64 = LP_SCANTIME;
|
||||
|
||||
//max64 *= thr_hashrates[thr_id];
|
||||
if (max64 <= 0) {
|
||||
max64 = 0x40LL;
|
||||
}
|
||||
|
||||
if (*nonceptr + max64 > end_nonce) {
|
||||
if (*nonceptr + LP_SCANTIME > end_nonce) {
|
||||
max_nonce = end_nonce;
|
||||
} else {
|
||||
max_nonce = *nonceptr + max64;
|
||||
max_nonce = *nonceptr + LP_SCANTIME;
|
||||
}
|
||||
|
||||
hashes_done = 0;
|
||||
gettimeofday(&tv_start, NULL );
|
||||
unsigned long hashes_done = 0;
|
||||
|
||||
struct timeval tv_start;
|
||||
gettimeofday(&tv_start, NULL);
|
||||
|
||||
/* scan nonces for a proof-of-work hash */
|
||||
rc = scanhash_cryptonight(thr_id, hash, work.data, work.target, max_nonce, &hashes_done, &persistent_memory[TWO_MB_PAGE * (thr_id + 1)], persistentctx);
|
||||
const int rc = scanhash_cryptonight(thr_id, hash, work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
|
||||
stats_add_hashes(thr_id, &tv_start, hashes_done);
|
||||
|
||||
memcpy(work.hash, hash, 32);
|
||||
|
||||
/* if nonce found, submit work */
|
||||
if (rc && !submit_work(mythr, &work)) {
|
||||
if (!rc) {
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(work.hash, hash, 32);
|
||||
submit_work(mythr, &work);
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
tq_freeze(mythr->q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief miner_thread_double
|
||||
* @param userdata
|
||||
* @return
|
||||
*/
|
||||
static void *miner_thread_double(void *userdata) {
|
||||
struct thr_info *mythr = userdata;
|
||||
const int thr_id = mythr->id;
|
||||
struct work work = { { 0 } };
|
||||
uint32_t max_nonce;
|
||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
||||
|
||||
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id);
|
||||
|
||||
if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) {
|
||||
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);
|
||||
}
|
||||
|
||||
uint32_t *nonceptr0 = NULL;
|
||||
uint32_t *nonceptr1 = NULL;
|
||||
uint8_t double_hash[64];
|
||||
uint8_t double_blob[sizeof(work.blob) * 2];
|
||||
|
||||
while (1) {
|
||||
if (should_pause(thr_id)) {
|
||||
sleep(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&stratum_ctx->work_lock);
|
||||
|
||||
if (memcmp(work.job_id, stratum_ctx->g_work.job_id, 64)) {
|
||||
work_copy(&work, &stratum_ctx->g_work);
|
||||
|
||||
memcpy(double_blob, work.blob, work.blob_size);
|
||||
memcpy(double_blob + work.blob_size, work.blob, work.blob_size);
|
||||
|
||||
nonceptr0 = (uint32_t*) (((char*) double_blob) + 39);
|
||||
nonceptr1 = (uint32_t*) (((char*) double_blob) + 39 + work.blob_size);
|
||||
|
||||
if (opt_nicehash) {
|
||||
end_nonce = (*nonceptr0 & 0xff000000U) + (0xffffffU / (opt_n_threads * 2) * (thr_id + 1) - 0x20);
|
||||
*nonceptr0 = (*nonceptr0 & 0xff000000U) + (0xffffffU / (opt_n_threads * 2) * thr_id);
|
||||
*nonceptr1 = (*nonceptr1 & 0xff000000U) + (0xffffffU / (opt_n_threads * 2) * (thr_id + opt_n_threads));
|
||||
}
|
||||
else {
|
||||
*nonceptr0 = 0xffffffffU / (opt_n_threads * 2) * thr_id;
|
||||
*nonceptr1 = 0xffffffffU / (opt_n_threads * 2) * (thr_id + opt_n_threads);
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&stratum_ctx->work_lock);
|
||||
|
||||
work_restart[thr_id].restart = 0;
|
||||
|
||||
if (*nonceptr0 + (LP_SCANTIME / 2) > end_nonce) {
|
||||
max_nonce = end_nonce;
|
||||
} else {
|
||||
max_nonce = *nonceptr0 + (LP_SCANTIME / 2);
|
||||
}
|
||||
|
||||
unsigned long hashes_done = 0;
|
||||
|
||||
struct timeval tv_start;
|
||||
gettimeofday(&tv_start, NULL);
|
||||
|
||||
/* scan nonces for a proof-of-work hash */
|
||||
const int rc = scanhash_cryptonight_double(thr_id, (uint32_t *) double_hash, double_blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
|
||||
stats_add_hashes(thr_id, &tv_start, hashes_done);
|
||||
|
||||
if (!rc) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rc & 1) {
|
||||
memcpy(work.hash, double_hash, 32);
|
||||
memcpy(work.blob, double_blob, work.blob_size);
|
||||
submit_work(mythr, &work);
|
||||
}
|
||||
|
||||
if (rc & 2) {
|
||||
memcpy(work.hash, double_hash + 32, 32);
|
||||
memcpy(work.blob, double_blob + work.blob_size, work.blob_size);
|
||||
submit_work(mythr, &work);
|
||||
}
|
||||
|
||||
++(*nonceptr0);
|
||||
++(*nonceptr1);
|
||||
}
|
||||
|
||||
tq_freeze(mythr->q);
|
||||
@@ -393,7 +460,7 @@ static void switch_stratum() {
|
||||
static bool want_donate = false;
|
||||
|
||||
if (g_want_donate && !want_donate) {
|
||||
stratum_ctx->url = "stratum+tcp://donate.xmrig.com:443";
|
||||
stratum_ctx->url = opt_algo == ALGO_CRYPTONIGHT ? "stratum+tcp://donate.xmrig.com:443" : "stratum+tcp://donate.xmrig.com:3333";
|
||||
applog(LOG_NOTICE, "Switching to dev pool");
|
||||
want_donate = true;
|
||||
}
|
||||
@@ -557,7 +624,7 @@ static bool start_mining() {
|
||||
thr->id = i;
|
||||
thr->q = tq_new();
|
||||
|
||||
if (unlikely(!thr->q || pthread_create(&thr->pth, NULL, miner_thread, thr))) {
|
||||
if (unlikely(!thr->q || pthread_create(&thr->pth, NULL, opt_double_hash ? miner_thread_double : miner_thread, thr))) {
|
||||
applog(LOG_ERR, "thread %d create failed", i);
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user