mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-06 23:52:38 -05:00
Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4fdec33c50 | ||
|
|
30e5e4a492 | ||
|
|
d92c1a54de | ||
|
|
aa474fa51b | ||
|
|
7976059367 | ||
|
|
c5cbd9d8fe | ||
|
|
ef2e8bed6e | ||
|
|
7574bfab60 | ||
|
|
27980f24f8 | ||
|
|
5e6a69e16f | ||
|
|
69513e7049 | ||
|
|
b834c50aba | ||
|
|
302ebe5a5b | ||
|
|
b9096f2392 | ||
|
|
b02f4ff163 | ||
|
|
11748fad78 | ||
|
|
e0dc51edf9 | ||
|
|
779238fc85 | ||
|
|
a06a224c0a | ||
|
|
bf2eb1a685 | ||
|
|
0bba8849f0 | ||
|
|
1e22a984af | ||
|
|
61b49137c7 | ||
|
|
93d072ff6e | ||
|
|
f0b293f650 | ||
|
|
b93e7d9daa | ||
|
|
0b4b07fcd6 | ||
|
|
af62621169 | ||
|
|
ed7260449a | ||
|
|
33944595a2 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1 @@
|
||||
/build
|
||||
/CMakeLists.txt.user
|
||||
|
||||
82
CHANGELOG.md
82
CHANGELOG.md
@@ -1,77 +1,11 @@
|
||||
# v2.4.1
|
||||
- [#147](https://github.com/xmrig/xmrig/issues/147) Fixed compatibility with monero-stratum.
|
||||
|
||||
# v2.4.0
|
||||
- Added [HTTP API](https://github.com/xmrig/xmrig/wiki/API).
|
||||
- Added comments support in config file.
|
||||
- Replaced libjansson with rapidjson.
|
||||
- [#98](https://github.com/xmrig/xmrig/issues/98) Ignore `keepalive` option with minergate.com and nicehash.com.
|
||||
- [#101](https://github.com/xmrig/xmrig/issues/101) Fixed MSVC 2017 (15.3) compile time version detection.
|
||||
- [#108](https://github.com/xmrig/xmrig/issues/108) Silently ignore invalid values for `donate-level` option.
|
||||
- [#111](https://github.com/xmrig/xmrig/issues/111) Fixed build without AEON support.
|
||||
|
||||
# v2.3.1
|
||||
- [#68](https://github.com/xmrig/xmrig/issues/68) Fixed compatibility with Docker containers, nothing was printed on the console.
|
||||
|
||||
# v2.3.0
|
||||
- Added `--cpu-priority` option (0 idle, 2 normal to 5 highest).
|
||||
- Added `--user-agent` option, to set custom user-agent string for pool. For example `cpuminer-multi/0.1`.
|
||||
- Added `--no-huge-pages` option, to disable huge pages support.
|
||||
- [#62](https://github.com/xmrig/xmrig/issues/62) Don't send the login to the dev pool.
|
||||
- Force reconnect if the pool blocks the miner's IP address; this helps switch to a backup pool.
|
||||
- Fixed: failed to open the default config file if the path contains non-English characters.
|
||||
- Fixed: error occurred if try use unavailable stdin or stdout, regression since version 2.2.0.
|
||||
- Fixed: message about huge pages support successfully enabled on Windows was not shown in release builds.
|
||||
|
||||
# v2.2.1
|
||||
- Fixed [terminal issues](https://github.com/xmrig/xmrig-proxy/issues/2#issuecomment-319914085) after exit on Linux and OS X.
|
||||
|
||||
# v2.2.0
|
||||
- [#46](https://github.com/xmrig/xmrig/issues/46) Restored config file support. Now possible use multiple config files and combine with command line options also added support for default config.
|
||||
- Improved colors support on Windows, now used uv_tty, legacy code removed.
|
||||
- QuickEdit Mode now disabled on Windows.
|
||||
- Added interactive commands in console window: **h**ashrate, **p**ause, **r**esume.
|
||||
- Fixed autoconf mode for AMD FX CPUs.
|
||||
|
||||
# v2.1.0
|
||||
- [#40](https://github.com/xmrig/xmrig/issues/40)
|
||||
Improved miner shutdown, fixed crash on exit for Linux and OS X.
|
||||
- Fixed: the login request contained malformed JSON if the username or password had special characters, for example `\`.
|
||||
- [#220](https://github.com/fireice-uk/xmr-stak-cpu/pull/220) Better support for Round Robin DNS, IP address now always chosen randomly instead of stuck on first one.
|
||||
- Changed donation address, new [xmrig-proxy](https://github.com/xmrig/xmrig-proxy) is coming soon.
|
||||
|
||||
# v2.0.2
|
||||
- Better deal with possible duplicate jobs from pool, show warning and ignore duplicates.
|
||||
- For Windows builds libuv updated to version 1.13.1 and gcc to 7.1.0.
|
||||
|
||||
# v2.0.1
|
||||
- [#27](https://github.com/xmrig/xmrig/issues/27) Fixed possibility crash on 32bit systems.
|
||||
|
||||
# v2.0.0
|
||||
- Option `--backup-url` removed, instead now possibility specify multiple pools for example: `-o example1.com:3333 -u user1 -p password1 -k -o example2.com:5555 -u user2 -o example3.com:4444 -u user3`
|
||||
- [#15](https://github.com/xmrig/xmrig/issues/15) Added option `-l, --log-file=FILE` to write log to file.
|
||||
- [#15](https://github.com/xmrig/xmrig/issues/15) Added option `-S, --syslog` to use syslog for logging, Linux only.
|
||||
- [#18](https://github.com/xmrig/xmrig/issues/18) Added nice messages for accepted/rejected shares with diff and network latency.
|
||||
- [#20](https://github.com/xmrig/xmrig/issues/20) Fixed `--cpu-affinity` for more than 32 threads.
|
||||
- Fixed Windows XP support.
|
||||
- Fixed regression, option `--no-color` was not fully disable colored output.
|
||||
- Show resolved pool IP address in miner output.
|
||||
|
||||
# v1.0.1
|
||||
- Fix broken software AES implementation, app has crashed if CPU not support AES-NI, only version 1.0.0 affected.
|
||||
|
||||
# v1.0.0
|
||||
- Miner complete rewritten in C++ with libuv.
|
||||
- This version should be fully compatible (except config file) with previous versions, many new nice features will come in next versions.
|
||||
- This is still beta. If you find a regression, stability or performance issues, or have an idea for a new feature, please feel free to open a new [issue](https://github.com/xmrig/xmrig/issues/new).
|
||||
- Added new option `--print-time=N`, print hashrate report every N seconds.
|
||||
- New hashrate reports, by default every 60 seconds.
|
||||
- Added Microsoft Visual C++ 2015 and 2017 support.
|
||||
- Removed dependency on libcurl.
|
||||
- To compile this version from source please switch to [dev](https://github.com/xmrig/xmrig/tree/dev) branch.
|
||||
|
||||
# v0.8.2
|
||||
- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture).
|
||||
# v0.9.0
|
||||
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
|
||||
- Added option `--asm`, possible values `--asm auto`, `--asm none`, `--asm intel` and `--asm ryzen`.
|
||||
- Added support for new style long and short algorithm names, possible values: `cryptonight`, `cryptonight/0`, `cryptonight/1`, `cryptonight/2`, `cryptonight-lite`, `cryptonight-lite/0`, `cryptonight-lite/1` and short equivalents `cn/2` etc.
|
||||
- Added `--variant`, example `--algo cn --variant 2`, by default the miner automatically detects the proper variant for Monero by block version.
|
||||
- Added CryptoNight-Lite variant 1.
|
||||
- Added xmrig-proxy autodetection, nicehash will be enabled automatically.
|
||||
- Added workaround for xmrig-proxy [bug](https://github.com/xmrig/xmrig-proxy/commit/dfa1960fe3eeb13f80717b7dbfcc7c6e9f222d89).
|
||||
|
||||
# v0.8.2
|
||||
- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture).
|
||||
|
||||
302
CMakeLists.txt
302
CMakeLists.txt
@@ -1,236 +1,156 @@
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
project(xmrig)
|
||||
project(xmrig C)
|
||||
|
||||
option(WITH_LIBCPUID "Use Libcpuid" ON)
|
||||
option(WITH_AEON "CryptoNight-Lite support" ON)
|
||||
option(WITH_HTTPD "HTTP REST API" ON)
|
||||
|
||||
include (CheckIncludeFile)
|
||||
|
||||
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
||||
|
||||
set(HEADERS
|
||||
src/3rdparty/align.h
|
||||
src/api/Api.h
|
||||
src/api/ApiState.h
|
||||
src/api/NetworkState.h
|
||||
src/App.h
|
||||
src/Console.h
|
||||
src/Cpu.h
|
||||
src/interfaces/IClientListener.h
|
||||
src/interfaces/IConsoleListener.h
|
||||
src/interfaces/IJobResultListener.h
|
||||
src/interfaces/ILogBackend.h
|
||||
src/interfaces/IStrategy.h
|
||||
src/interfaces/IStrategyListener.h
|
||||
src/interfaces/IWorker.h
|
||||
src/log/ConsoleLog.h
|
||||
src/log/FileLog.h
|
||||
src/log/Log.h
|
||||
src/Mem.h
|
||||
src/net/Client.h
|
||||
src/net/Job.h
|
||||
src/net/JobId.h
|
||||
src/net/JobResult.h
|
||||
src/net/Network.h
|
||||
src/net/strategies/DonateStrategy.h
|
||||
src/net/strategies/FailoverStrategy.h
|
||||
src/net/strategies/SinglePoolStrategy.h
|
||||
src/net/SubmitResult.h
|
||||
src/net/Url.h
|
||||
src/Options.h
|
||||
src/Platform.h
|
||||
src/Summary.h
|
||||
src/version.h
|
||||
src/workers/DoubleWorker.h
|
||||
src/workers/Handle.h
|
||||
src/workers/Hashrate.h
|
||||
src/workers/SingleWorker.h
|
||||
src/workers/Worker.h
|
||||
src/workers/Workers.h
|
||||
algo/cryptonight/cryptonight.h
|
||||
algo/cryptonight/cryptonight_aesni.h
|
||||
algo/cryptonight/cryptonight_monero.h
|
||||
algo/cryptonight/cryptonight_softaes.h
|
||||
algo/cryptonight/cryptonight_test.h
|
||||
algo/cryptonight/variant4_random_math.h
|
||||
compat.h
|
||||
cpu.h
|
||||
donate.h
|
||||
elist.h
|
||||
options.h
|
||||
persistent_memory.h
|
||||
stats.h
|
||||
stratum.h
|
||||
util.h
|
||||
version.h
|
||||
xmrig.h
|
||||
)
|
||||
|
||||
set(HEADERS_CRYPTO
|
||||
src/crypto/c_blake256.h
|
||||
src/crypto/c_groestl.h
|
||||
src/crypto/c_jh.h
|
||||
src/crypto/c_keccak.h
|
||||
src/crypto/c_skein.h
|
||||
src/crypto/CryptoNight.h
|
||||
src/crypto/CryptoNight_p.h
|
||||
src/crypto/CryptoNight_test.h
|
||||
src/crypto/groestl_tables.h
|
||||
src/crypto/hash.h
|
||||
src/crypto/skein_port.h
|
||||
crypto/c_groestl.h
|
||||
crypto/c_blake256.h
|
||||
crypto/c_jh.h
|
||||
crypto/c_skein.h
|
||||
crypto/soft_aes.h
|
||||
)
|
||||
|
||||
set(HEADERS_COMPAT
|
||||
compat/winansi.h
|
||||
)
|
||||
|
||||
set(HEADERS_UTILS
|
||||
utils/applog.h
|
||||
utils/threads.h
|
||||
utils/summary.h
|
||||
)
|
||||
|
||||
set(SOURCES
|
||||
src/api/Api.cpp
|
||||
src/api/ApiState.cpp
|
||||
src/api/NetworkState.cpp
|
||||
src/App.cpp
|
||||
src/Console.cpp
|
||||
src/log/ConsoleLog.cpp
|
||||
src/log/FileLog.cpp
|
||||
src/log/Log.cpp
|
||||
src/Mem.cpp
|
||||
src/net/Client.cpp
|
||||
src/net/Job.cpp
|
||||
src/net/Network.cpp
|
||||
src/net/strategies/DonateStrategy.cpp
|
||||
src/net/strategies/FailoverStrategy.cpp
|
||||
src/net/strategies/SinglePoolStrategy.cpp
|
||||
src/net/SubmitResult.cpp
|
||||
src/net/Url.cpp
|
||||
src/Options.cpp
|
||||
src/Platform.cpp
|
||||
src/Summary.cpp
|
||||
src/workers/DoubleWorker.cpp
|
||||
src/workers/Handle.cpp
|
||||
src/workers/Hashrate.cpp
|
||||
src/workers/SingleWorker.cpp
|
||||
src/workers/Worker.cpp
|
||||
src/workers/Workers.cpp
|
||||
src/xmrig.cpp
|
||||
xmrig.c
|
||||
algo/cryptonight/cryptonight.c
|
||||
algo/cryptonight/cryptonight_av1.c
|
||||
algo/cryptonight/cryptonight_av2.c
|
||||
algo/cryptonight/cryptonight_av3.c
|
||||
algo/cryptonight/cryptonight_av4.c
|
||||
algo/cryptonight/cryptonight_r_av1.c
|
||||
algo/cryptonight/cryptonight_r_av2.c
|
||||
algo/cryptonight/cryptonight_r_av3.c
|
||||
algo/cryptonight/cryptonight_r_av4.c
|
||||
util.c
|
||||
options.c
|
||||
stratum.c
|
||||
stats.c
|
||||
memory.c
|
||||
)
|
||||
|
||||
set(SOURCES_CRYPTO
|
||||
src/crypto/c_keccak.c
|
||||
src/crypto/c_groestl.c
|
||||
src/crypto/c_blake256.c
|
||||
src/crypto/c_jh.c
|
||||
src/crypto/c_skein.c
|
||||
src/crypto/soft_aes.c
|
||||
src/crypto/soft_aes.c
|
||||
src/crypto/CryptoNight.cpp
|
||||
crypto/c_keccak.c
|
||||
crypto/c_groestl.c
|
||||
crypto/c_blake256.c
|
||||
crypto/c_jh.c
|
||||
crypto/c_skein.c
|
||||
)
|
||||
|
||||
set(SOURCES_UTILS
|
||||
utils/applog.c
|
||||
utils/summary.c
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
set(SOURCES_OS
|
||||
res/app.rc
|
||||
src/App_win.cpp
|
||||
src/Cpu_win.cpp
|
||||
src/Mem_win.cpp
|
||||
src/Platform_win.cpp
|
||||
)
|
||||
|
||||
add_definitions(/DWIN32)
|
||||
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
|
||||
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c win/app.rc compat/winansi.c)
|
||||
set(EXTRA_LIBS ws2_32 crypt32)
|
||||
add_definitions(/D_WIN32_WINNT=0x600)
|
||||
elseif (APPLE)
|
||||
set(SOURCES_OS
|
||||
src/App_unix.cpp
|
||||
src/Cpu_mac.cpp
|
||||
src/Mem_unix.cpp
|
||||
src/Platform_mac.cpp
|
||||
)
|
||||
set(SOURCES_OS mac/cpu_mac.c mac/memory_mac.c mac/xmrig_mac.c)
|
||||
else()
|
||||
set(SOURCES_OS
|
||||
src/App_unix.cpp
|
||||
src/Cpu_unix.cpp
|
||||
src/Mem_unix.cpp
|
||||
src/Platform_unix.cpp
|
||||
)
|
||||
|
||||
set(EXTRA_LIBS pthread)
|
||||
set(SOURCES_OS unix/cpu_unix.c unix/memory_unix.c unix/xmrig_unix.c)
|
||||
set(EXTRA_LIBS pthread rt m)
|
||||
endif()
|
||||
|
||||
add_definitions(/D__STDC_FORMAT_MACROS)
|
||||
include_directories(.)
|
||||
add_definitions(/DUSE_NATIVE_THREADS)
|
||||
add_definitions(/D_GNU_SOURCE)
|
||||
add_definitions(/DUNICODE)
|
||||
add_definitions(/DRAPIDJSON_SSE2)
|
||||
#add_definitions(/DAPP_DEBUG)
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
|
||||
|
||||
find_package(UV REQUIRED)
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
|
||||
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
|
||||
# https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_ID.html
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wall -Wno-strict-aliasing")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -Wall -fno-exceptions -fno-rtti")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||
|
||||
if (WIN32)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
|
||||
else()
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
|
||||
endif()
|
||||
|
||||
add_definitions(/D_GNU_SOURCE)
|
||||
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.1.0")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
endif()
|
||||
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2")
|
||||
|
||||
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
|
||||
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
|
||||
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
|
||||
add_definitions(/DNOMINMAX)
|
||||
|
||||
elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wall")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -Wall -fno-exceptions -fno-rtti -Wno-missing-braces")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants")
|
||||
else()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||
endif()
|
||||
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate")
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
|
||||
|
||||
if (WIN32)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
|
||||
endif()
|
||||
|
||||
include_directories(compat/jansson)
|
||||
add_subdirectory(compat/jansson)
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
|
||||
|
||||
find_package(CURL REQUIRED)
|
||||
include_directories(${CURL_INCLUDE_DIRS})
|
||||
add_definitions(/DCURL_STATICLIB)
|
||||
link_directories(${CURL_LIBRARIES})
|
||||
|
||||
if (WITH_LIBCPUID)
|
||||
add_subdirectory(src/3rdparty/libcpuid)
|
||||
add_subdirectory(compat/libcpuid)
|
||||
|
||||
include_directories(src/3rdparty/libcpuid)
|
||||
include_directories(compat/libcpuid)
|
||||
set(CPUID_LIB cpuid)
|
||||
set(SOURCES_CPUID src/Cpu.cpp)
|
||||
set(SOURCES_CPUID cpu.c)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_LIBCPUID)
|
||||
set(SOURCES_CPUID src/Cpu_stub.cpp)
|
||||
set(SOURCES_CPUID cpu_stub.c)
|
||||
endif()
|
||||
|
||||
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
|
||||
if (HAVE_SYSLOG_H)
|
||||
add_definitions(/DHAVE_SYSLOG_H)
|
||||
set(SOURCES_SYSLOG src/log/SysLog.h src/log/SysLog.cpp)
|
||||
endif()
|
||||
include(cmake/asm.cmake)
|
||||
|
||||
if (NOT WITH_AEON)
|
||||
if (WITH_AEON)
|
||||
set(SOURCES_AEON
|
||||
algo/cryptonight-lite/cryptonight_lite_av1.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av2.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av3.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av4.c
|
||||
algo/cryptonight-lite/cryptonight_lite_aesni.h
|
||||
algo/cryptonight-lite/cryptonight_lite_softaes.h
|
||||
)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_AEON)
|
||||
endif()
|
||||
|
||||
if (WITH_HTTPD)
|
||||
find_package(MHD)
|
||||
|
||||
if (MHD_FOUND)
|
||||
include_directories(${MHD_INCLUDE_DIRS})
|
||||
set(HTTPD_SOURCES src/api/Httpd.h src/api/Httpd.cpp)
|
||||
else()
|
||||
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_HTTPD=OFF` to build without http deamon support")
|
||||
endif()
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(xmrig ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_HTTPD)
|
||||
add_definitions(/DXMRIG_NO_API)
|
||||
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(xmrig32 ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
endif()
|
||||
|
||||
include_directories(src)
|
||||
include_directories(src/3rdparty)
|
||||
include_directories(${UV_INCLUDE_DIR})
|
||||
|
||||
add_executable(xmrig ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES})
|
||||
target_link_libraries(xmrig ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||
|
||||
78
README.md
78
README.md
@@ -1,28 +1,24 @@
|
||||
# XMRig
|
||||
XMRig is a high-performance Monero (XMR) CPU miner with official full Windows support.
|
||||
Originally based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code, since version 1.0.0 completely rewritten from scratch in C++.
|
||||
Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of legacy code.
|
||||
|
||||
* This is the CPU-mining version, there is also a [NVIDIA GPU version](https://github.com/xmrig/xmrig-nvidia).
|
||||
* [Roadmap](https://github.com/xmrig/xmrig/issues/106) for next releases.
|
||||
|
||||
<img src="http://i.imgur.com/OKZRVDh.png" width="619" >
|
||||
<img src="http://i.imgur.com/GdRDnAu.png" width="596" >
|
||||
|
||||
#### Table of contents
|
||||
* [Features](#features)
|
||||
* [Download](#download)
|
||||
* [Usage](#usage)
|
||||
* [Algorithm variations](#algorithm-variations)
|
||||
* [Build](https://github.com/xmrig/xmrig/wiki/Build)
|
||||
* [Build](#build)
|
||||
* [Common Issues](#common-issues)
|
||||
* [Other information](#other-information)
|
||||
* [Donations](#donations)
|
||||
* [Contacts](#contacts)
|
||||
|
||||
## Features
|
||||
* High performance.
|
||||
* High performance (290+ H/s on i7 6700).
|
||||
* Official Windows support.
|
||||
* Small Windows executable, without dependencies.
|
||||
* x86/x64 support.
|
||||
* Small Windows executable, only 535 KB without dependencies.
|
||||
* Support for backup (failover) mining server.
|
||||
* keepalived support.
|
||||
* Command line options compatible with cpuminer.
|
||||
@@ -34,24 +30,19 @@ Originally based on cpuminer-multi with heavy optimizations/rewrites and removin
|
||||
## Download
|
||||
* Binary releases: https://github.com/xmrig/xmrig/releases
|
||||
* Git tree: https://github.com/xmrig/xmrig.git
|
||||
* Clone with `git clone https://github.com/xmrig/xmrig.git` :hammer: [Build instructions](https://github.com/xmrig/xmrig/wiki/Build).
|
||||
* Clone with `git clone https://github.com/xmrig/xmrig.git`
|
||||
|
||||
## Usage
|
||||
### Basic example
|
||||
```
|
||||
xmrig.exe -o pool.minemonero.pro:5555 -u YOUR_WALLET -p x -k
|
||||
xmrig.exe -o xmr-eu.dwarfpool.com:8005 -u YOUR_WALLET -p x -k
|
||||
```
|
||||
|
||||
### Failover
|
||||
```
|
||||
xmrig.exe -o pool.minemonero.pro:5555 -u YOUR_WALLET1 -p x -k -o pool.supportxmr.com:5555 -u YOUR_WALLET2 -p x -k
|
||||
```
|
||||
For failover you can add multiple pools, maximum count not limited.
|
||||
|
||||
### Options
|
||||
```
|
||||
-a, --algo=ALGO cryptonight (default) or cryptonight-lite
|
||||
-o, --url=URL URL of mining server
|
||||
-b, --backup-url=URL URL of backup mining server
|
||||
-O, --userpass=U:P username:password pair for mining server
|
||||
-u, --user=USERNAME username for mining server
|
||||
-p, --pass=PASSWORD password for mining server
|
||||
@@ -60,25 +51,18 @@ For failover you can add multiple pools, maximum count not limited.
|
||||
-k, --keepalive send keepalived for prevent timeout (need pool support)
|
||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)
|
||||
-R, --retry-pause=N time to pause between retries (default: 5)
|
||||
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1
|
||||
--cpu-priority set process priority (0 idle, 2 normal to 5 highest)
|
||||
--no-huge-pages disable huge pages support
|
||||
--cpu-affinity set process affinity to cpu core(s), mask 0x3 for cores 0 and 1
|
||||
--no-color disable colored output
|
||||
--donate-level=N donate level, default 5% (5 minutes in 100 minutes)
|
||||
--user-agent set custom user-agent string for pool
|
||||
-B, --background run the miner in the background
|
||||
-c, --config=FILE load a JSON-format configuration file
|
||||
-l, --log-file=FILE log all output to a file
|
||||
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)
|
||||
--safe safe adjust threads and av settings for current CPU
|
||||
--max-cpu-usage=N maximum cpu usage for automatic threads mode (default 75)
|
||||
--safe safe adjust threads and av settings for current cpu
|
||||
--nicehash enable nicehash support
|
||||
--print-time=N print hashrate report every N seconds
|
||||
-h, --help display this help and exit
|
||||
-V, --version output version information and exit
|
||||
```
|
||||
|
||||
Also you can use configuration via config file, default **config.json**. You can load multiple config files and combine it with command line options.
|
||||
|
||||
## Algorithm variations
|
||||
Since version 0.8.0.
|
||||
* `--av=1` For CPUs with hardware AES.
|
||||
@@ -86,6 +70,42 @@ Since version 0.8.0.
|
||||
* `--av=3` Software AES implementation.
|
||||
* `--av=4` Lower power mode (double hash) of `3`.
|
||||
|
||||
## Build
|
||||
### Ubuntu (Debian-based distros)
|
||||
```
|
||||
sudo apt-get install git build-essential cmake libcurl4-openssl-dev
|
||||
git clone https://github.com/xmrig/xmrig.git
|
||||
cd xmrig
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||
make
|
||||
```
|
||||
|
||||
### Windows
|
||||
It's complicated, you need [MSYS2](http://www.msys2.org/), custom libcurl build, and of course CMake too.
|
||||
|
||||
Necessary MSYS2 packages:
|
||||
```
|
||||
pacman -Sy
|
||||
pacman -S mingw-w64-x86_64-gcc
|
||||
pacman -S make
|
||||
pacman -S mingw-w64-x86_64-cmake
|
||||
pacman -S mingw-w64-x86_64-pkg-config
|
||||
```
|
||||
Configure options for libcurl:
|
||||
```
|
||||
./configure --disable-shared --enable-optimize --enable-threaded-resolver --disable-libcurl-option --disable-ares --disable-rt --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --disable-manual --disable-ipv6 --disable-sspi --disable-crypto-auth --disable-ntlm-wb --disable-tls-srp --disable-unix-sockets --without-zlib --without-winssl --without-ssl --without-libssh2 --without-nghttp2 --disable-cookies --without-ca-bundle --without-librtmp
|
||||
```
|
||||
CMake options:
|
||||
```
|
||||
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\xmrig-deps\gcc\x64\include" -DCURL_LIBRARY="c:\xmrig-deps\gcc\x64\lib\libcurl.a"
|
||||
```
|
||||
|
||||
### Optional features
|
||||
`-DWITH_LIBCPUID=OFF` Disable libcpuid. Auto configuration of CPU after this will be very limited.
|
||||
`-DWITH_AEON=OFF` Disable CryptoNight-Lite support.
|
||||
|
||||
## Common Issues
|
||||
### HUGE PAGES unavailable
|
||||
* Run XMRig as Administrator.
|
||||
@@ -98,8 +118,8 @@ Since version 0.8.0.
|
||||
|
||||
|
||||
### CPU mining performance
|
||||
* **Intel i7-7700** - 307 H/s (4 threads)
|
||||
* **AMD Ryzen 7 1700X** - 560 H/s (8 threads)
|
||||
* **i7-6700** - 290+ H/s (4 threads, cpu affinity 0xAA)
|
||||
* **Dual E5620** - 377 H/s (12 threads, cpu affinity 0xEEEE)
|
||||
|
||||
Please note performance is highly dependent on system load. The numbers above are obtained on an idle system. Tasks heavily using a processor cache, such as video playback, can greatly degrade hashrate. Optimal number of threads depends on the size of the L3 cache of a processor, 1 thread requires 2 MB of cache.
|
||||
|
||||
|
||||
274
algo/cryptonight-lite/cryptonight_lite_aesni.h
Normal file
274
algo/cryptonight-lite/cryptonight_lite_aesni.h
Normal file
@@ -0,0 +1,274 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_LITE_AESNI_H
|
||||
#define XMRIG_CRYPTONIGHT_LITE_AESNI_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
/*
 * aes_genkey_sub(imm8): one key-expansion step that updates two 128-bit
 * round-key halves in place.
 *
 * Requirements at the expansion site:
 *   - `__m128i* xout0` and `__m128i* xout2` must be in scope; both are
 *     read and overwritten.
 *   - `imm8` is forwarded to _mm_aeskeygenassist_si128 and therefore has
 *     to be a compile-time constant.
 *   - The macro declares a local named `xout1` and expands to a sequence of
 *     statements (not an expression), so it can be used at most once per
 *     scope and needs no trailing semicolon.
 *
 * sl_xor() is referenced here but only needs to be declared before the
 * first expansion of the macro, not before this definition.
 *
 * The last line intentionally carries NO line-continuation backslash, so
 * whatever follows the definition can never be spliced into the macro.
 */
#define aes_genkey_sub(imm8) \
    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
    xout1  = _mm_shuffle_epi32(xout1, 0xFF); \
    *xout0 = sl_xor(*xout0); \
    *xout0 = _mm_xor_si128(*xout0, xout1); \
    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00); \
    xout1  = _mm_shuffle_epi32(xout1, 0xAA); \
    *xout2 = sl_xor(*xout2); \
    *xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
|
||||
|
||||
// Prefix-XOR over the four 32-bit lanes of a 128-bit value:
//   sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// Implemented by XOR-ing the input with copies of itself shifted left by
// 4, 8 and 12 bytes (each copy is the previous one shifted 4 more bytes).
static inline __m128i sl_xor(__m128i tmp1)
{
    const __m128i by4  = _mm_slli_si128(tmp1, 0x04);
    const __m128i by8  = _mm_slli_si128(by4, 0x04);
    const __m128i by12 = _mm_slli_si128(by8, 0x04);

    return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(tmp1, by4), by8), by12);
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x1)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x2)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x4)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x8)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = _mm_aesenc_si128(*x0, key);
|
||||
*x1 = _mm_aesenc_si128(*x1, key);
|
||||
*x2 = _mm_aesenc_si128(*x2, key);
|
||||
*x3 = _mm_aesenc_si128(*x3, key);
|
||||
*x4 = _mm_aesenc_si128(*x4, key);
|
||||
*x5 = _mm_aesenc_si128(*x5, key);
|
||||
*x6 = _mm_aesenc_si128(*x6, key);
|
||||
*x7 = _mm_aesenc_si128(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Derives ten 128-bit round keys from the 32 bytes at `memory`.
 *
 * memory   source of the two initial key words, read as memory[0] and
 *          memory[1] with the aligned load _mm_load_si128
 * k0..k9   outputs. k0/k1 receive the two loaded words unchanged; each
 *          following pair (k2/k3, k4/k5, k6/k7, k8/k9) receives the state
 *          after aes_genkey_sub1, _sub2, _sub4 and _sub8 respectively,
 *          applied cumulatively to the same two working words.
 */
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
    __m128i even = _mm_load_si128(memory);
    __m128i odd  = _mm_load_si128(memory + 1);

    *k0 = even;
    *k1 = odd;

    aes_genkey_sub1(&even, &odd);
    *k2 = even;
    *k3 = odd;

    aes_genkey_sub2(&even, &odd);
    *k4 = even;
    *k5 = odd;

    aes_genkey_sub4(&even, &odd);
    *k6 = even;
    *k7 = odd;

    aes_genkey_sub8(&even, &odd);
    *k8 = even;
    *k9 = odd;
}
|
||||
|
||||
|
||||
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Fold the scratchpad at `input` back into the hash state at `output`.
 *
 * Ten round keys are derived by aes_genkey() from output + 2. The eight
 * 128-bit blocks output[4]..output[11] act as the accumulator: for every
 * group of eight scratchpad blocks the group is XORed into the accumulator,
 * which is then run through all ten aes_round() calls. After MEMORY_LITE
 * bytes have been consumed the accumulator is written back to
 * output[4]..output[11].
 *
 * Both pointers are accessed with _mm_load_si128 / _mm_store_si128 and must
 * therefore be 16-byte aligned.
 */
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
    // This is more than we have registers, compiler will assign 2 keys on the stack
    __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

    aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    xout0 = _mm_load_si128(output + 4);
    xout1 = _mm_load_si128(output + 5);
    xout2 = _mm_load_si128(output + 6);
    xout3 = _mm_load_si128(output + 7);
    xout4 = _mm_load_si128(output + 8);
    xout5 = _mm_load_si128(output + 9);
    xout6 = _mm_load_si128(output + 10);
    xout7 = _mm_load_si128(output + 11);

    // One iteration absorbs 8 blocks (128 bytes) of scratchpad.
    for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
    {
        xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
        xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
        xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
        xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
        xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
        xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
        xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
        xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);

        aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
    }

    _mm_store_si128(output + 4, xout0);
    _mm_store_si128(output + 5, xout1);
    _mm_store_si128(output + 6, xout2);
    _mm_store_si128(output + 7, xout3);
    _mm_store_si128(output + 8, xout4);
    _mm_store_si128(output + 9, xout5);
    _mm_store_si128(output + 10, xout6);
    _mm_store_si128(output + 11, xout7);
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
/* Low 64 bits of a 128-bit vector as a scalar. */
#   define EXTRACT64(X) _mm_cvtsi128_si64(X)

/*
 * 64 x 64 -> 128 bit unsigned multiply.
 *
 * Returns the low 64 bits of a * b and stores the high 64 bits in *hi.
 * Uses the compiler's native unsigned __int128 type.
 */
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
    *hi = r >> 64;
    return (uint64_t) r;
}
#elif defined(__i386__)
/* Shift the vector right by 4 bytes so that 32-bit lane 1 lands in lane 0. */
#   define HI32(X) \
    _mm_srli_si128((X), 4)


/* Low 64 bits of a 128-bit vector, assembled from two 32-bit extractions. */
#   define EXTRACT64(X) \
    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))

/*
 * 64 x 64 -> 128 bit unsigned multiply built from 32-bit halves.
 *
 * Returns the low 64 bits of multiplier * multiplicand and stores the high
 * 64 bits in *product_hi.
 */
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
    // multiplier   = ab = a * 2^32 + b
    // multiplicand = cd = c * 2^32 + d
    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
    uint64_t a = multiplier >> 32;
    uint64_t b = multiplier & 0xFFFFFFFF;
    uint64_t c = multiplicand >> 32;
    uint64_t d = multiplicand & 0xFFFFFFFF;

    uint64_t ad = a * d;
    uint64_t bd = b * d;

    // The sum of the two cross terms can overflow 64 bits; remember the carry.
    uint64_t adbc = ad + (b * c);
    uint64_t adbc_carry = adbc < ad ? 1 : 0;

    // multiplier * multiplicand = product_hi * 2^64 + product_lo
    uint64_t product_lo = bd + (adbc << 32);
    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
    // The cross-term carry has weight 2^96 overall, i.e. 2^32 within the high word.
    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

    return product_lo;
}
#endif
|
||||
|
||||
|
||||
/*
 * Store the 128-bit value `tmp` to mem_out[0..1], modifying the high half.
 *
 * mem_out[0] receives the low 64 bits of `tmp` unchanged. In the high
 * 64 bits (vh), bits 28-29 are XORed with a 2-bit entry taken from the
 * packed lookup table 0x7531; the entry is selected by three bits of the
 * byte x = (uint8_t)(vh >> 24): bits 5 and 4 of x (via (x >> 3) & 6) and
 * bit 0 of x. The result is written to mem_out[1].
 */
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
    mem_out[0] = EXTRACT64(tmp);

    // Move the upper 64 bits of tmp into the lower 64 bits so they can be extracted.
    tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
    uint64_t vh = EXTRACT64(tmp);

    uint8_t x = vh >> 24;
    static const uint16_t table = 0x7531;
    // 3-bit selector doubled, because each table entry is 2 bits wide.
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
    vh ^= ((table >> index) & 0x3) << 28;

    mem_out[1] = vh;
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_LITE_AESNI_H */
|
||||
134
algo/cryptonight-lite/cryptonight_lite_av1.c
Normal file
134
algo/cryptonight-lite/cryptonight_lite_av1.c
Normal file
@@ -0,0 +1,134 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
202
algo/cryptonight-lite/cryptonight_lite_av2.c
Normal file
202
algo/cryptonight-lite/cryptonight_lite_av2.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
|
||||
}
|
||||
134
algo/cryptonight-lite/cryptonight_lite_av3.c
Normal file
134
algo/cryptonight-lite/cryptonight_lite_av3.c
Normal file
@@ -0,0 +1,134 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
202
algo/cryptonight-lite/cryptonight_lite_av4.c
Normal file
202
algo/cryptonight-lite/cryptonight_lite_av4.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av4_v0(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av4_v1(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
|
||||
}
|
||||
255
algo/cryptonight-lite/cryptonight_lite_softaes.h
Normal file
255
algo/cryptonight-lite/cryptonight_lite_softaes.h
Normal file
@@ -0,0 +1,255 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
|
||||
#define XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#include "crypto/soft_aes.h"
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// where a1 is the lowest 32-bit lane. Each step shifts the running copy up by
// one more lane (4 bytes) and folds it into the result.
static inline __m128i sl_xor(__m128i tmp1)
{
    __m128i tmp4;
    tmp4 = _mm_slli_si128(tmp1, 0x04);
    tmp1 = _mm_xor_si128(tmp1, tmp4);
    tmp4 = _mm_slli_si128(tmp4, 0x04);
    tmp1 = _mm_xor_si128(tmp1, tmp4);
    tmp4 = _mm_slli_si128(tmp4, 0x04);
    tmp1 = _mm_xor_si128(tmp1, tmp4);
    return tmp1;
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
|
||||
{
|
||||
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft_aeskeygenassist(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = soft_aesenc(*x0, key);
|
||||
*x1 = soft_aesenc(*x1, key);
|
||||
*x2 = soft_aesenc(*x2, key);
|
||||
*x3 = soft_aesenc(*x3, key);
|
||||
*x4 = soft_aesenc(*x4, key);
|
||||
*x5 = soft_aesenc(*x5, key);
|
||||
*x6 = soft_aesenc(*x6, key);
|
||||
*x7 = soft_aesenc(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Derives the ten round keys k0..k9 from the two 16-byte blocks at
 * memory[0] and memory[1] (aligned loads).
 *
 * k0/k1 are the two loaded blocks themselves; every following pair is produced
 * by one aes_genkey_sub() step with round constants 0x1, 0x2, 0x4, 0x8.
 */
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
    __m128i first  = _mm_load_si128(memory);
    __m128i second = _mm_load_si128(memory + 1);

    *k0 = first;
    *k1 = second;

    __m128i* const first_out[4]  = { k2, k4, k6, k8 };
    __m128i* const second_out[4] = { k3, k5, k7, k9 };

    for (int step = 0; step < 4; ++step) {
        // Round constant doubles each step: 0x1, 0x2, 0x4, 0x8.
        aes_genkey_sub(&first, &second, (uint8_t) (1u << step));

        *first_out[step]  = first;
        *second_out[step] = second;
    }
}
|
||||
|
||||
|
||||
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; i < MEMORY_LITE / sizeof(__m128i); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
/*
 * Full 64x64 -> 128-bit unsigned multiply.
 * Returns the low 64 bits of a * b and stores the high 64 bits in *hi.
 */
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    const unsigned __int128 product = (unsigned __int128) a * (unsigned __int128) b;

    *hi = (uint64_t) (product >> 64);
    return (uint64_t) product;
}
|
||||
#elif defined(__i386__)
|
||||
# define HI32(X) \
|
||||
_mm_srli_si128((X), 4)
|
||||
|
||||
|
||||
# define EXTRACT64(X) \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
/*
 * Full 64x64 -> 128-bit unsigned multiply built from 32-bit halves.
 * Returns the low 64 bits of the product and stores the high 64 bits in
 * *product_hi.
 *
 * With multiplier = a * 2^32 + b and multiplicand = c * 2^32 + d:
 *   product = a*c * 2^64 + (a*d + b*c) * 2^32 + b*d
 */
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
    const uint64_t m_hi = multiplier >> 32;
    const uint64_t m_lo = multiplier & 0xFFFFFFFF;
    const uint64_t n_hi = multiplicand >> 32;
    const uint64_t n_lo = multiplicand & 0xFFFFFFFF;

    const uint64_t low_product = m_lo * n_lo;
    const uint64_t cross_first = m_hi * n_lo;

    // Sum of both cross terms can wrap past 64 bits; detect that carry.
    const uint64_t cross       = cross_first + (m_lo * n_hi);
    const uint64_t cross_carry = (cross < cross_first) ? 1 : 0;

    // Low word: b*d plus the low half of the cross sum, with its own carry.
    const uint64_t product_lo = low_product + (cross << 32);
    const uint64_t lo_carry   = (product_lo < low_product) ? 1 : 0;

    // The cross-sum carry has weight 2^96, i.e. bit 32 of the high word.
    *product_hi = (m_hi * n_hi) + (cross >> 32) + (cross_carry << 32) + lo_carry;

    return product_lo;
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Writes the 128-bit value `tmp` to mem_out[0..1], modifying the upper half.
 *
 * mem_out[0] receives the low 64 bits unchanged. For the high 64 bits, byte 3
 * selects a 2-bit entry of the packed table 0x7531, which is XORed into
 * bits 28..29 before the value is stored in mem_out[1].
 */
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
    mem_out[0] = EXTRACT64(tmp);

    // Bring the upper 64 bits down into the lower half so EXTRACT64 can read them.
    const __m128i upper = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
    uint64_t vh = EXTRACT64(upper);

    static const uint16_t table = 0x7531;

    const uint8_t x     = vh >> 24;
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;

    vh ^= ((table >> index) & 0x3) << 28;

    mem_out[1] = vh;
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_LITE_SOFTAES_H */
|
||||
407
algo/cryptonight/cryptonight.c
Normal file
407
algo/cryptonight/cryptonight.c
Normal file
@@ -0,0 +1,407 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
# include "xmrig.h"
|
||||
#endif
|
||||
|
||||
#include "cpu.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight_test.h"
|
||||
#include "cryptonight.h"
|
||||
#include "options.h"
|
||||
#include "persistent_memory.h"
|
||||
|
||||
|
||||
/*
 * Assembly hash routines indexed by [algo variant][variant][assembly flavour].
 * Populated by cryptonight_init(); unset slots stay NULL because objects with
 * static storage duration are zero-initialized, so no explicit initializer is
 * needed (an empty `{}` initializer is not valid ISO C before C23).
 */
static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX];
|
||||
|
||||
|
||||
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av2_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av3_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
void cryptonight_r_av1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av3(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av4(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
void cryptonight_r_av1_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av1_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av2_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av2_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
#endif
|
||||
|
||||
|
||||
static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
|
||||
{
|
||||
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
|
||||
if (func == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
func(test_input, 76, output, ctx);
|
||||
|
||||
return memcmp(output, referenceValue, opt_double_hash ? 64 : 32) == 0;
|
||||
}
|
||||
|
||||
|
||||
static inline bool verify2(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
|
||||
{
|
||||
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
|
||||
if (func == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (opt_double_hash) {
|
||||
uint8_t input[128];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||
const size_t size = cn_r_test_input[i].size;
|
||||
memcpy(input, cn_r_test_input[i].data, size);
|
||||
memcpy(input + size, cn_r_test_input[i].data, size);
|
||||
|
||||
ctx[0]->height = ctx[1]->height = cn_r_test_input[i].height;
|
||||
|
||||
func(input, size, output, ctx);
|
||||
|
||||
if (memcmp(output, referenceValue + i * 32, 32) != 0 || memcmp(output + 32, referenceValue + i * 32, 32) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||
ctx[0]->height = cn_r_test_input[i].height;
|
||||
|
||||
func(cn_r_test_input[i].data, cn_r_test_input[i].size, output, ctx);
|
||||
|
||||
if (memcmp(output, referenceValue + i * 32, 32) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool self_test() {
|
||||
struct cryptonight_ctx *ctx[2];
|
||||
uint8_t output[64];
|
||||
|
||||
const size_t count = opt_double_hash ? 2 : 1;
|
||||
const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
|
||||
bool result = false;
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||
ctx[i]->memory = _mm_malloc(size, 16);
|
||||
|
||||
init_cn_r(ctx[i]);
|
||||
}
|
||||
|
||||
if (opt_algo == ALGO_CRYPTONIGHT) {
|
||||
result = verify(VARIANT_0, output, ctx, test_output_v0) &&
|
||||
verify(VARIANT_1, output, ctx, test_output_v1) &&
|
||||
verify(VARIANT_2, output, ctx, test_output_v2) &&
|
||||
verify2(VARIANT_4, output, ctx, test_output_r);
|
||||
}
|
||||
# ifndef XMRIG_NO_AEON
|
||||
else {
|
||||
result = verify(VARIANT_0, output, ctx, test_output_v0_lite) &&
|
||||
verify(VARIANT_1, output, ctx, test_output_v1_lite);
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
_mm_free(ctx[i]->memory);
|
||||
_mm_free(ctx[i]);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
|
||||
{
|
||||
if (assembly == ASM_AUTO) {
|
||||
assembly = (enum Assembly) cpu_info.assembly;
|
||||
}
|
||||
|
||||
if (assembly == ASM_NONE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return asm_func_map[av][variant][assembly];
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
|
||||
{
|
||||
assert(av > AV_AUTO && av < AV_MAX);
|
||||
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
if (algorithm == ALGO_CRYPTONIGHT) {
|
||||
cn_hash_fun fun = cryptonight_hash_asm_fn(av, variant, opt_assembly);
|
||||
if (fun) {
|
||||
return fun;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
|
||||
cryptonight_av1_v0,
|
||||
cryptonight_av2_v0,
|
||||
cryptonight_av3_v0,
|
||||
cryptonight_av4_v0,
|
||||
cryptonight_av1_v1,
|
||||
cryptonight_av2_v1,
|
||||
cryptonight_av3_v1,
|
||||
cryptonight_av4_v1,
|
||||
cryptonight_av1_v2,
|
||||
cryptonight_av2_v2,
|
||||
cryptonight_av3_v2,
|
||||
cryptonight_av4_v2,
|
||||
|
||||
cryptonight_r_av1,
|
||||
cryptonight_r_av2,
|
||||
cryptonight_r_av3,
|
||||
cryptonight_r_av4,
|
||||
|
||||
# ifndef XMRIG_NO_AEON
|
||||
cryptonight_lite_av1_v0,
|
||||
cryptonight_lite_av2_v0,
|
||||
cryptonight_lite_av3_v0,
|
||||
cryptonight_lite_av4_v0,
|
||||
cryptonight_lite_av1_v1,
|
||||
cryptonight_lite_av2_v1,
|
||||
cryptonight_lite_av3_v1,
|
||||
cryptonight_lite_av4_v1,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
# else
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
# endif
|
||||
};
|
||||
|
||||
# ifndef NDEBUG
|
||||
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
|
||||
|
||||
cn_hash_fun func = func_table[index];
|
||||
|
||||
assert(index < sizeof(func_table) / sizeof(func_table[0]));
|
||||
assert(func != NULL);
|
||||
|
||||
return func;
|
||||
# else
|
||||
return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1];
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
bool cryptonight_init(int av)
|
||||
{
|
||||
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer;
|
||||
|
||||
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm;
|
||||
|
||||
asm_func_map[AV_SINGLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av1_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av1_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av1_asm_bulldozer;
|
||||
|
||||
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av2_asm_intel;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av2_asm_intel;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av2_asm_bulldozer;
|
||||
# endif
|
||||
|
||||
return self_test();
|
||||
}
|
||||
|
||||
|
||||
/*
 * Thin adapters that give the four hash functions a common
 * (input, length in bytes, output) signature so they can share one table.
 */

/* BLAKE-256: length is passed through in bytes. */
static inline void do_blake_hash(const void* input, size_t len, char* output) {
    uint8_t* const digest = (uint8_t*) output;

    blake256_hash(digest, input, len);
}


/* Groestl: length is passed in bits. */
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
    uint8_t* const digest = (uint8_t*) output;

    groestl(input, len * 8, digest);
}


/* JH: first argument is 32 * 8; length is passed in bits. */
static inline void do_jh_hash(const void* input, size_t len, char* output) {
    uint8_t* const digest = (uint8_t*) output;

    jh_hash(32 * 8, input, 8 * len, digest);
}


/* Skein: first argument is 8 * 32; length is passed in bits. */
static inline void do_skein_hash(const void* input, size_t len, char* output) {
    uint8_t* const digest = (uint8_t*) output;

    skein_hash(8 * 32, input, 8 * len, digest);
}


/* Dispatch table, in order: BLAKE, Groestl, JH, Skein. */
void (* const extra_hashes[4])(const void *, size_t, char *) = {
    do_blake_hash,
    do_groestl_hash,
    do_jh_hash,
    do_skein_hash
};
|
||||
|
||||
|
||||
static inline enum Variant cryptonight_variant(uint8_t version)
|
||||
{
|
||||
if (opt_variant != VARIANT_AUTO) {
|
||||
return opt_variant;
|
||||
}
|
||||
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return VARIANT_1;
|
||||
}
|
||||
|
||||
if (version >= 10) {
|
||||
return VARIANT_4;
|
||||
}
|
||||
|
||||
if (version >= 8) {
|
||||
return VARIANT_2;
|
||||
}
|
||||
|
||||
return version == 7 ? VARIANT_1 : VARIANT_0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
|
||||
enum Variant variant = cryptonight_variant(blob[0]);
|
||||
|
||||
do {
|
||||
cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
|
||||
|
||||
(*hashes_done)++;
|
||||
|
||||
if (unlikely(hash[7] < target)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
(*nonceptr)++;
|
||||
} while (likely(((*nonceptr) < max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
||||
int rc = 0;
|
||||
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
|
||||
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
|
||||
enum Variant variant = cryptonight_variant(blob[0]);
|
||||
|
||||
do {
|
||||
cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
|
||||
(*hashes_done) += 2;
|
||||
|
||||
if (unlikely(hash[7] < target)) {
|
||||
return rc |= 1;
|
||||
}
|
||||
|
||||
if (unlikely(hash[15] < target)) {
|
||||
return rc |= 2;
|
||||
}
|
||||
|
||||
if (rc) {
|
||||
break;
|
||||
}
|
||||
|
||||
(*nonceptr0)++;
|
||||
(*nonceptr1)++;
|
||||
} while (likely(((*nonceptr0) < max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
81
algo/cryptonight/cryptonight.h
Normal file
81
algo/cryptonight/cryptonight.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_H
|
||||
#define XMRIG_CRYPTONIGHT_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#define MEMORY 2097152 /* 2 MiB */
|
||||
#define MEMORY_LITE 1048576 /* 1 MiB */
|
||||
|
||||
|
||||
#if defined _MSC_VER || defined XMRIG_ARM
|
||||
#define ABI_ATTRIBUTE
|
||||
#else
|
||||
#define ABI_ATTRIBUTE __attribute__((ms_abi))
|
||||
#endif
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
typedef void(*cn_mainloop_fun_ms_abi)(struct cryptonight_ctx*) ABI_ATTRIBUTE;
|
||||
typedef void(*cn_mainloop_double_fun_ms_abi)(struct cryptonight_ctx*, struct cryptonight_ctx*) ABI_ATTRIBUTE;
|
||||
|
||||
|
||||
/*
 * Per-hash working context; every cn_hash_fun receives an array of pointers
 * to these.
 * NOTE(review): field order and the `unused` padding presumably matter to code
 * outside this header (e.g. the assembly routines) - confirm before changing
 * the layout.
 */
struct cryptonight_ctx {
|
||||
/* 224-byte, 16-byte aligned buffer; presumably the hash state - TODO confirm. */
uint8_t state[224] __attribute__((aligned(16)));
|
||||
/* Scratchpad pointer; self_test() allocates MEMORY or MEMORY_LITE bytes for it. */
uint8_t *memory __attribute__((aligned(16)));
|
||||
|
||||
/* Not referenced by any code visible alongside this header. */
uint8_t unused[40];
|
||||
/* presumably a lookup table for the software AES path - TODO confirm. */
const uint32_t *saes_table;
|
||||
|
||||
/* Function pointers declared with ABI_ATTRIBUTE (ms_abi unless MSVC or ARM). */
cn_mainloop_fun_ms_abi generated_code;
|
||||
cn_mainloop_double_fun_ms_abi generated_code_double;
|
||||
/* presumably the heights the two generated routines were built for - TODO confirm. */
uint64_t generated_code_height;
|
||||
uint64_t generated_code_double_height;
|
||||
/* Set from the test vector's height by verify2() before each hash. */
uint64_t height;
|
||||
};
|
||||
|
||||
|
||||
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant);
|
||||
|
||||
bool cryptonight_init(int av);
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_H */
|
||||
274
algo/cryptonight/cryptonight_aesni.h
Normal file
274
algo/cryptonight/cryptonight_aesni.h
Normal file
@@ -0,0 +1,274 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_AESNI_H
|
||||
#define XMRIG_CRYPTONIGHT_AESNI_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
/*
 * Body shared by aes_genkey_sub1/2/4/8: advances a pair of round keys in place.
 *
 * This is a macro rather than a function because `imm8` is passed straight to
 * _mm_aeskeygenassist_si128. It expands to a statement list that declares a
 * local `xout1` and expects `__m128i* xout0` and `__m128i* xout2` to be in
 * scope at the point of use. The expansion already ends with ';'.
 *
 * The last line deliberately has no trailing backslash, so the line that
 * follows the macro can never be spliced into it.
 */
#define aes_genkey_sub(imm8) \
    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
    xout1  = _mm_shuffle_epi32(xout1, 0xFF); \
    *xout0 = sl_xor(*xout0); \
    *xout0 = _mm_xor_si128(*xout0, xout1); \
    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);\
    xout1  = _mm_shuffle_epi32(xout1, 0xAA); \
    *xout2 = sl_xor(*xout2); \
    *xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
|
||||
|
||||
// Running XOR across the four 32-bit lanes of tmp1:
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
    // Each byte shift of 4 moves every lane up by one position.
    const __m128i up1 = _mm_slli_si128(tmp1, 4);
    const __m128i up2 = _mm_slli_si128(tmp1, 8);
    const __m128i up3 = _mm_slli_si128(tmp1, 12);

    const __m128i low_pair  = _mm_xor_si128(tmp1, up1);
    const __m128i high_pair = _mm_xor_si128(up2, up3);

    return _mm_xor_si128(low_pair, high_pair);
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x1)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x2)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x4)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
aes_genkey_sub(0x8)
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = _mm_aesenc_si128(*x0, key);
|
||||
*x1 = _mm_aesenc_si128(*x1, key);
|
||||
*x2 = _mm_aesenc_si128(*x2, key);
|
||||
*x3 = _mm_aesenc_si128(*x3, key);
|
||||
*x4 = _mm_aesenc_si128(*x4, key);
|
||||
*x5 = _mm_aesenc_si128(*x5, key);
|
||||
*x6 = _mm_aesenc_si128(*x6, key);
|
||||
*x7 = _mm_aesenc_si128(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Derives the ten round keys k0..k9 from the two 16-byte blocks at
 * memory[0] and memory[1] (aligned loads).
 *
 * k0/k1 are the two loaded blocks themselves; each following pair is produced
 * by the next key-schedule step (aes_genkey_sub1, 2, 4, 8 in that order).
 */
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
    __m128i first  = _mm_load_si128(memory);
    __m128i second = _mm_load_si128(memory + 1);

    *k0 = first;
    *k1 = second;

    aes_genkey_sub1(&first, &second);
    *k2 = first;
    *k3 = second;

    aes_genkey_sub2(&first, &second);
    *k4 = first;
    *k5 = second;

    aes_genkey_sub4(&first, &second);
    *k6 = first;
    *k7 = second;

    aes_genkey_sub8(&first, &second);
    *k8 = first;
    *k9 = second;
}
|
||||
|
||||
|
||||
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
/* Low 64 bits of the 128-bit vector X. */
#   define EXTRACT64(X) _mm_cvtsi128_si64(X)

/*
 * Full 64 x 64 -> 128-bit unsigned multiplication.
 *
 * Returns the low 64 bits of a * b and stores the high 64 bits in *hi.
 */
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    const unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;

    *hi = (uint64_t) (r >> 64);
    return (uint64_t) r;
}
|
||||
#elif defined(__i386__)
|
||||
/* Shifts X right by 4 bytes, so its second 32-bit lane becomes the lowest. */
#   define HI32(X) \
    _mm_srli_si128((X), 4)

/* Low 64 bits of the 128-bit vector X, assembled from its two lowest 32-bit lanes. */
#   define EXTRACT64(X) \
    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))

/*
 * Full 64 x 64 -> 128-bit unsigned multiplication using only 64-bit
 * arithmetic.
 *
 * Returns the low 64 bits of multiplier * multiplicand and stores the high
 * 64 bits in *product_hi.
 */
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi)
{
    // multiplier   = ab = a * 2^32 + b
    // multiplicand = cd = c * 2^32 + d
    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
    const uint64_t a = multiplier >> 32;
    const uint64_t b = multiplier & 0xFFFFFFFF;
    const uint64_t c = multiplicand >> 32;
    const uint64_t d = multiplicand & 0xFFFFFFFF;

    const uint64_t ad = a * d;
    const uint64_t bd = b * d;

    // The middle term may not fit in 64 bits; a wrap-around is worth 2^96
    // in the full product, i.e. 2^32 in the high word.
    const uint64_t adbc       = ad + (b * c);
    const uint64_t adbc_carry = adbc < ad ? 1 : 0;

    // multiplier * multiplicand = product_hi * 2^64 + product_lo
    const uint64_t product_lo       = bd + (adbc << 32);
    const uint64_t product_lo_carry = product_lo < bd ? 1 : 0;

    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

    return product_lo;
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Writes the 128-bit value `tmp` to mem_out[0..1], modifying the upper
 * 64-bit half before it is stored.
 *
 * The low half is written unchanged.  For the high half, the byte at bits
 * 24..31 selects a 2-bit entry from the packed table 0x7531, and that entry
 * is XOR-ed into bits 28..29.
 */
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
    mem_out[0] = EXTRACT64(tmp);

    // Bring the upper 64 bits down into the lower half so EXTRACT64 can read them.
    tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
    uint64_t vh = EXTRACT64(tmp);

    const uint8_t x = (uint8_t) (vh >> 24);
    static const uint16_t table = 0x7531;

    // Bits 4..5 and bit 0 of x form a 3-bit selector; doubled, it is the
    // shift (0, 2, ..., 14) of the 2-bit table entry.
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
    vh ^= ((table >> index) & 0x3) << 28;

    mem_out[1] = vh;
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_AESNI_H */
|
||||
261
algo/cryptonight/cryptonight_av1.c
Normal file
261
algo/cryptonight/cryptonight_av1.c
Normal file
@@ -0,0 +1,261 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
||||
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
||||
extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx);
|
||||
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
||||
|
||||
|
||||
void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
cnv2_mainloop_ryzen_asm(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
cnv2_mainloop_bulldozer_asm(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
keccakf((uint64_t*) ctx[1]->state, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
#endif
|
||||
304
algo/cryptonight/cryptonight_av2.c
Normal file
304
algo/cryptonight/cryptonight_av2.c
Normal file
@@ -0,0 +1,304 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av2_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT2_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx0);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(1, cl, cx1);
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, hi, lo);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
193
algo/cryptonight/cryptonight_av3.c
Normal file
193
algo/cryptonight/cryptonight_av3.c
Normal file
@@ -0,0 +1,193 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
void cryptonight_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av3_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = soft_aesenc(cx, ax0);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
304
algo/cryptonight/cryptonight_av4.c
Normal file
304
algo/cryptonight/cryptonight_av4.c
Normal file
@@ -0,0 +1,304 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
void cryptonight_av4_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av4_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av4_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = soft_aesenc(cx0, ax0);
|
||||
cx1 = soft_aesenc(cx1, ax1);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT2_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx0);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(1, cl, cx1);
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, hi, lo);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
150
algo/cryptonight/cryptonight_monero.h
Normal file
150
algo/cryptonight/cryptonight_monero.h
Normal file
@@ -0,0 +1,150 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_MONERO_H
|
||||
#define XMRIG_CRYPTONIGHT_MONERO_H
|
||||
|
||||
|
||||
#include <fenv.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
||||
/*
 * Square-root step used by VARIANT2_INTEGER_MATH.
 *
 * The top 52 bits of n0 become the mantissa of a double with biased exponent
 * 1023 (a value in [1, 2)), the SSE2 scalar square root is taken, and the
 * result's bit pattern is shifted right by 19. A product check against n0
 * then bumps the result by one when the estimate came out too small.
 *
 * Only the low 64 bits of the returned vector are set. The value still
 * carries the shifted exponent bits above bit 32; VARIANT2_INTEGER_MATH uses
 * it only as (sqrt_result << 32) and as part of a (uint32_t) sum, which
 * discard them.
 *
 * NOTE(review): there is an upward correction only. Callers in this code base
 * invoke VARIANT2_SET_ROUNDING_MODE() (round toward -inf) beforehand, which
 * presumably guarantees the estimate is never too large - confirm before
 * calling this under a different rounding mode.
 */
static inline __m128i int_sqrt_v2(const uint64_t n0)
{
    const __m128i exp_bias  = _mm_set_epi64x(0, 1023ULL << 52);
    const __m128i x_bits    = _mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), exp_bias);
    const __m128d root      = _mm_sqrt_sd(_mm_setzero_pd(), _mm_castsi128_pd(x_bits));
    const uint64_t root_raw = (uint64_t)(_mm_cvtsi128_si64(_mm_castpd_si128(root)));

    const uint64_t s = root_raw >> 20;
    uint64_t r       = root_raw >> 19;

    /* Subtracting 1022 << 32 removes the exponent bits but leaves bit 32 set in each factor. */
    uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
#   if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64))
    /* Borrow out of (x2 - n0) is added to r: same effect as the fallback below. */
    _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
#   else
    if (x2 < n0) ++r;
#   endif

    return _mm_cvtsi64_si128(r);
}
|
||||
|
||||
|
||||
/*
 * Declares tweak1_2_<part> in the caller's scope: the 8 bytes at offset 35 of
 * the <part>-th input blob (blobs are `size` bytes apart) XORed with the
 * 64-bit word at index 24 of ctx[part]->state.
 * Expects `input`, `size` and `ctx` to be visible at the expansion site, and
 * reads input bytes 35..42 of the blob, so callers must ensure size >= 43.
 */
# define VARIANT1_INIT(part) \
|
||||
uint64_t tweak1_2_##part = (*(const uint64_t*)(input + 35 + part * size) ^ \
|
||||
*((const uint64_t*)(ctx[part]->state) + 24)); \
|
||||
|
||||
/*
 * Declares the two running values consumed by VARIANT2_INTEGER_MATH in the
 * caller's scope: division_result_xmm_<part> seeded from h<part>[12] and
 * sqrt_result_xmm_<part> seeded from h<part>[13].
 * Expects a 64-bit word pointer named h<part> to be visible at the expansion site.
 */
# define VARIANT2_INIT(part) \
|
||||
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
|
||||
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
|
||||
|
||||
/*
 * Switches floating-point rounding to "toward negative infinity"
 * (_control87(RC_DOWN, MCW_RC) on MSVC, fesetround(FE_DOWNWARD) elsewhere).
 * Nothing in this header restores the previous mode afterwards.
 */
#ifdef _MSC_VER
|
||||
# define VARIANT2_SET_ROUNDING_MODE() { _control87(RC_DOWN, MCW_RC); }
|
||||
#else
|
||||
# define VARIANT2_SET_ROUNDING_MODE() { fesetround(FE_DOWNWARD); }
|
||||
#endif
|
||||
|
||||
/*
 * Integer division / square-root step. Requires the variables declared by
 * VARIANT2_INIT(part).
 *
 *   part - lane suffix selecting division_result_xmm_<part> / sqrt_result_xmm_<part>.
 *   cl   - 64-bit lvalue; XORed in place with the previous division result and
 *          the previous sqrt result shifted left by 32.
 *   cx   - __m128i; its low and high 64-bit halves feed the new results.
 *
 * The divisor d is the low 32 bits of (low half of cx + 2 * previous sqrt)
 * with bits 31 and 0 forced on, so it is never zero. The new division result
 * packs the 32-bit quotient of (high half of cx) / d in its low half and the
 * remainder in its high half. The new sqrt result is
 * int_sqrt_v2(low half of cx + new division result).
 */
# define VARIANT2_INTEGER_MATH(part, cl, cx) \
|
||||
{ \
|
||||
const uint64_t sqrt_result = (uint64_t)(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
|
||||
const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
|
||||
cl ^= (uint64_t)(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
|
||||
const uint32_t d = (uint32_t)(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
|
||||
const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
|
||||
const uint64_t division_result = (uint32_t)(cx_1 / d) + ((cx_1 % d) << 32); \
|
||||
division_result_xmm_##part = _mm_cvtsi64_si128((int64_t)(division_result)); \
|
||||
sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
|
||||
}
|
||||
|
||||
/*
 * Shuffles the three 16-byte chunks found at offset ^ 0x10, ^ 0x20 and ^ 0x30
 * relative to base_ptr. All three are loaded first, then written back rotated
 * with a 64-bit-lane add:
 *   [offset ^ 0x10] = chunk3 + _b1
 *   [offset ^ 0x20] = chunk1 + _b
 *   [offset ^ 0x30] = chunk2 + _a
 * The chunk at `offset` itself is not touched. Uses aligned loads/stores, so
 * base_ptr + offset must be 16-byte aligned.
 */
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
|
||||
{ \
|
||||
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
|
||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||
}
|
||||
|
||||
/*
 * Same chunk rotation as VARIANT2_SHUFFLE:
 *   [offset ^ 0x10] = chunk3 + _b1
 *   [offset ^ 0x20] = chunk1 + _b
 *   [offset ^ 0x30] = chunk2 + _a
 * and additionally XORs the three chunks, as loaded before the stores, into
 * _c. _c must therefore be a modifiable __m128i lvalue.
 */
# define VARIANT4_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
|
||||
{ \
|
||||
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
|
||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||
_c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \
|
||||
}
|
||||
|
||||
/*
 * Second-half shuffle that also mixes the 128-bit product into the scratchpad.
 * hi and lo must be modifiable 64-bit lvalues.
 *
 *   1. chunk1 = [offset ^ 0x10] XOR (hi in the low lane, lo in the high lane).
 *   2. hi ^= low qword of [offset ^ 0x20], lo ^= high qword of it, both read
 *      before any store below.
 *   3. Same rotation as VARIANT2_SHUFFLE, using the XORed chunk1:
 *        [offset ^ 0x10] = chunk3 + _b1
 *        [offset ^ 0x20] = chunk1 + _b
 *        [offset ^ 0x30] = chunk2 + _a
 */
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
|
||||
{ \
|
||||
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
|
||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
|
||||
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
|
||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||
}
|
||||
|
||||
|
||||
/*
 * Portable "never inline" function attribute. Defined only if the including
 * file has not already provided NOINLINE; expands to nothing on compilers
 * that are neither GCC-compatible nor MSVC.
 */
#ifndef NOINLINE
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__ ((noinline))
|
||||
#elif _MSC_VER
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "variant4_random_math.h"
|
||||
|
||||
/*
 * Declares, in the caller's scope, the register file r<part>[9] and the
 * instruction buffer code<part>[256] used by VARIANT4_RANDOM_MATH.
 * r<part>[0..3] are seeded from the low/high 32-bit halves of h<part>[12] and
 * h<part>[13]; r<part>[4..8] are left unset here and are filled on every
 * VARIANT4_RANDOM_MATH invocation. code<part> is generated by
 * v4_random_math_init() from ctx[part]->height; its return value is discarded.
 * Expects h<part> and ctx to be visible at the expansion site.
 */
#define VARIANT4_RANDOM_MATH_INIT(part) \
|
||||
uint32_t r##part[9]; \
|
||||
struct V4_Instruction code##part[256]; \
|
||||
{ \
|
||||
r##part[0] = (uint32_t)(h##part[12]); \
|
||||
r##part[1] = (uint32_t)(h##part[12] >> 32); \
|
||||
r##part[2] = (uint32_t)(h##part[13]); \
|
||||
r##part[3] = (uint32_t)(h##part[13] >> 32); \
|
||||
} \
|
||||
v4_random_math_init(code##part, ctx[part]->height);
|
||||
|
||||
/*
 * One random-math step. Requires r<part> / code<part> from
 * VARIANT4_RANDOM_MATH_INIT(part).
 *
 *   cl       - 64-bit lvalue; XORed in place with (r0 + r1) in the low half
 *              and (r2 + r3) in the high half (both sums are 32-bit, wrapping).
 *   al, ah   - their low 32 bits are copied to r4 and r5.
 *   bx0, bx1 - r6 = low dword of bx0, r7 = low dword of bx1,
 *              r8 = dword at byte offset 8 of bx1.
 *
 * v4_random_math() then runs code<part> over r<part>; callers read the
 * updated r<part>[0..3] afterwards.
 */
#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \
|
||||
{ \
|
||||
cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \
|
||||
r##part[4] = (uint32_t)(al); \
|
||||
r##part[5] = (uint32_t)(ah); \
|
||||
r##part[6] = (uint32_t)(_mm_cvtsi128_si32(bx0)); \
|
||||
r##part[7] = (uint32_t)(_mm_cvtsi128_si32(bx1)); \
|
||||
r##part[8] = (uint32_t)(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
|
||||
v4_random_math(code##part, r##part); \
|
||||
}
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
|
||||
143
algo/cryptonight/cryptonight_r_av1.c
Normal file
143
algo/cryptonight/cryptonight_r_av1.c
Normal file
@@ -0,0 +1,143 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||
|
||||
|
||||
void cryptonight_r_av1_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
|
||||
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_INTEL);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_r_av1_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
|
||||
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_BULLDOZER);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
#endif
|
||||
202
algo/cryptonight/cryptonight_r_av2.c
Normal file
202
algo/cryptonight/cryptonight_r_av2.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_r_av2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||
|
||||
|
||||
void cryptonight_r_av2_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_INTEL);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||
|
||||
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||
|
||||
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||
keccakf((uint64_t *) ctx[1]->state, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_r_av2_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_BULLDOZER);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||
|
||||
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||
|
||||
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||
keccakf((uint64_t *) ctx[1]->state, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
#endif
|
||||
112
algo/cryptonight/cryptonight_r_av3.c
Normal file
112
algo/cryptonight/cryptonight_r_av3.c
Normal file
@@ -0,0 +1,112 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Single-hash CryptonightR entry point; only ctx[0] is used.
 *
 * Visible pipeline: keccak(input) -> cn_explode_scratchpad -> main loop ->
 * cn_implode_scratchpad -> keccakf -> extra_hashes[state[0] & 3] -> output.
 *
 * The main loop has two build-time forms:
 *   - XMRIG_NO_ASM not defined: code produced by v4_soft_aes_compile_code() is called.
 *   - XMRIG_NO_ASM defined:     the C loop below, which uses soft_aesenc().
 *
 * NOTE(review): the VARIANT2_* / VARIANT4_* macros are defined outside this file;
 * they appear to rely on local names such as h0 / r0 (r0 is used below but never
 * declared here), so locals should not be renamed without checking them.
 */
void cryptonight_r_av3(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
// Recompile the generated routine only when the height differs from the cached one.
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
|
||||
// ASM_NONE is passed as the assembly flavour for the soft-AES code generator.
v4_soft_aes_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_NONE);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
// Publish the soft-AES lookup table in the context before calling the generated routine.
ctx[0]->saes_table = (const uint32_t*)saes_table;
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
# else
|
||||
// NOTE(review): l0 is declared const but the loop below writes through it via casts.
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
|
||||
// Initial a/b registers are XOR combinations of 64-bit words of the Keccak state.
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
// 0x80000 iterations. "idx0 & 0x1FFFF0" is a multiple of 16 below 0x200000.
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = soft_aesenc(cx, ax0);
|
||||
|
||||
// First half: shuffle, then store bx0 ^ cx back to the slot that was just read.
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
// The next slot is addressed by the low 64 bits of cx.
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
// 64x64 -> 128-bit multiply: lo is the return value, hi is written through the pointer.
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
|
||||
// High product half is added to the low accumulator word and vice versa.
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
// Rotate the b registers: previous bx0 becomes bx1, this iteration's cx becomes bx0.
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||
// The low two bits of state[0] pick one of the four extra_hashes entries.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
143
algo/cryptonight/cryptonight_r_av4.c
Normal file
143
algo/cryptonight/cryptonight_r_av4.c
Normal file
@@ -0,0 +1,143 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
/*
 * Double-hash CryptonightR entry point written in C with soft_aesenc().
 *
 * Two messages of `size` bytes each are read from input and input + size; the two
 * results are written to output and output + 32. ctx[0] and ctx[1] provide the state
 * and scratchpad for the first and second hash. The two hashes are interleaved
 * inside a single loop of 0x80000 iterations.
 *
 * NOTE(review): the VARIANT2_* / VARIANT4_* macros are defined outside this file;
 * they appear to rely on local names such as h0/h1 and r0/r1 (r0 and r1 are used
 * below but never declared here), so locals should not be renamed without checking them.
 */
void cryptonight_r_av4(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
// NOTE(review): l0 / l1 are declared const but are written through casts below.
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
// Initial a/b registers are XOR combinations of 64-bit words of each Keccak state.
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
// 0x80000 iterations. "idx & 0x1FFFF0" is a multiple of 16 below 0x200000.
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = soft_aesenc(cx0, ax0);
|
||||
cx1 = soft_aesenc(cx1, ax1);
|
||||
|
||||
// First half, hash 0: shuffle, then store bx00 ^ cx0 back to the slot just read.
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
// First half, hash 1.
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
// The next slots are addressed by the low 64 bits of cx0 / cx1.
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
// Second half, hash 0. hi/lo/cl/ch are reused for hash 1 further down.
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
// 64x64 -> 128-bit multiply: lo is the return value, hi is written through the pointer.
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
// Second half, hash 1: same sequence on the second scratchpad.
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
// Rotate the b registers of both hashes: bxN0 -> bxN1, this iteration's cxN -> bxN0.
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
// The low two bits of each state[0] pick one of the four extra_hashes entries.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
255
algo/cryptonight/cryptonight_softaes.h
Normal file
255
algo/cryptonight/cryptonight_softaes.h
Normal file
@@ -0,0 +1,255 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_SOFTAES_H
|
||||
#define XMRIG_CRYPTONIGHT_SOFTAES_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#include "crypto/soft_aes.h"
|
||||
|
||||
|
||||
// Running XOR over the four 32-bit elements of tmp1, lowest element first:
//   sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// Implemented by XOR-ing in the value shifted left by 4, 8 and 12 bytes.
static inline __m128i sl_xor(__m128i tmp1)
{
    __m128i shifted = tmp1;
    __m128i folded  = tmp1;

    for (int step = 0; step < 3; ++step) {
        shifted = _mm_slli_si128(shifted, 4);   // move every element up one 32-bit slot
        folded  = _mm_xor_si128(folded, shifted);
    }

    return folded;
}
|
||||
|
||||
|
||||
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
|
||||
{
|
||||
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft_aeskeygenassist(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = soft_aesenc(*x0, key);
|
||||
*x1 = soft_aesenc(*x1, key);
|
||||
*x2 = soft_aesenc(*x2, key);
|
||||
*x3 = soft_aesenc(*x3, key);
|
||||
*x4 = soft_aesenc(*x4, key);
|
||||
*x5 = soft_aesenc(*x5, key);
|
||||
*x6 = soft_aesenc(*x6, key);
|
||||
*x7 = soft_aesenc(*x7, key);
|
||||
}
|
||||
|
||||
|
||||
// Derives ten round keys from the 32 bytes at `memory` (two aligned 128-bit loads).
//   k0, k1 - the two loaded values, unchanged
//   k2..k9 - four successive aes_genkey_sub() steps with rcon = 0x1, 0x2, 0x4, 0x8,
//            each step yielding one (even, odd) key pair
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
    __m128i* const derived[8] = { k2, k3, k4, k5, k6, k7, k8, k9 };

    __m128i even = _mm_load_si128(memory);
    __m128i odd  = _mm_load_si128(memory + 1);

    *k0 = even;
    *k1 = odd;

    uint8_t rcon = 0x1;
    for (int step = 0; step < 4; ++step) {
        aes_genkey_sub(&even, &odd, rcon);
        *derived[2 * step]     = even;
        *derived[2 * step + 1] = odd;
        rcon = (uint8_t) (rcon << 1);   // 0x1 -> 0x2 -> 0x4 -> 0x8
    }
}
|
||||
|
||||
|
||||
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
|
||||
for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8) {
|
||||
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||
{
|
||||
// This is more than we have registers, compiler will assign 2 keys on the stack
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
xout1 = _mm_load_si128(output + 5);
|
||||
xout2 = _mm_load_si128(output + 6);
|
||||
xout3 = _mm_load_si128(output + 7);
|
||||
xout4 = _mm_load_si128(output + 8);
|
||||
xout5 = _mm_load_si128(output + 9);
|
||||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
|
||||
{
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
|
||||
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
}
|
||||
|
||||
_mm_store_si128(output + 4, xout0);
|
||||
_mm_store_si128(output + 5, xout1);
|
||||
_mm_store_si128(output + 6, xout2);
|
||||
_mm_store_si128(output + 7, xout3);
|
||||
_mm_store_si128(output + 8, xout4);
|
||||
_mm_store_si128(output + 9, xout5);
|
||||
_mm_store_si128(output + 10, xout6);
|
||||
_mm_store_si128(output + 11, xout7);
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
// Full 64x64 -> 128-bit unsigned multiply using the compiler's 128-bit integer type.
// Returns the low 64 bits of a * b and stores the high 64 bits in *hi.
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    const unsigned __int128 product = (unsigned __int128) a * b;
    const uint64_t low = (uint64_t) product;

    *hi = (uint64_t) (product >> 64);
    return low;
}
|
||||
#elif defined(__i386__)
|
||||
# define HI32(X) \
|
||||
_mm_srli_si128((X), 4)
|
||||
|
||||
|
||||
# define EXTRACT64(X) \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
// Portable 64x64 -> 128-bit unsigned multiply for targets without a 128-bit integer type.
// Returns the low 64 bits of multiplier * multiplicand and stores the high 64 bits
// in *product_hi.
//
// Declared `static inline`, like the x86_64 variant in this header: a plain `inline`
// definition in a C header provides no external definition, so any call the compiler
// chooses not to inline would fail to link.
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi)
{
    // multiplier   = ab = a * 2^32 + b
    // multiplicand = cd = c * 2^32 + d
    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
    const uint64_t a = multiplier >> 32;
    const uint64_t b = multiplier & 0xFFFFFFFF;
    const uint64_t c = multiplicand >> 32;
    const uint64_t d = multiplicand & 0xFFFFFFFF;

    const uint64_t ad = a * d;
    const uint64_t bd = b * d;

    // The middle term can overflow 64 bits; the lost bit is worth 2^64 in adbc,
    // i.e. 2^96 in the product, i.e. 2^32 in the high word.
    const uint64_t adbc       = ad + (b * c);
    const uint64_t adbc_carry = adbc < ad ? 1 : 0;

    // multiplier * multiplicand = product_hi * 2^64 + product_lo
    const uint64_t product_lo       = bd + (adbc << 32);
    const uint64_t product_lo_carry = product_lo < bd ? 1 : 0;

    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

    return product_lo;
}
|
||||
#endif
|
||||
|
||||
|
||||
// Writes the 128-bit value `tmp` to mem_out[0..1] with a tweak applied to the upper half.
//   mem_out[0] - low 64 bits of tmp, unchanged
//   mem_out[1] - high 64 bits of tmp, with a 2-bit value taken from the constant
//                0x7531 XOR-ed in at bits 28..29; the 2-bit value is selected by
//                bits 0, 4 and 5 of byte 3 of the high half
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
    static const uint16_t table = 0x7531;

    mem_out[0] = EXTRACT64(tmp);

    // Move the upper 64 bits of tmp into the lower 64 bits so EXTRACT64 can read them.
    const __m128i upper = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
    uint64_t vh = EXTRACT64(upper);

    const uint8_t x = vh >> 24;
    // index is an even shift amount in 0..14 built from bits 5, 4 and 0 of x.
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
    vh ^= ((table >> index) & 0x3) << 28;

    mem_out[1] = vh;
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_SOFTAES_H */
|
||||
129
algo/cryptonight/cryptonight_test.h
Normal file
129
algo/cryptonight/cryptonight_test.h
Normal file
@@ -0,0 +1,129 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_TEST_H
|
||||
#define XMRIG_CRYPTONIGHT_TEST_H
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
// 152 bytes laid out as two 76-byte blobs stored back to back (the second blob
// starts at offset 76). Presumably the self-test feeds both blobs to the
// double-hash entry points with size = 76 -- confirm against the caller.
static const uint8_t test_input[152] = {
    0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
    0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
    0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
    0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
    0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
    0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
    0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
    0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
    0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
    0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02
};
|
||||
|
||||
|
||||
// 64 reference bytes for variant 0: two 32-byte values stored back to back.
// Presumably the expected hashes of the two blobs in test_input -- confirm against the self-test.
static const uint8_t test_output_v0[64] = {
    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
    0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
    0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F
};
|
||||
|
||||
|
||||
// Cryptonight variant 1 (Monero v7)
// 64 reference bytes: two 32-byte values stored back to back.
static const uint8_t test_output_v1[64] = {
    0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
    0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
    0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
    0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22
};
|
||||
|
||||
|
||||
// Cryptonight variant 2 (Monero v8)
// 64 reference bytes: two 32-byte values stored back to back.
static const uint8_t test_output_v2[64] = {
    0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
    0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
    0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
    0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78
};
|
||||
|
||||
|
||||
// One "cn/r" test vector: a height plus a message of up to 64 bytes.
struct cn_r_test_input_data
{
    uint64_t height;    // height value paired with this message
    size_t   size;      // number of meaningful bytes in data[]
    uint8_t  data[64];  // message bytes; entries past `size` are unused
};
|
||||
|
||||
|
||||
// Ten "cn/r" test vectors: consecutive heights 1806260..1806269, each paired with an
// ASCII message of 44..62 bytes (remaining bytes of data[] are zero-initialised).
static const struct cn_r_test_input_data cn_r_test_input[] = {
    { 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } },
    { 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } },
    { 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } },
    { 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } },
    { 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } },
    { 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } },
    { 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } },
    { 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } },
    { 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } },
    { 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } },
};
|
||||
|
||||
|
||||
// "cn/r"
// 320 reference bytes: ten rows of 32 bytes. Presumably row i is the expected hash
// for cn_r_test_input[i] -- confirm against the self-test.
static const uint8_t test_output_r[] = {
    0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,
    0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66,
    0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab,
    0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb,
    0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02,
    0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef,
    0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd,
    0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4,
    0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3,
    0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e,
};
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
// Reference bytes for the "v0 lite" self-test (64 bytes).
// NOTE(review): presumably the expected hash output for the lite algorithm's test input - confirm against the code that compares it.
static const uint8_t test_output_v0_lite[64] = {
    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58,
    0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E,
    0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
    0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40,
    0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
    0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3,
    0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD
};
|
||||
|
||||
|
||||
// AEON v7
// Reference bytes for the "v1 lite" self-test (64 bytes).
static const uint8_t test_output_v1_lite[64] = {
    0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD,
    0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
    0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28,
    0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
    0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2,
    0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
    0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F,
    0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F
};
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_TEST_H */
|
||||
449
algo/cryptonight/variant4_random_math.h
Normal file
449
algo/cryptonight/variant4_random_math.h
Normal file
@@ -0,0 +1,449 @@
|
||||
#ifndef VARIANT4_RANDOM_MATH_H
|
||||
#define VARIANT4_RANDOM_MATH_H
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
#include "crypto/c_blake256.h"
|
||||
|
||||
|
||||
// Tunables of the random program generator (see v4_random_math_init)
enum V4_Settings
{
    // Target latency of a generated program on the modelled CPU:
    // 45 cycles, i.e. the cost of 15 multiplications at 3 cycles each
    TOTAL_LATENCY = 3 * 15,

    // Lower bound on the number of generated instructions
    NUM_INSTRUCTIONS_MIN = 60,

    // Upper bound on the number of generated instructions
    // (the terminating RET instruction is not counted)
    NUM_INSTRUCTIONS_MAX = 70,

    // Number of ALUs that can execute MUL
    // (modern CPUs typically have a single multiplier ALU)
    ALU_COUNT_MUL = 1,

    // Number of ALUs available to the random program overall
    // (modern CPUs have 4; only 3 are assumed because random math
    // runs interleaved with the rest of the main loop)
    ALU_COUNT = 3,
};
|
||||
|
||||
// Opcodes understood by the random math interpreter.
// "a" is the destination register, "b" the source register.
enum V4_InstructionList
{
    MUL = 0,    // a = a * b
    ADD = 1,    // a = a + b + C, where C is an unsigned 32-bit constant
    SUB = 2,    // a = a - b
    ROR = 3,    // a = "a" rotated right by "b & 31" bits
    ROL = 4,    // a = "a" rotated left by "b & 31" bits
    XOR = 5,    // a = a ^ b
    RET = 6,    // stop execution

    // Number of real (non-RET) opcodes; used to size per-opcode tables
    V4_INSTRUCTION_COUNT = RET,
};
|
||||
|
||||
// Bit layout used when decoding one random byte into an instruction.
// Any random byte sequence decodes to valid code.
//
// The interpreter works on 9 registers:
// - 4 variable registers (possible destinations)
// - 5 constant registers initialized from loop variables
// Only the 4 variable registers can be written, hence the 2-bit dst_index.
enum V4_InstructionDefinition
{
    V4_OPCODE_BITS    = 3,  // low bits of the byte: opcode selector
    V4_DST_INDEX_BITS = 2,  // next bits: destination register
    V4_SRC_INDEX_BITS = 3,  // top bits: source register
};
|
||||
|
||||
// One decoded instruction of a random math program
struct V4_Instruction
{
    uint8_t  opcode;     // one of V4_InstructionList
    uint8_t  dst_index;  // index of the register that is modified
    uint8_t  src_index;  // index of the register that is read
    uint32_t C;          // constant added by ADD; the generator stores 0 for other opcodes
};
|
||||
|
||||
#ifndef FORCEINLINE
|
||||
#ifdef __GNUC__
|
||||
#define FORCEINLINE __attribute__((always_inline)) inline
|
||||
#elif _MSC_VER
|
||||
#define FORCEINLINE __forceinline
|
||||
#else
|
||||
#define FORCEINLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// UNREACHABLE_CODE: tells the optimizer that control flow never gets here.
// Expands to nothing on compilers without such a hint.
// A pre-existing definition is kept.
// __assume(0) is used instead of __assume(false) so the macro does not
// depend on <stdbool.h> being included when compiled as C.
#ifndef UNREACHABLE_CODE
#if defined(__GNUC__)
#define UNREACHABLE_CODE __builtin_unreachable()
#elif defined(_MSC_VER)
#define UNREACHABLE_CODE __assume(0)
#else
#define UNREACHABLE_CODE
#endif
#endif
|
||||
|
||||
// Byte-order helpers: both expand to their argument unchanged, i.e. no swap is performed.
// NOTE(review): this presumably assumes a little-endian host - confirm before building for big-endian targets.
// The argument is parenthesized so that expressions such as SWAP32LE(a + b) * 2 keep their meaning.
#define SWAP32LE(x) (x)
#define SWAP64LE(x) (x)

// Hashes `length` bytes at `data` with blake256_hash and stores the digest at `hash`
#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
|
||||
|
||||
// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU:
|
||||
// every switch-case will point to the same destination on every iteration of Cryptonight main loop
|
||||
//
|
||||
// This is about as fast as it can get without using low-level machine code generation
|
||||
//template<typename v4_reg>
|
||||
static void v4_random_math(const struct V4_Instruction* code, uint32_t r[9])
|
||||
{
|
||||
#define REG_BITS 32
|
||||
#define V4_EXEC(i) \
|
||||
{ \
|
||||
const struct V4_Instruction* op = code + i; \
|
||||
const uint32_t src = r[op->src_index]; \
|
||||
uint32_t *dst = r + op->dst_index; \
|
||||
switch (op->opcode) \
|
||||
{ \
|
||||
case MUL: \
|
||||
*dst *= src; \
|
||||
break; \
|
||||
case ADD: \
|
||||
*dst += src + op->C; \
|
||||
break; \
|
||||
case SUB: \
|
||||
*dst -= src; \
|
||||
break; \
|
||||
case ROR: \
|
||||
{ \
|
||||
const uint32_t shift = src % REG_BITS; \
|
||||
*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
|
||||
} \
|
||||
break; \
|
||||
case ROL: \
|
||||
{ \
|
||||
const uint32_t shift = src % REG_BITS; \
|
||||
*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
|
||||
} \
|
||||
break; \
|
||||
case XOR: \
|
||||
*dst ^= src; \
|
||||
break; \
|
||||
case RET: \
|
||||
return; \
|
||||
default: \
|
||||
UNREACHABLE_CODE; \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define V4_EXEC_10(j) \
|
||||
V4_EXEC(j + 0) \
|
||||
V4_EXEC(j + 1) \
|
||||
V4_EXEC(j + 2) \
|
||||
V4_EXEC(j + 3) \
|
||||
V4_EXEC(j + 4) \
|
||||
V4_EXEC(j + 5) \
|
||||
V4_EXEC(j + 6) \
|
||||
V4_EXEC(j + 7) \
|
||||
V4_EXEC(j + 8) \
|
||||
V4_EXEC(j + 9)
|
||||
|
||||
// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
|
||||
// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
|
||||
//
|
||||
// 60 27960
|
||||
// 61 105054
|
||||
// 62 2452759
|
||||
// 63 5115997
|
||||
// 64 1022269
|
||||
// 65 1109635
|
||||
// 66 153145
|
||||
// 67 8550
|
||||
// 68 4529
|
||||
// 69 102
|
||||
|
||||
// Unroll 70 instructions here
|
||||
V4_EXEC_10(0); // instructions 0-9
|
||||
V4_EXEC_10(10); // instructions 10-19
|
||||
V4_EXEC_10(20); // instructions 20-29
|
||||
V4_EXEC_10(30); // instructions 30-39
|
||||
V4_EXEC_10(40); // instructions 40-49
|
||||
V4_EXEC_10(50); // instructions 50-59
|
||||
V4_EXEC_10(60); // instructions 60-69
|
||||
|
||||
#undef V4_EXEC_10
|
||||
#undef V4_EXEC
|
||||
#undef REG_BITS
|
||||
}
|
||||
|
||||
// If we don't have enough data available, generate more
|
||||
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
|
||||
{
|
||||
if (*data_index + bytes_needed > data_size)
|
||||
{
|
||||
hash_extra_blake(data, data_size, (char*) data);
|
||||
*data_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Generates as many random math operations as possible with given latency and ALU restrictions
|
||||
// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions
|
||||
static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height)
|
||||
{
|
||||
// MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle
|
||||
// These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake
|
||||
//
|
||||
// AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors
|
||||
// Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors
|
||||
// AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same
|
||||
// Source: https://www.agner.org/optimize/instruction_tables.pdf
|
||||
const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 };
|
||||
|
||||
// Instruction latencies for theoretical ASIC implementation
|
||||
const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 };
|
||||
|
||||
// Available ALUs for each instruction
|
||||
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
|
||||
|
||||
int8_t data[32];
|
||||
memset(data, 0, sizeof(data));
|
||||
uint64_t tmp = SWAP64LE(height);
|
||||
memcpy(data, &tmp, sizeof(uint64_t));
|
||||
data[20] = -38;
|
||||
|
||||
// Set data_index past the last byte in data
|
||||
// to trigger full data update with blake hash
|
||||
// before we start using it
|
||||
size_t data_index = sizeof(data);
|
||||
|
||||
int code_size;
|
||||
|
||||
// There is a small chance (1.8%) that register R8 won't be used in the generated program
|
||||
// So we keep track of it and try again if it's not used
|
||||
bool r8_used;
|
||||
do {
|
||||
int latency[9];
|
||||
int asic_latency[9];
|
||||
|
||||
// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
|
||||
// byte 0: current value of the destination register
|
||||
// byte 1: instruction opcode
|
||||
// byte 2: current value of the source register
|
||||
//
|
||||
// Registers R4-R8 are constant and are treated as having the same value because when we do
|
||||
// the same operation twice with two constant source registers, it can be optimized into a single operation
|
||||
uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
|
||||
|
||||
bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
|
||||
bool is_rotation[V4_INSTRUCTION_COUNT];
|
||||
bool rotated[4];
|
||||
int rotate_count = 0;
|
||||
|
||||
memset(latency, 0, sizeof(latency));
|
||||
memset(asic_latency, 0, sizeof(asic_latency));
|
||||
memset(alu_busy, 0, sizeof(alu_busy));
|
||||
memset(is_rotation, 0, sizeof(is_rotation));
|
||||
memset(rotated, 0, sizeof(rotated));
|
||||
is_rotation[ROR] = true;
|
||||
is_rotation[ROL] = true;
|
||||
|
||||
int num_retries = 0;
|
||||
code_size = 0;
|
||||
|
||||
int total_iterations = 0;
|
||||
r8_used = false;
|
||||
|
||||
// Generate random code to achieve minimal required latency for our abstract CPU
|
||||
// Try to get this latency for all 4 registers
|
||||
while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64))
|
||||
{
|
||||
// Fail-safe to guarantee loop termination
|
||||
++total_iterations;
|
||||
if (total_iterations > 256)
|
||||
break;
|
||||
|
||||
check_data(&data_index, 1, data, sizeof(data));
|
||||
|
||||
const uint8_t c = ((uint8_t*)data)[data_index++];
|
||||
|
||||
// MUL = opcodes 0-2
|
||||
// ADD = opcode 3
|
||||
// SUB = opcode 4
|
||||
// ROR/ROL = opcode 5, shift direction is selected randomly
|
||||
// XOR = opcodes 6-7
|
||||
uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1);
|
||||
if (opcode == 5)
|
||||
{
|
||||
check_data(&data_index, 1, data, sizeof(data));
|
||||
opcode = (data[data_index++] >= 0) ? ROR : ROL;
|
||||
}
|
||||
else if (opcode >= 6)
|
||||
{
|
||||
opcode = XOR;
|
||||
}
|
||||
else
|
||||
{
|
||||
opcode = (opcode <= 2) ? MUL : (opcode - 2);
|
||||
}
|
||||
|
||||
uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1);
|
||||
uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1);
|
||||
|
||||
const int a = dst_index;
|
||||
int b = src_index;
|
||||
|
||||
// Don't do ADD/SUB/XOR with the same register
|
||||
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
|
||||
{
|
||||
// a is always < 4, so we don't need to check bounds here
|
||||
b = 8;
|
||||
src_index = b;
|
||||
}
|
||||
|
||||
// Don't do rotation with the same destination twice because it's equal to a single rotation
|
||||
if (is_rotation[opcode] && rotated[a])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized:
|
||||
// 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations
|
||||
// 2xXOR(a, b) = NOP
|
||||
if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find which ALU is available (and when) for this instruction
|
||||
int next_latency = (latency[a] > latency[b]) ? latency[a] : latency[b];
|
||||
int alu_index = -1;
|
||||
while (next_latency < TOTAL_LATENCY)
|
||||
{
|
||||
for (int i = op_ALUs[opcode] - 1; i >= 0; --i)
|
||||
{
|
||||
if (!alu_busy[next_latency][i])
|
||||
{
|
||||
// ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check
|
||||
if ((opcode == ADD) && alu_busy[next_latency + 1][i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rotation can only start when previous rotation is finished, so do an additional availability check
|
||||
if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode]))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
alu_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (alu_index >= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
++next_latency;
|
||||
}
|
||||
|
||||
// Don't generate instructions that leave some register unchanged for more than 7 cycles
|
||||
if (next_latency > latency[a] + 7)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
next_latency += op_latency[opcode];
|
||||
|
||||
if (next_latency <= TOTAL_LATENCY)
|
||||
{
|
||||
if (is_rotation[opcode])
|
||||
{
|
||||
++rotate_count;
|
||||
}
|
||||
|
||||
// Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined
|
||||
alu_busy[next_latency - op_latency[opcode]][alu_index] = true;
|
||||
latency[a] = next_latency;
|
||||
|
||||
// ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple
|
||||
asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode];
|
||||
|
||||
rotated[a] = is_rotation[opcode];
|
||||
|
||||
inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16);
|
||||
|
||||
code[code_size].opcode = opcode;
|
||||
code[code_size].dst_index = dst_index;
|
||||
code[code_size].src_index = src_index;
|
||||
code[code_size].C = 0;
|
||||
|
||||
if (src_index == 8)
|
||||
{
|
||||
r8_used = true;
|
||||
}
|
||||
|
||||
if (opcode == ADD)
|
||||
{
|
||||
// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
|
||||
alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true;
|
||||
|
||||
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
|
||||
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
|
||||
uint32_t t;
|
||||
memcpy(&t, data + data_index, sizeof(uint32_t));
|
||||
code[code_size].C = SWAP32LE(t);
|
||||
data_index += sizeof(uint32_t);
|
||||
}
|
||||
|
||||
++code_size;
|
||||
if (code_size >= NUM_INSTRUCTIONS_MIN)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
++num_retries;
|
||||
}
|
||||
}
|
||||
|
||||
// ASIC has more execution resources and can extract as much parallelism from the code as possible
|
||||
// We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC
|
||||
// Get this latency for at least 1 of the 4 registers
|
||||
const int prev_code_size = code_size;
|
||||
while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY))
|
||||
{
|
||||
int min_idx = 0;
|
||||
int max_idx = 0;
|
||||
for (int i = 1; i < 4; ++i)
|
||||
{
|
||||
if (asic_latency[i] < asic_latency[min_idx]) min_idx = i;
|
||||
if (asic_latency[i] > asic_latency[max_idx]) max_idx = i;
|
||||
}
|
||||
|
||||
const uint8_t pattern[3] = { ROR, MUL, MUL };
|
||||
const uint8_t opcode = pattern[(code_size - prev_code_size) % 3];
|
||||
latency[min_idx] = latency[max_idx] + op_latency[opcode];
|
||||
asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode];
|
||||
|
||||
code[code_size].opcode = opcode;
|
||||
code[code_size].dst_index = min_idx;
|
||||
code[code_size].src_index = max_idx;
|
||||
code[code_size].C = 0;
|
||||
++code_size;
|
||||
}
|
||||
|
||||
// There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time
|
||||
// It never does more than 4 iterations for all block heights < 10,000,000
|
||||
} while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));
|
||||
|
||||
// It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here
|
||||
// Add final instruction to stop the interpreter
|
||||
code[code_size].opcode = RET;
|
||||
code[code_size].dst_index = 0;
|
||||
code[code_size].src_index = 0;
|
||||
code[code_size].C = 0;
|
||||
|
||||
return code_size;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,39 +0,0 @@
|
||||
# - Try to find MHD
|
||||
# Once done this will define
|
||||
#
|
||||
# MHD_FOUND - system has MHD
|
||||
# MHD_INCLUDE_DIRS - the MHD include directory
|
||||
# MHD_LIBRARY - Link these to use MHD
|
||||
|
||||
find_path(
|
||||
MHD_INCLUDE_DIR
|
||||
NAMES microhttpd.h
|
||||
DOC "microhttpd include dir"
|
||||
)
|
||||
|
||||
find_library(
|
||||
MHD_LIBRARY
|
||||
NAMES microhttpd microhttpd-10 libmicrohttpd libmicrohttpd-dll
|
||||
DOC "microhttpd library"
|
||||
)
|
||||
|
||||
set(MHD_INCLUDE_DIRS ${MHD_INCLUDE_DIR})
|
||||
set(MHD_LIBRARIES ${MHD_LIBRARY})
|
||||
|
||||
# debug library on windows
|
||||
# same naming convention as in qt (appending debug library with d)
|
||||
# boost is using the same "hack" as us with "optimized" and "debug"
|
||||
# official MHD project actually uses _d suffix
|
||||
if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
|
||||
find_library(
|
||||
MHD_LIBRARY_DEBUG
|
||||
NAMES microhttpd_d microhttpd-10_d libmicrohttpd_d libmicrohttpd-dll_d
|
||||
DOC "mhd debug library"
|
||||
)
|
||||
set(MHD_LIBRARIES optimized ${MHD_LIBRARIES} debug ${MHD_LIBRARY_DEBUG})
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(mhd DEFAULT_MSG MHD_INCLUDE_DIR MHD_LIBRARY)
|
||||
mark_as_advanced(MHD_INCLUDE_DIR MHD_LIBRARY)
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
find_path(UV_INCLUDE_DIR NAMES uv.h)
|
||||
find_library(UV_LIBRARY NAMES uv libuv)
|
||||
|
||||
set(UV_LIBRARIES ${UV_LIBRARY})
|
||||
set(UV_INCLUDE_DIRS ${UV_INCLUDE_DIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(UV DEFAULT_MSG UV_LIBRARY UV_INCLUDE_DIR)
|
||||
27
cmake/asm.cmake
Normal file
27
cmake/asm.cmake
Normal file
@@ -0,0 +1,27 @@
|
||||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
||||
|
||||
enable_language(ASM)
|
||||
|
||||
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set(XMRIG_ASM_FILES
|
||||
"crypto/asm/win64/cn_main_loop.S"
|
||||
"crypto/asm/CryptonightR_template.S"
|
||||
)
|
||||
else()
|
||||
set(XMRIG_ASM_FILES
|
||||
"crypto/asm/cn_main_loop.S"
|
||||
"crypto/asm/CryptonightR_template.S"
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY C)
|
||||
|
||||
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
|
||||
set(XMRIG_ASM_SOURCES "crypto/CryptonightR_gen.c")
|
||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||
else()
|
||||
set(XMRIG_ASM_SOURCES "")
|
||||
set(XMRIG_ASM_LIBRARY "")
|
||||
add_definitions(/DXMRIG_NO_ASM)
|
||||
endif()
|
||||
@@ -21,29 +21,27 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CONSOLE_H__
|
||||
#define __CONSOLE_H__
|
||||
#ifndef __COMPAT_H__
|
||||
#define __COMPAT_H__
|
||||
|
||||
#define unlikely(expr) (__builtin_expect(!!(expr), 0))
|
||||
#define likely(expr) (__builtin_expect(!!(expr), 1))
|
||||
|
||||
#include <uv.h>
|
||||
#ifdef WIN32
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
class IConsoleListener;
|
||||
#define sleep(secs) Sleep((secs) * 1000)
|
||||
|
||||
|
||||
class Console
|
||||
{
|
||||
public:
|
||||
Console(IConsoleListener *listener);
|
||||
|
||||
private:
|
||||
static void onAllocBuffer(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf);
|
||||
static void onRead(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
|
||||
|
||||
char m_buf[1];
|
||||
IConsoleListener *m_listener;
|
||||
uv_tty_t m_tty;
|
||||
enum {
|
||||
PRIO_PROCESS = 0,
|
||||
};
|
||||
|
||||
static inline int setpriority(int which, int who, int prio)
|
||||
{
|
||||
return -!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE);
|
||||
}
|
||||
|
||||
#endif /* __CONSOLE_H__ */
|
||||
#endif /* WIN32 */
|
||||
|
||||
#endif /* __COMPAT_H__ */
|
||||
25
compat/jansson/CMakeLists.txt
Normal file
25
compat/jansson/CMakeLists.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
cmake_minimum_required (VERSION 2.8)
|
||||
project (jansson C)
|
||||
|
||||
add_definitions(-DHAVE_CONFIG_H)
|
||||
|
||||
# Add the lib sources.
|
||||
file(GLOB JANSSON_SRC *.c)
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
|
||||
|
||||
set(JANSSON_HDR_PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hashtable.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/jansson_private.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/strbuffer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/utf.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/jansson_private_config.h)
|
||||
|
||||
set(JANSSON_HDR_PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/jansson_config.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/jansson.h)
|
||||
|
||||
add_library(jansson STATIC
|
||||
${JANSSON_SRC}
|
||||
${JANSSON_HDR_PRIVATE}
|
||||
${JANSSON_HDR_PUBLIC})
|
||||
19
compat/jansson/LICENSE
Normal file
19
compat/jansson/LICENSE
Normal file
@@ -0,0 +1,19 @@
|
||||
Copyright (c) 2009-2014 Petri Lehtinen <petri@digip.org>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
496
compat/jansson/dump.c
Normal file
496
compat/jansson/dump.c
Normal file
@@ -0,0 +1,496 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include "jansson_private.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "jansson.h"
|
||||
#include "strbuffer.h"
|
||||
#include "utf.h"
|
||||
|
||||
#define MAX_INTEGER_STR_LENGTH 100
|
||||
#define MAX_REAL_STR_LENGTH 100
|
||||
|
||||
#define FLAGS_TO_INDENT(f) ((f) & 0x1F)
|
||||
#define FLAGS_TO_PRECISION(f) (((f) >> 11) & 0x1F)
|
||||
|
||||
/* Fixed-capacity output target used by dump_to_buffer() */
struct buffer {
    const size_t size;  /* capacity of data, in bytes */
    size_t used;        /* number of bytes requested so far */
    char *data;         /* destination storage */
};
|
||||
|
||||
static int dump_to_strbuffer(const char *buffer, size_t size, void *data)
|
||||
{
|
||||
return strbuffer_append_bytes((strbuffer_t *)data, buffer, size);
|
||||
}
|
||||
|
||||
static int dump_to_buffer(const char *buffer, size_t size, void *data)
|
||||
{
|
||||
struct buffer *buf = (struct buffer *)data;
|
||||
|
||||
if(buf->used + size <= buf->size)
|
||||
memcpy(&buf->data[buf->used], buffer, size);
|
||||
|
||||
buf->used += size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Dump callback: writes the bytes to the FILE passed as data.
   Returns 0 when all size bytes were written, -1 otherwise.

   The data is written as size items of one byte each, so that an empty
   write (size == 0) succeeds: fwrite() returns 0 whenever the item size or
   item count is 0, which a one-item-of-size-bytes call would misreport as
   a failure. */
static int dump_to_file(const char *buffer, size_t size, void *data)
{
    FILE *dest = (FILE *)data;

    if(fwrite(buffer, 1, size, dest) != size)
        return -1;

    return 0;
}
|
||||
|
||||
/* Dump callback: writes the bytes to the file descriptor that data points to.
   Returns 0 only when write() reports that exactly size bytes were written;
   returns -1 otherwise, and always when built without HAVE_UNISTD_H. */
static int dump_to_fd(const char *buffer, size_t size, void *data)
{
#ifdef HAVE_UNISTD_H
    const int fd = *(const int *)data;

    return (write(fd, buffer, size) == (ssize_t)size) ? 0 : -1;
#else
    (void)buffer;
    (void)size;
    (void)data;
    return -1;
#endif
}
|
||||
|
||||
/* 32 spaces (the maximum indentation size); indentation is written in chunks of
   at most this many characters per callback call */
static const char whitespace[] = "                                ";
|
||||
|
||||
/* Emits the separator that precedes an element.

   With a non-zero indent width in flags: a newline followed by
   depth * width spaces, written in chunks no longer than the whitespace string.
   With a zero indent width: a single space when space is non-zero and
   JSON_COMPACT is not set, otherwise nothing.

   Returns 0 on success, -1 when the callback fails while indenting; in the
   single-space case the callback's own result is returned. */
static int dump_indent(size_t flags, int depth, int space, json_dump_callback_t dump, void *data)
{
    const unsigned int indent_width = FLAGS_TO_INDENT(flags);
    const size_t chunk_max = sizeof whitespace - 1;
    unsigned int remaining;

    if(indent_width == 0)
    {
        if(space && !(flags & JSON_COMPACT))
            return dump(" ", 1, data);

        return 0;
    }

    if(dump("\n", 1, data))
        return -1;

    for(remaining = depth * indent_width; remaining > 0; )
    {
        const int chunk = remaining < chunk_max ? remaining : chunk_max;

        if(dump(whitespace, chunk, data))
            return -1;

        remaining -= chunk;
    }

    return 0;
}
|
||||
|
||||
/* Writes str (len bytes of UTF-8) as a double-quoted JSON string.

   Runs of characters that need no escaping are passed to the callback
   unchanged. Backslash, double quote and code points below 0x20 are always
   escaped; '/' is escaped when JSON_ESCAPE_SLASH is set and code points above
   0x7F when JSON_ENSURE_ASCII is set. Characters without a short escape are
   written as \uXXXX, using a UTF-16 surrogate pair outside the BMP.

   Returns -1 when utf8_iterate() fails or a chunk cannot be written;
   otherwise returns the callback's result for the closing quote. */
static int dump_string(const char *str, size_t len, json_dump_callback_t dump, void *data, size_t flags)
{
    const char *const limit = str + len;
    const char *run = str;      /* start of the not yet written plain run */
    const char *cursor = str;   /* end of the plain run */
    const char *next = str;     /* position after the last decoded code point */
    int32_t codepoint;

    if(dump("\"", 1, data))
        return -1;

    for(;;)
    {
        const char *escape;
        char hex[13];
        int escape_len = 2;

        /* extend the plain run until a character that must be escaped */
        while(next < limit)
        {
            int must_escape;

            next = utf8_iterate(cursor, limit - cursor, &codepoint);
            if(!next)
                return -1;

            must_escape =
                /* mandatory escape or control char */
                (codepoint == '\\' || codepoint == '"' || codepoint < 0x20) ||
                /* slash */
                ((flags & JSON_ESCAPE_SLASH) && codepoint == '/') ||
                /* non-ASCII */
                ((flags & JSON_ENSURE_ASCII) && codepoint > 0x7F);

            if(must_escape)
                break;

            cursor = next;
        }

        if(cursor != run && dump(run, cursor - run, data))
            return -1;

        /* nothing left to escape: the whole input has been consumed */
        if(next == cursor)
            break;

        /* handle \, /, ", and control codes */
        switch(codepoint)
        {
            case '\\': escape = "\\\\"; break;
            case '\"': escape = "\\\""; break;
            case '\b': escape = "\\b"; break;
            case '\f': escape = "\\f"; break;
            case '\n': escape = "\\n"; break;
            case '\r': escape = "\\r"; break;
            case '\t': escape = "\\t"; break;
            case '/':  escape = "\\/"; break;
            default:
                if(codepoint < 0x10000)
                {
                    /* codepoint is in BMP */
                    snprintf(hex, sizeof(hex), "\\u%04X", (unsigned int)codepoint);
                    escape_len = 6;
                }
                else
                {
                    /* not in BMP -> construct a UTF-16 surrogate pair */
                    int32_t first, last;

                    codepoint -= 0x10000;
                    first = 0xD800 | ((codepoint & 0xffc00) >> 10);
                    last = 0xDC00 | (codepoint & 0x003ff);

                    snprintf(hex, sizeof(hex), "\\u%04X\\u%04X", (unsigned int)first, (unsigned int)last);
                    escape_len = 12;
                }

                escape = hex;
                break;
        }

        if(dump(escape, escape_len, data))
            return -1;

        run = cursor = next;
    }

    return dump("\"", 1, data);
}
|
||||
|
||||
/* qsort() comparator for an array of C strings: each argument points to a
   const char * element, and the pointed-to strings are ordered with strcmp(). */
static int compare_keys(const void *key1, const void *key2)
{
    const char *const lhs = *(const char *const *)key1;
    const char *const rhs = *(const char *const *)key2;

    return strcmp(lhs, rhs);
}
|
||||
|
||||
static int do_dump(const json_t *json, size_t flags, int depth,
|
||||
json_dump_callback_t dump, void *data)
|
||||
{
|
||||
int embed = flags & JSON_EMBED;
|
||||
|
||||
flags &= ~JSON_EMBED;
|
||||
|
||||
if(!json)
|
||||
return -1;
|
||||
|
||||
switch(json_typeof(json)) {
|
||||
case JSON_NULL:
|
||||
return dump("null", 4, data);
|
||||
|
||||
case JSON_TRUE:
|
||||
return dump("true", 4, data);
|
||||
|
||||
case JSON_FALSE:
|
||||
return dump("false", 5, data);
|
||||
|
||||
case JSON_INTEGER:
|
||||
{
|
||||
char buffer[MAX_INTEGER_STR_LENGTH];
|
||||
int size;
|
||||
|
||||
size = snprintf(buffer, MAX_INTEGER_STR_LENGTH,
|
||||
"%" JSON_INTEGER_FORMAT,
|
||||
json_integer_value(json));
|
||||
if(size < 0 || size >= MAX_INTEGER_STR_LENGTH)
|
||||
return -1;
|
||||
|
||||
return dump(buffer, size, data);
|
||||
}
|
||||
|
||||
case JSON_REAL:
|
||||
{
|
||||
char buffer[MAX_REAL_STR_LENGTH];
|
||||
int size;
|
||||
double value = json_real_value(json);
|
||||
|
||||
size = jsonp_dtostr(buffer, MAX_REAL_STR_LENGTH, value,
|
||||
FLAGS_TO_PRECISION(flags));
|
||||
if(size < 0)
|
||||
return -1;
|
||||
|
||||
return dump(buffer, size, data);
|
||||
}
|
||||
|
||||
case JSON_STRING:
|
||||
return dump_string(json_string_value(json), json_string_length(json), dump, data, flags);
|
||||
|
||||
case JSON_ARRAY:
|
||||
{
|
||||
size_t n;
|
||||
size_t i;
|
||||
|
||||
json_array_t *array;
|
||||
|
||||
/* detect circular references */
|
||||
array = json_to_array(json);
|
||||
if(array->visited)
|
||||
goto array_error;
|
||||
array->visited = 1;
|
||||
|
||||
n = json_array_size(json);
|
||||
|
||||
if(!embed && dump("[", 1, data))
|
||||
goto array_error;
|
||||
if(n == 0) {
|
||||
array->visited = 0;
|
||||
return embed ? 0 : dump("]", 1, data);
|
||||
}
|
||||
if(dump_indent(flags, depth + 1, 0, dump, data))
|
||||
goto array_error;
|
||||
|
||||
for(i = 0; i < n; ++i) {
|
||||
if(do_dump(json_array_get(json, i), flags, depth + 1,
|
||||
dump, data))
|
||||
goto array_error;
|
||||
|
||||
if(i < n - 1)
|
||||
{
|
||||
if(dump(",", 1, data) ||
|
||||
dump_indent(flags, depth + 1, 1, dump, data))
|
||||
goto array_error;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dump_indent(flags, depth, 0, dump, data))
|
||||
goto array_error;
|
||||
}
|
||||
}
|
||||
|
||||
array->visited = 0;
|
||||
return embed ? 0 : dump("]", 1, data);
|
||||
|
||||
array_error:
|
||||
array->visited = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
case JSON_OBJECT:
|
||||
{
|
||||
json_object_t *object;
|
||||
void *iter;
|
||||
const char *separator;
|
||||
int separator_length;
|
||||
|
||||
if(flags & JSON_COMPACT) {
|
||||
separator = ":";
|
||||
separator_length = 1;
|
||||
}
|
||||
else {
|
||||
separator = ": ";
|
||||
separator_length = 2;
|
||||
}
|
||||
|
||||
/* detect circular references */
|
||||
object = json_to_object(json);
|
||||
if(object->visited)
|
||||
goto object_error;
|
||||
object->visited = 1;
|
||||
|
||||
iter = json_object_iter((json_t *)json);
|
||||
|
||||
if(!embed && dump("{", 1, data))
|
||||
goto object_error;
|
||||
if(!iter) {
|
||||
object->visited = 0;
|
||||
return embed ? 0 : dump("}", 1, data);
|
||||
}
|
||||
if(dump_indent(flags, depth + 1, 0, dump, data))
|
||||
goto object_error;
|
||||
|
||||
if(flags & JSON_SORT_KEYS)
|
||||
{
|
||||
const char **keys;
|
||||
size_t size, i;
|
||||
|
||||
size = json_object_size(json);
|
||||
keys = jsonp_malloc(size * sizeof(const char *));
|
||||
if(!keys)
|
||||
goto object_error;
|
||||
|
||||
i = 0;
|
||||
while(iter)
|
||||
{
|
||||
keys[i] = json_object_iter_key(iter);
|
||||
iter = json_object_iter_next((json_t *)json, iter);
|
||||
i++;
|
||||
}
|
||||
assert(i == size);
|
||||
|
||||
qsort(keys, size, sizeof(const char *), compare_keys);
|
||||
|
||||
for(i = 0; i < size; i++)
|
||||
{
|
||||
const char *key;
|
||||
json_t *value;
|
||||
|
||||
key = keys[i];
|
||||
value = json_object_get(json, key);
|
||||
assert(value);
|
||||
|
||||
dump_string(key, strlen(key), dump, data, flags);
|
||||
if(dump(separator, separator_length, data) ||
|
||||
do_dump(value, flags, depth + 1, dump, data))
|
||||
{
|
||||
jsonp_free(keys);
|
||||
goto object_error;
|
||||
}
|
||||
|
||||
if(i < size - 1)
|
||||
{
|
||||
if(dump(",", 1, data) ||
|
||||
dump_indent(flags, depth + 1, 1, dump, data))
|
||||
{
|
||||
jsonp_free(keys);
|
||||
goto object_error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dump_indent(flags, depth, 0, dump, data))
|
||||
{
|
||||
jsonp_free(keys);
|
||||
goto object_error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
jsonp_free(keys);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Don't sort keys */
|
||||
|
||||
while(iter)
|
||||
{
|
||||
void *next = json_object_iter_next((json_t *)json, iter);
|
||||
const char *key = json_object_iter_key(iter);
|
||||
|
||||
dump_string(key, strlen(key), dump, data, flags);
|
||||
if(dump(separator, separator_length, data) ||
|
||||
do_dump(json_object_iter_value(iter), flags, depth + 1,
|
||||
dump, data))
|
||||
goto object_error;
|
||||
|
||||
if(next)
|
||||
{
|
||||
if(dump(",", 1, data) ||
|
||||
dump_indent(flags, depth + 1, 1, dump, data))
|
||||
goto object_error;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(dump_indent(flags, depth, 0, dump, data))
|
||||
goto object_error;
|
||||
}
|
||||
|
||||
iter = next;
|
||||
}
|
||||
}
|
||||
|
||||
object->visited = 0;
|
||||
return embed ? 0 : dump("}", 1, data);
|
||||
|
||||
object_error:
|
||||
object->visited = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
default:
|
||||
/* not reached */
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
char *json_dumps(const json_t *json, size_t flags)
|
||||
{
|
||||
strbuffer_t strbuff;
|
||||
char *result;
|
||||
|
||||
if(strbuffer_init(&strbuff))
|
||||
return NULL;
|
||||
|
||||
if(json_dump_callback(json, dump_to_strbuffer, (void *)&strbuff, flags))
|
||||
result = NULL;
|
||||
else
|
||||
result = jsonp_strdup(strbuffer_value(&strbuff));
|
||||
|
||||
strbuffer_close(&strbuff);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t json_dumpb(const json_t *json, char *buffer, size_t size, size_t flags)
|
||||
{
|
||||
struct buffer buf = { size, 0, buffer };
|
||||
|
||||
if(json_dump_callback(json, dump_to_buffer, (void *)&buf, flags))
|
||||
return 0;
|
||||
|
||||
return buf.used;
|
||||
}
|
||||
|
||||
int json_dumpf(const json_t *json, FILE *output, size_t flags)
|
||||
{
|
||||
return json_dump_callback(json, dump_to_file, (void *)output, flags);
|
||||
}
|
||||
|
||||
int json_dumpfd(const json_t *json, int output, size_t flags)
|
||||
{
|
||||
return json_dump_callback(json, dump_to_fd, (void *)&output, flags);
|
||||
}
|
||||
|
||||
/*
 * Serialize `json` into the file at `path`, opened with fopen(path, "w").
 *
 * Returns -1 if the file cannot be opened or if closing it fails;
 * otherwise returns the result of json_dumpf().
 */
int json_dump_file(const json_t *json, const char *path, size_t flags)
{
    int result;
    FILE *output = fopen(path, "w");

    if(!output)
        return -1;

    result = json_dumpf(json, output, flags);

    /* Buffered output is flushed by fclose(); if that fails the file on
       disk may be incomplete, so the failure must not be reported as
       success. */
    if(fclose(output) != 0)
        return -1;

    return result;
}
|
||||
|
||||
/*
 * Serialize `json`, feeding the output to `callback` together with the
 * opaque `data` pointer.
 *
 * Unless JSON_ENCODE_ANY is set in `flags`, only arrays and objects are
 * accepted as the top-level value; anything else (including NULL) yields
 * -1 before any output is produced.  Otherwise the result of do_dump()
 * is returned.
 */
int json_dump_callback(const json_t *json, json_dump_callback_t callback, void *data, size_t flags)
{
    int any_allowed = (flags & JSON_ENCODE_ANY) != 0;

    if(!any_allowed && !json_is_array(json) && !json_is_object(json))
        return -1;

    return do_dump(json, flags, 0, callback, data);
}
|
||||
63
compat/jansson/error.c
Normal file
63
compat/jansson/error.c
Normal file
@@ -0,0 +1,63 @@
|
||||
#include <string.h>
|
||||
#include "jansson_private.h"
|
||||
|
||||
/*
 * Reset `error` to its "no error" state: empty text, line and column -1,
 * position 0.  The source field is filled from `source` when given and
 * emptied otherwise.  A NULL `error` is ignored.
 */
void jsonp_error_init(json_error_t *error, const char *source)
{
    if(!error)
        return;

    error->text[0] = '\0';
    error->line = -1;
    error->column = -1;
    error->position = 0;

    if(!source) {
        error->source[0] = '\0';
        return;
    }

    jsonp_error_set_source(error, source);
}
|
||||
|
||||
/*
 * Store `source` in error->source.
 *
 * A name shorter than JSON_ERROR_SOURCE_LENGTH is copied whole, including
 * its terminator.  A longer name is abbreviated to "..." followed by its
 * last JSON_ERROR_SOURCE_LENGTH - 4 characters, so that the result plus
 * terminator fills exactly JSON_ERROR_SOURCE_LENGTH bytes.  Does nothing
 * if either argument is NULL.
 */
void jsonp_error_set_source(json_error_t *error, const char *source)
{
    size_t length;

    if(!error || !source)
        return;

    length = strlen(source);

    if(length >= JSON_ERROR_SOURCE_LENGTH) {
        /* number of leading characters that are dropped */
        size_t skipped = length - JSON_ERROR_SOURCE_LENGTH + 4;

        memcpy(error->source, "...", 3);
        /* tail of the name plus its terminating NUL */
        memcpy(error->source + 3, source + skipped, length - skipped + 1);
        return;
    }

    memcpy(error->source, source, length + 1);
}
|
||||
|
||||
/*
 * printf-style front end for jsonp_error_vset(): packs the variadic
 * arguments following `msg` into a va_list and forwards everything.
 */
void jsonp_error_set(json_error_t *error, int line, int column,
                     size_t position, const char *msg, ...)
{
    va_list args;

    va_start(args, msg);
    jsonp_error_vset(error, line, column, position, msg, args);
    va_end(args);
}
|
||||
|
||||
/*
 * Record an error location and a formatted message in `error`.
 *
 * Nothing happens when `error` is NULL or when a message has already
 * been stored (non-empty text): the first error wins.  `position` is
 * narrowed to int, and the message is formatted with vsnprintf() into
 * at most JSON_ERROR_TEXT_LENGTH bytes and explicitly NUL-terminated.
 */
void jsonp_error_vset(json_error_t *error, int line, int column,
                      size_t position, const char *msg, va_list ap)
{
    /* no destination, or an earlier error is already recorded */
    if(!error || error->text[0] != '\0')
        return;

    error->line = line;
    error->column = column;
    error->position = (int)position;

    vsnprintf(error->text, JSON_ERROR_TEXT_LENGTH, msg, ap);
    error->text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
}
|
||||
356
compat/jansson/hashtable.c
Normal file
356
compat/jansson/hashtable.c
Normal file
@@ -0,0 +1,356 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
#include <jansson_private_config.h>
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#include <jansson_config.h> /* for JSON_INLINE */
|
||||
#include "jansson_private.h" /* for container_of() */
|
||||
#include "hashtable.h"
|
||||
|
||||
#ifndef INITIAL_HASHTABLE_ORDER
|
||||
#define INITIAL_HASHTABLE_ORDER 3
|
||||
#endif
|
||||
|
||||
typedef struct hashtable_list list_t;
|
||||
typedef struct hashtable_pair pair_t;
|
||||
typedef struct hashtable_bucket bucket_t;
|
||||
|
||||
extern volatile uint32_t hashtable_seed;
|
||||
|
||||
/* Implementation of the hash function */
|
||||
#include "lookup3.h"
|
||||
|
||||
#define list_to_pair(list_) container_of(list_, pair_t, list)
|
||||
#define ordered_list_to_pair(list_) container_of(list_, pair_t, ordered_list)
|
||||
#define hash_str(key) ((size_t)hashlittle((key), strlen(key), hashtable_seed))
|
||||
|
||||
/* Make `list` an empty circular list: both links point back at itself. */
static JSON_INLINE void list_init(list_t *list)
{
    list->prev = list;
    list->next = list;
}
|
||||
|
||||
/* Link `node` into the circular list immediately before `list`. */
static JSON_INLINE void list_insert(list_t *list, list_t *node)
{
    list_t *before = list->prev;

    node->prev = before;
    node->next = list;
    before->next = node;
    list->prev = node;
}
|
||||
|
||||
/*
 * Unlink `list` from its neighbours by joining them to each other.
 * The removed node's own prev/next fields are left untouched.
 */
static JSON_INLINE void list_remove(list_t *list)
{
    list_t *before = list->prev;
    list_t *after = list->next;

    before->next = after;
    after->prev = before;
}
|
||||
|
||||
/*
 * A bucket is empty when both its first and last pointers refer to the
 * hashtable's list head (the value they are initialized to).
 * Returns non-zero for an empty bucket.
 */
static JSON_INLINE int bucket_is_empty(hashtable_t *hashtable, bucket_t *bucket)
{
    if(bucket->first != &hashtable->list)
        return 0;

    return bucket->first == bucket->last;
}
|
||||
|
||||
static void insert_to_bucket(hashtable_t *hashtable, bucket_t *bucket,
|
||||
list_t *list)
|
||||
{
|
||||
if(bucket_is_empty(hashtable, bucket))
|
||||
{
|
||||
list_insert(&hashtable->list, list);
|
||||
bucket->first = bucket->last = list;
|
||||
}
|
||||
else
|
||||
{
|
||||
list_insert(bucket->first, list);
|
||||
bucket->first = list;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Search `bucket` for the pair stored under `key`.
 *
 * Walks the nodes from bucket->first to bucket->last inclusive; the
 * stored hash is compared first so strcmp() only runs on hash matches.
 * Returns the matching pair, or NULL if the bucket is empty or holds no
 * such key.
 */
static pair_t *hashtable_find_pair(hashtable_t *hashtable, bucket_t *bucket,
                                   const char *key, size_t hash)
{
    list_t *node;
    pair_t *candidate;

    if(bucket_is_empty(hashtable, bucket))
        return NULL;

    for(node = bucket->first; ; node = node->next) {
        candidate = list_to_pair(node);
        if(candidate->hash == hash && strcmp(candidate->key, key) == 0)
            return candidate;

        /* the bucket's last node has been inspected: stop */
        if(node == bucket->last)
            break;
    }

    return NULL;
}
|
||||
|
||||
/* returns 0 on success, -1 if key was not found */
|
||||
static int hashtable_do_del(hashtable_t *hashtable,
|
||||
const char *key, size_t hash)
|
||||
{
|
||||
pair_t *pair;
|
||||
bucket_t *bucket;
|
||||
size_t index;
|
||||
|
||||
index = hash & hashmask(hashtable->order);
|
||||
bucket = &hashtable->buckets[index];
|
||||
|
||||
pair = hashtable_find_pair(hashtable, bucket, key, hash);
|
||||
if(!pair)
|
||||
return -1;
|
||||
|
||||
if(&pair->list == bucket->first && &pair->list == bucket->last)
|
||||
bucket->first = bucket->last = &hashtable->list;
|
||||
|
||||
else if(&pair->list == bucket->first)
|
||||
bucket->first = pair->list.next;
|
||||
|
||||
else if(&pair->list == bucket->last)
|
||||
bucket->last = pair->list.prev;
|
||||
|
||||
list_remove(&pair->list);
|
||||
list_remove(&pair->ordered_list);
|
||||
json_decref(pair->value);
|
||||
|
||||
jsonp_free(pair);
|
||||
hashtable->size--;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Release every pair reachable from hashtable->list: decref the value
 * and free the pair.  The list heads, the buckets and the size counter
 * are not touched here; callers reset or discard them.
 */
static void hashtable_do_clear(hashtable_t *hashtable)
{
    list_t *node = hashtable->list.next;

    while(node != &hashtable->list) {
        /* fetch the successor before the pair holding `node` is freed */
        list_t *following = node->next;
        pair_t *pair = list_to_pair(node);

        json_decref(pair->value);
        jsonp_free(pair);

        node = following;
    }
}
|
||||
|
||||
/*
 * Grow the table by one order and redistribute all pairs.
 *
 * A new bucket array of hashsize(order + 1) entries is allocated first;
 * only when that succeeds is the old array freed and replaced.  Every
 * pair is then re-inserted into the bucket selected by its stored hash.
 * Returns 0 on success, -1 if the allocation fails (table unchanged).
 */
static int hashtable_do_rehash(hashtable_t *hashtable)
{
    list_t *node, *following;
    pair_t *pair;
    size_t slot, bucket_count, grown_order;
    struct hashtable_bucket *grown;

    grown_order = hashtable->order + 1;
    bucket_count = hashsize(grown_order);

    grown = jsonp_malloc(bucket_count * sizeof(bucket_t));
    if(!grown)
        return -1;

    jsonp_free(hashtable->buckets);
    hashtable->buckets = grown;
    hashtable->order = grown_order;

    /* every new bucket starts out empty */
    for(slot = 0; slot < bucket_count; slot++) {
        grown[slot].first = &hashtable->list;
        grown[slot].last = &hashtable->list;
    }

    /* detach the old chain, then rebuild the list by re-inserting nodes */
    node = hashtable->list.next;
    list_init(&hashtable->list);

    while(node != &hashtable->list) {
        following = node->next;
        pair = list_to_pair(node);
        insert_to_bucket(hashtable, &grown[pair->hash % bucket_count],
                         &pair->list);
        node = following;
    }

    return 0;
}
|
||||
|
||||
|
||||
/*
 * Prepare `hashtable` for use: size 0, order INITIAL_HASHTABLE_ORDER,
 * a freshly allocated bucket array with every bucket empty, and both
 * list heads initialized.  Returns 0 on success, -1 if the bucket array
 * cannot be allocated.
 */
int hashtable_init(hashtable_t *hashtable)
{
    size_t slot, bucket_count;

    hashtable->size = 0;
    hashtable->order = INITIAL_HASHTABLE_ORDER;

    bucket_count = hashsize(hashtable->order);
    hashtable->buckets = jsonp_malloc(bucket_count * sizeof(bucket_t));
    if(!hashtable->buckets)
        return -1;

    list_init(&hashtable->list);
    list_init(&hashtable->ordered_list);

    for(slot = 0; slot < bucket_count; slot++) {
        hashtable->buckets[slot].first = &hashtable->list;
        hashtable->buckets[slot].last = &hashtable->list;
    }

    return 0;
}
|
||||
|
||||
/*
 * Release everything owned by `hashtable`: first all pairs (and a
 * reference to each value), then the bucket array.  The hashtable_t
 * structure itself is not freed.
 */
void hashtable_close(hashtable_t *hashtable)
{
    /* pairs must go first: hashtable_do_clear() walks hashtable->list */
    hashtable_do_clear(hashtable);

    jsonp_free(hashtable->buckets);
}
|
||||
|
||||
/*
 * Store `value` under `key`.
 *
 * If the key already exists, the old value is decref'd and replaced.
 * Otherwise a new pair is allocated with room for a copy of the key,
 * inserted into its bucket and appended to the ordered list.  The
 * table's reference to `value` is taken over from the caller (no incref
 * is done here).
 *
 * Returns 0 on success, -1 if rehashing or allocation fails or the key
 * is too long to allocate for.
 */
int hashtable_set(hashtable_t *hashtable, const char *key, json_t *value)
{
    pair_t *pair;
    bucket_t *bucket;
    size_t hash, key_len;

    /* grow the table once there are at least as many pairs as buckets */
    if(hashtable->size >= hashsize(hashtable->order) &&
       hashtable_do_rehash(hashtable))
        return -1;

    hash = hash_str(key);
    bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];
    pair = hashtable_find_pair(hashtable, bucket, key, hash);

    if(pair) {
        /* existing key: swap the value in place */
        json_decref(pair->value);
        pair->value = value;
        return 0;
    }

    key_len = strlen(key);
    if(key_len >= (size_t)-1 - offsetof(pair_t, key)) {
        /* the allocation size below would overflow */
        return -1;
    }

    /* offsetof(pair_t, key) is the size of the pair without its trailing
       key storage; the key and its terminator are appended to it */
    pair = jsonp_malloc(offsetof(pair_t, key) + key_len + 1);
    if(!pair)
        return -1;

    pair->hash = hash;
    strncpy(pair->key, key, key_len + 1);
    pair->value = value;
    list_init(&pair->list);
    list_init(&pair->ordered_list);

    insert_to_bucket(hashtable, bucket, &pair->list);
    list_insert(&hashtable->ordered_list, &pair->ordered_list);

    hashtable->size++;
    return 0;
}
|
||||
|
||||
/*
 * Look up `key` and return the value stored for it, or NULL when the
 * key is not present.
 */
void *hashtable_get(hashtable_t *hashtable, const char *key)
{
    size_t hash = hash_str(key);
    bucket_t *bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];
    pair_t *found = hashtable_find_pair(hashtable, bucket, key, hash);

    if(found)
        return found->value;

    return NULL;
}
|
||||
|
||||
/*
 * Remove `key` from the table.  Hashes the key and delegates to
 * hashtable_do_del(); returns its result (0 on success, -1 if the key
 * was not found).
 */
int hashtable_del(hashtable_t *hashtable, const char *key)
{
    size_t key_hash;

    key_hash = hash_str(key);
    return hashtable_do_del(hashtable, key, key_hash);
}
|
||||
|
||||
/*
 * Remove all pairs but keep the table usable: the pairs are released,
 * both list heads are re-initialized, every bucket is marked empty and
 * the size is reset to 0.  The bucket array keeps its current order.
 */
void hashtable_clear(hashtable_t *hashtable)
{
    size_t slot, bucket_count;

    hashtable_do_clear(hashtable);

    list_init(&hashtable->list);
    list_init(&hashtable->ordered_list);
    hashtable->size = 0;

    bucket_count = hashsize(hashtable->order);
    for(slot = 0; slot < bucket_count; slot++) {
        hashtable->buckets[slot].first = &hashtable->list;
        hashtable->buckets[slot].last = &hashtable->list;
    }
}
|
||||
|
||||
/*
 * Return an iterator for the first pair of the ordered list, or NULL
 * when the table holds no pairs.  Implemented as "the element after the
 * ordered list head".
 */
void *hashtable_iter(hashtable_t *hashtable)
{
    void *head = &hashtable->ordered_list;

    return hashtable_iter_next(hashtable, head);
}
|
||||
|
||||
/*
 * Return an iterator positioned at the pair stored under `key`
 * (a pointer to that pair's ordered_list node), or NULL if the key is
 * not present.
 */
void *hashtable_iter_at(hashtable_t *hashtable, const char *key)
{
    size_t hash = hash_str(key);
    bucket_t *bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];
    pair_t *found = hashtable_find_pair(hashtable, bucket, key, hash);

    if(found)
        return &found->ordered_list;

    return NULL;
}
|
||||
|
||||
/*
 * Advance `iter` by one element along the ordered list.  Returns the
 * next iterator, or NULL when the successor is the ordered list head,
 * i.e. iteration is complete.
 */
void *hashtable_iter_next(hashtable_t *hashtable, void *iter)
{
    list_t *following = ((list_t *)iter)->next;

    if(following != &hashtable->ordered_list)
        return following;

    return NULL;
}
|
||||
|
||||
void *hashtable_iter_key(void *iter)
|
||||
{
|
||||
pair_t *pair = ordered_list_to_pair((list_t *)iter);
|
||||
return pair->key;
|
||||
}
|
||||
|
||||
void *hashtable_iter_value(void *iter)
|
||||
{
|
||||
pair_t *pair = ordered_list_to_pair((list_t *)iter);
|
||||
return pair->value;
|
||||
}
|
||||
|
||||
/*
 * Replace the value of the pair that `iter` points into.  The previous
 * value is decref'd; `value` is stored without an incref.
 */
void hashtable_iter_set(void *iter, json_t *value)
{
    list_t *node = (list_t *)iter;
    pair_t *owner = ordered_list_to_pair(node);

    json_decref(owner->value);
    owner->value = value;
}
|
||||
176
compat/jansson/hashtable.h
Normal file
176
compat/jansson/hashtable.h
Normal file
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef HASHTABLE_H
|
||||
#define HASHTABLE_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "jansson.h"
|
||||
|
||||
/* Link fields of a circular doubly linked list; embedded in list heads
   and in every hashtable_pair. */
struct hashtable_list {
    struct hashtable_list *prev;   /* previous node */
    struct hashtable_list *next;   /* next node */
};
|
||||
|
||||
/* "pair" may be a bit confusing a name, but think of it as a
|
||||
key-value pair. In this case, it just encodes some extra data,
|
||||
too */
|
||||
struct hashtable_pair {
|
||||
struct hashtable_list list;
|
||||
struct hashtable_list ordered_list;
|
||||
size_t hash;
|
||||
json_t *value;
|
||||
char key[1];
|
||||
};
|
||||
|
||||
/* A bucket is described by the first and last list node belonging to it. */
struct hashtable_bucket {
    struct hashtable_list *first;  /* first node of the bucket */
    struct hashtable_list *last;   /* last node of the bucket */
};
|
||||
|
||||
typedef struct hashtable {
|
||||
size_t size;
|
||||
struct hashtable_bucket *buckets;
|
||||
size_t order; /* hashtable has pow(2, order) buckets */
|
||||
struct hashtable_list list;
|
||||
struct hashtable_list ordered_list;
|
||||
} hashtable_t;
|
||||
|
||||
|
||||
#define hashtable_key_to_iter(key_) \
|
||||
(&(container_of(key_, struct hashtable_pair, key)->ordered_list))
|
||||
|
||||
|
||||
/**
|
||||
* hashtable_init - Initialize a hashtable object
|
||||
*
|
||||
* @hashtable: The (statically allocated) hashtable object
|
||||
*
|
||||
* Initializes a statically allocated hashtable object. The object
|
||||
* should be cleared with hashtable_close when it's no longer used.
|
||||
*
|
||||
* Returns 0 on success, -1 on error (out of memory).
|
||||
*/
|
||||
int hashtable_init(hashtable_t *hashtable);
|
||||
|
||||
/**
|
||||
* hashtable_close - Release all resources used by a hashtable object
|
||||
*
|
||||
* @hashtable: The hashtable
|
||||
*
|
||||
* Destroys a statically allocated hashtable object.
|
||||
*/
|
||||
void hashtable_close(hashtable_t *hashtable);
|
||||
|
||||
/**
|
||||
* hashtable_set - Add/modify value in hashtable
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
* @key: The key
|
||||
* @serial: For addition order of keys
|
||||
* @value: The value
|
||||
*
|
||||
* If a value with the given key already exists, its value is replaced
|
||||
 * with the new value. Value is "stolen" in the sense that hashtable
|
||||
* doesn't increment its refcount but decreases the refcount when the
|
||||
* value is no longer needed.
|
||||
*
|
||||
* Returns 0 on success, -1 on failure (out of memory).
|
||||
*/
|
||||
int hashtable_set(hashtable_t *hashtable, const char *key, json_t *value);
|
||||
|
||||
/**
|
||||
* hashtable_get - Get a value associated with a key
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
* @key: The key
|
||||
*
|
||||
* Returns value if it is found, or NULL otherwise.
|
||||
*/
|
||||
void *hashtable_get(hashtable_t *hashtable, const char *key);
|
||||
|
||||
/**
|
||||
* hashtable_del - Remove a value from the hashtable
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
* @key: The key
|
||||
*
|
||||
* Returns 0 on success, or -1 if the key was not found.
|
||||
*/
|
||||
int hashtable_del(hashtable_t *hashtable, const char *key);
|
||||
|
||||
/**
|
||||
* hashtable_clear - Clear hashtable
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
*
|
||||
* Removes all items from the hashtable.
|
||||
*/
|
||||
void hashtable_clear(hashtable_t *hashtable);
|
||||
|
||||
/**
|
||||
* hashtable_iter - Iterate over hashtable
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
*
|
||||
* Returns an opaque iterator to the first element in the hashtable.
|
||||
* The iterator should be passed to hashtable_iter_* functions.
|
||||
* The hashtable items are not iterated over in any particular order.
|
||||
*
|
||||
* There's no need to free the iterator in any way. The iterator is
|
||||
* valid as long as the item that is referenced by the iterator is not
|
||||
* deleted. Other values may be added or deleted. In particular,
|
||||
* hashtable_iter_next() may be called on an iterator, and after that
|
||||
* the key/value pair pointed by the old iterator may be deleted.
|
||||
*/
|
||||
void *hashtable_iter(hashtable_t *hashtable);
|
||||
|
||||
/**
|
||||
* hashtable_iter_at - Return an iterator at a specific key
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
* @key: The key that the iterator should point to
|
||||
*
|
||||
* Like hashtable_iter() but returns an iterator pointing to a
|
||||
* specific key.
|
||||
*/
|
||||
void *hashtable_iter_at(hashtable_t *hashtable, const char *key);
|
||||
|
||||
/**
|
||||
* hashtable_iter_next - Advance an iterator
|
||||
*
|
||||
* @hashtable: The hashtable object
|
||||
* @iter: The iterator
|
||||
*
|
||||
* Returns a new iterator pointing to the next element in the
|
||||
 * hashtable or NULL if the whole hashtable has been iterated over.
|
||||
*/
|
||||
void *hashtable_iter_next(hashtable_t *hashtable, void *iter);
|
||||
|
||||
/**
|
||||
* hashtable_iter_key - Retrieve the key pointed by an iterator
|
||||
*
|
||||
* @iter: The iterator
|
||||
*/
|
||||
void *hashtable_iter_key(void *iter);
|
||||
|
||||
/**
|
||||
* hashtable_iter_value - Retrieve the value pointed by an iterator
|
||||
*
|
||||
* @iter: The iterator
|
||||
*/
|
||||
void *hashtable_iter_value(void *iter);
|
||||
|
||||
/**
|
||||
* hashtable_iter_set - Set the value pointed by an iterator
|
||||
*
|
||||
* @iter: The iterator
|
||||
* @value: The value to set
|
||||
*/
|
||||
void hashtable_iter_set(void *iter, json_t *value);
|
||||
|
||||
#endif
|
||||
277
compat/jansson/hashtable_seed.c
Normal file
277
compat/jansson/hashtable_seed.c
Normal file
@@ -0,0 +1,277 @@
|
||||
/* Generate sizeof(uint32_t) bytes of as random data as possible to seed
|
||||
the hash function.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <jansson_private_config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SCHED_H
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
/* For GetModuleHandle(), GetProcAddress() and GetCurrentProcessId() */
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include "jansson.h"
|
||||
|
||||
|
||||
/*
 * Assemble the first sizeof(uint32_t) bytes of `data` into a 32-bit
 * value, most significant byte first.  Each byte is treated as
 * unsigned so negative char values do not sign-extend.
 */
static uint32_t buf_to_uint32(char *data) {
    const unsigned char *bytes = (const unsigned char *)data;
    const unsigned char *end = bytes + sizeof(uint32_t);
    uint32_t value = 0;

    while (bytes != end) {
        value = (value << 8) | *bytes;
        ++bytes;
    }

    return value;
}
|
||||
|
||||
|
||||
|
||||
/* /dev/urandom */
|
||||
#if !defined(_WIN32) && defined(USE_URANDOM)
|
||||
/*
 * Read sizeof(uint32_t) bytes from /dev/urandom and store them in
 * *seed.  Uses open()/read()/close() when all three are available and
 * falls back to stdio otherwise.
 *
 * Returns 0 on success, 1 if the device cannot be opened or fewer bytes
 * than requested are read (*seed is then left untouched).
 */
static int seed_from_urandom(uint32_t *seed) {
    char raw[sizeof(uint32_t)];
    int complete;

#if defined(HAVE_OPEN) && defined(HAVE_CLOSE) && defined(HAVE_READ)
    int fd = open("/dev/urandom", O_RDONLY);
    if (fd == -1)
        return 1;

    complete = read(fd, raw, sizeof(uint32_t)) == sizeof(uint32_t);
    close(fd);
#else
    FILE *stream = fopen("/dev/urandom", "rb");
    if (!stream)
        return 1;

    complete = fread(raw, 1, sizeof(uint32_t), stream) == sizeof(uint32_t);
    fclose(stream);
#endif

    if (complete) {
        *seed = buf_to_uint32(raw);
        return 0;
    }

    return 1;
}
|
||||
#endif
|
||||
|
||||
/* Windows Crypto API */
|
||||
#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI)
|
||||
#include <wincrypt.h>
|
||||
|
||||
typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv, LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType, DWORD dwFlags);
|
||||
typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen, BYTE *pbBuffer);
|
||||
typedef BOOL (WINAPI *CRYPTRELEASECONTEXT)(HCRYPTPROV hProv, DWORD dwFlags);
|
||||
|
||||
static int seed_from_windows_cryptoapi(uint32_t *seed)
|
||||
{
|
||||
HINSTANCE hAdvAPI32 = NULL;
|
||||
CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
|
||||
CRYPTGENRANDOM pCryptGenRandom = NULL;
|
||||
CRYPTRELEASECONTEXT pCryptReleaseContext = NULL;
|
||||
HCRYPTPROV hCryptProv = 0;
|
||||
BYTE data[sizeof(uint32_t)];
|
||||
int ok;
|
||||
|
||||
hAdvAPI32 = GetModuleHandle(TEXT("advapi32.dll"));
|
||||
if(hAdvAPI32 == NULL)
|
||||
return 1;
|
||||
|
||||
pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(hAdvAPI32, "CryptAcquireContextA");
|
||||
if (!pCryptAcquireContext)
|
||||
return 1;
|
||||
|
||||
pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, "CryptGenRandom");
|
||||
if (!pCryptGenRandom)
|
||||
return 1;
|
||||
|
||||
pCryptReleaseContext = (CRYPTRELEASECONTEXT)GetProcAddress(hAdvAPI32, "CryptReleaseContext");
|
||||
if (!pCryptReleaseContext)
|
||||
return 1;
|
||||
|
||||
if (!pCryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
|
||||
return 1;
|
||||
|
||||
ok = pCryptGenRandom(hCryptProv, sizeof(uint32_t), data);
|
||||
pCryptReleaseContext(hCryptProv, 0);
|
||||
|
||||
if (!ok)
|
||||
return 1;
|
||||
|
||||
*seed = buf_to_uint32((char *)data);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* gettimeofday() and getpid() */
|
||||
/*
 * Derive a seed from the current time and, where available, the process
 * id.  With gettimeofday() the seconds and microseconds are XORed;
 * otherwise only time() seconds are used.  Always stores a value in
 * *seed and returns 0.
 */
static int seed_from_timestamp_and_pid(uint32_t *seed) {
    uint32_t mix;

#ifdef HAVE_GETTIMEOFDAY
    /* XOR of seconds and microseconds */
    struct timeval now;
    gettimeofday(&now, NULL);
    mix = (uint32_t)now.tv_sec ^ (uint32_t)now.tv_usec;
#else
    /* Seconds only */
    mix = (uint32_t)time(NULL);
#endif

    /* XOR with PID for more randomness */
#if defined(_WIN32)
    mix ^= (uint32_t)GetCurrentProcessId();
#elif defined(HAVE_GETPID)
    mix ^= (uint32_t)getpid();
#endif

    *seed = mix;
    return 0;
}
|
||||
|
||||
/*
 * Produce a non-zero seed.  The platform randomness source selected at
 * compile time (/dev/urandom or the Windows Crypto API) is tried first;
 * if none is compiled in or it fails, the timestamp/PID fallback is
 * used.  A result of 0 is mapped to 1.
 */
static uint32_t generate_seed() {
    uint32_t seed;
    int have_seed = 0;

#if !defined(_WIN32) && defined(USE_URANDOM)
    if (seed_from_urandom(&seed) == 0)
        have_seed = 1;
#endif

#if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI)
    if (seed_from_windows_cryptoapi(&seed) == 0)
        have_seed = 1;
#endif

    /* Fall back to timestamp and PID if no better randomness is
       available */
    if (!have_seed)
        seed_from_timestamp_and_pid(&seed);

    /* Make sure the seed is never zero */
    return seed == 0 ? 1 : seed;
}
|
||||
|
||||
|
||||
volatile uint32_t hashtable_seed = 0;
|
||||
|
||||
#if defined(HAVE_ATOMIC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32))
|
||||
static volatile char seed_initialized = 0;
|
||||
|
||||
void json_object_seed(size_t seed) {
|
||||
uint32_t new_seed = (uint32_t)seed;
|
||||
|
||||
if (hashtable_seed == 0) {
|
||||
if (__atomic_test_and_set(&seed_initialized, __ATOMIC_RELAXED) == 0) {
|
||||
/* Do the seeding ourselves */
|
||||
if (new_seed == 0)
|
||||
new_seed = generate_seed();
|
||||
|
||||
__atomic_store_n(&hashtable_seed, new_seed, __ATOMIC_RELEASE);
|
||||
} else {
|
||||
/* Wait for another thread to do the seeding */
|
||||
do {
|
||||
#ifdef HAVE_SCHED_YIELD
|
||||
sched_yield();
|
||||
#endif
|
||||
} while(__atomic_load_n(&hashtable_seed, __ATOMIC_ACQUIRE) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAVE_SYNC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32))
|
||||
void json_object_seed(size_t seed) {
|
||||
uint32_t new_seed = (uint32_t)seed;
|
||||
|
||||
if (hashtable_seed == 0) {
|
||||
if (new_seed == 0) {
|
||||
/* Explicit synchronization fences are not supported by the
|
||||
__sync builtins, so every thread getting here has to
|
||||
generate the seed value.
|
||||
*/
|
||||
new_seed = generate_seed();
|
||||
}
|
||||
|
||||
do {
|
||||
if (__sync_bool_compare_and_swap(&hashtable_seed, 0, new_seed)) {
|
||||
/* We were the first to seed */
|
||||
break;
|
||||
} else {
|
||||
/* Wait for another thread to do the seeding */
|
||||
#ifdef HAVE_SCHED_YIELD
|
||||
sched_yield();
|
||||
#endif
|
||||
}
|
||||
} while(hashtable_seed == 0);
|
||||
}
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
static long seed_initialized = 0;
|
||||
void json_object_seed(size_t seed) {
|
||||
uint32_t new_seed = (uint32_t)seed;
|
||||
|
||||
if (hashtable_seed == 0) {
|
||||
if (InterlockedIncrement(&seed_initialized) == 1) {
|
||||
/* Do the seeding ourselves */
|
||||
if (new_seed == 0)
|
||||
new_seed = generate_seed();
|
||||
|
||||
hashtable_seed = new_seed;
|
||||
} else {
|
||||
/* Wait for another thread to do the seeding */
|
||||
do {
|
||||
SwitchToThread();
|
||||
} while (hashtable_seed == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* Fall back to a thread-unsafe version */
|
||||
void json_object_seed(size_t seed) {
|
||||
uint32_t new_seed = (uint32_t)seed;
|
||||
|
||||
if (hashtable_seed == 0) {
|
||||
if (new_seed == 0)
|
||||
new_seed = generate_seed();
|
||||
|
||||
hashtable_seed = new_seed;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
315
compat/jansson/jansson.h
Normal file
315
compat/jansson/jansson.h
Normal file
@@ -0,0 +1,315 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef JANSSON_H
|
||||
#define JANSSON_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> /* for size_t */
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "jansson_config.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* version */
|
||||
|
||||
#define JANSSON_MAJOR_VERSION 2
|
||||
#define JANSSON_MINOR_VERSION 10
|
||||
#define JANSSON_MICRO_VERSION 0
|
||||
|
||||
/* Micro version is omitted if it's 0 */
|
||||
#define JANSSON_VERSION "2.10"
|
||||
|
||||
/* Version as a 3-byte hex number, e.g. 0x010201 == 1.2.1. Use this
|
||||
for numeric comparisons, e.g. #if JANSSON_VERSION_HEX >= ... */
|
||||
#define JANSSON_VERSION_HEX ((JANSSON_MAJOR_VERSION << 16) | \
|
||||
(JANSSON_MINOR_VERSION << 8) | \
|
||||
(JANSSON_MICRO_VERSION << 0))
|
||||
|
||||
|
||||
/* types */
|
||||
|
||||
/* Kind of value held by a json_t; read with json_typeof() and tested
   by the json_is_*() macros. */
typedef enum {
    JSON_OBJECT,
    JSON_ARRAY,
    JSON_STRING,
    JSON_INTEGER,
    JSON_REAL,
    JSON_TRUE,
    JSON_FALSE,
    JSON_NULL
} json_type;
|
||||
|
||||
typedef struct json_t {
|
||||
json_type type;
|
||||
size_t refcount;
|
||||
} json_t;
|
||||
|
||||
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
|
||||
#if JSON_INTEGER_IS_LONG_LONG
|
||||
#ifdef _WIN32
|
||||
#define JSON_INTEGER_FORMAT "I64d"
|
||||
#else
|
||||
#define JSON_INTEGER_FORMAT "lld"
|
||||
#endif
|
||||
typedef long long json_int_t;
|
||||
#else
|
||||
#define JSON_INTEGER_FORMAT "ld"
|
||||
typedef long json_int_t;
|
||||
#endif /* JSON_INTEGER_IS_LONG_LONG */
|
||||
#endif
|
||||
|
||||
#define json_typeof(json) ((json)->type)
|
||||
#define json_is_object(json) ((json) && json_typeof(json) == JSON_OBJECT)
|
||||
#define json_is_array(json) ((json) && json_typeof(json) == JSON_ARRAY)
|
||||
#define json_is_string(json) ((json) && json_typeof(json) == JSON_STRING)
|
||||
#define json_is_integer(json) ((json) && json_typeof(json) == JSON_INTEGER)
|
||||
#define json_is_real(json) ((json) && json_typeof(json) == JSON_REAL)
|
||||
#define json_is_number(json) (json_is_integer(json) || json_is_real(json))
|
||||
#define json_is_true(json) ((json) && json_typeof(json) == JSON_TRUE)
|
||||
#define json_is_false(json) ((json) && json_typeof(json) == JSON_FALSE)
|
||||
#define json_boolean_value json_is_true
|
||||
#define json_is_boolean(json) (json_is_true(json) || json_is_false(json))
|
||||
#define json_is_null(json) ((json) && json_typeof(json) == JSON_NULL)
|
||||
|
||||
/* construction, destruction, reference counting */
|
||||
|
||||
json_t *json_object(void);
|
||||
json_t *json_array(void);
|
||||
json_t *json_string(const char *value);
|
||||
json_t *json_stringn(const char *value, size_t len);
|
||||
json_t *json_string_nocheck(const char *value);
|
||||
json_t *json_stringn_nocheck(const char *value, size_t len);
|
||||
json_t *json_integer(json_int_t value);
|
||||
json_t *json_real(double value);
|
||||
json_t *json_true(void);
|
||||
json_t *json_false(void);
|
||||
#define json_boolean(val) ((val) ? json_true() : json_false())
|
||||
json_t *json_null(void);
|
||||
|
||||
static JSON_INLINE
|
||||
json_t *json_incref(json_t *json)
|
||||
{
|
||||
if(json && json->refcount != (size_t)-1)
|
||||
++json->refcount;
|
||||
return json;
|
||||
}
|
||||
|
||||
/* do not call json_delete directly */
|
||||
void json_delete(json_t *json);
|
||||
|
||||
static JSON_INLINE
|
||||
void json_decref(json_t *json)
|
||||
{
|
||||
if(json && json->refcount != (size_t)-1 && --json->refcount == 0)
|
||||
json_delete(json);
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
static JSON_INLINE
|
||||
void json_decrefp(json_t **json)
|
||||
{
|
||||
if(json) {
|
||||
json_decref(*json);
|
||||
*json = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#define json_auto_t json_t __attribute__((cleanup(json_decrefp)))
|
||||
#endif
|
||||
|
||||
|
||||
/* error reporting */
|
||||
|
||||
#define JSON_ERROR_TEXT_LENGTH 160
|
||||
#define JSON_ERROR_SOURCE_LENGTH 80
|
||||
|
||||
typedef struct {
|
||||
int line;
|
||||
int column;
|
||||
int position;
|
||||
char source[JSON_ERROR_SOURCE_LENGTH];
|
||||
char text[JSON_ERROR_TEXT_LENGTH];
|
||||
} json_error_t;
|
||||
|
||||
|
||||
/* getters, setters, manipulation */
|
||||
|
||||
void json_object_seed(size_t seed);
|
||||
size_t json_object_size(const json_t *object);
|
||||
json_t *json_object_get(const json_t *object, const char *key);
|
||||
int json_object_set_new(json_t *object, const char *key, json_t *value);
|
||||
int json_object_set_new_nocheck(json_t *object, const char *key, json_t *value);
|
||||
int json_object_del(json_t *object, const char *key);
|
||||
int json_object_clear(json_t *object);
|
||||
int json_object_update(json_t *object, json_t *other);
|
||||
int json_object_update_existing(json_t *object, json_t *other);
|
||||
int json_object_update_missing(json_t *object, json_t *other);
|
||||
void *json_object_iter(json_t *object);
|
||||
void *json_object_iter_at(json_t *object, const char *key);
|
||||
void *json_object_key_to_iter(const char *key);
|
||||
void *json_object_iter_next(json_t *object, void *iter);
|
||||
const char *json_object_iter_key(void *iter);
|
||||
json_t *json_object_iter_value(void *iter);
|
||||
int json_object_iter_set_new(json_t *object, void *iter, json_t *value);
|
||||
|
||||
#define json_object_foreach(object, key, value) \
|
||||
for(key = json_object_iter_key(json_object_iter(object)); \
|
||||
key && (value = json_object_iter_value(json_object_key_to_iter(key))); \
|
||||
key = json_object_iter_key(json_object_iter_next(object, json_object_key_to_iter(key))))
|
||||
|
||||
#define json_object_foreach_safe(object, n, key, value) \
|
||||
for(key = json_object_iter_key(json_object_iter(object)), \
|
||||
n = json_object_iter_next(object, json_object_key_to_iter(key)); \
|
||||
key && (value = json_object_iter_value(json_object_key_to_iter(key))); \
|
||||
key = json_object_iter_key(n), \
|
||||
n = json_object_iter_next(object, json_object_key_to_iter(key)))
|
||||
|
||||
#define json_array_foreach(array, index, value) \
|
||||
for(index = 0; \
|
||||
index < json_array_size(array) && (value = json_array_get(array, index)); \
|
||||
index++)
|
||||
|
||||
static JSON_INLINE
|
||||
int json_object_set(json_t *object, const char *key, json_t *value)
|
||||
{
|
||||
return json_object_set_new(object, key, json_incref(value));
|
||||
}
|
||||
|
||||
static JSON_INLINE
|
||||
int json_object_set_nocheck(json_t *object, const char *key, json_t *value)
|
||||
{
|
||||
return json_object_set_new_nocheck(object, key, json_incref(value));
|
||||
}
|
||||
|
||||
static JSON_INLINE
|
||||
int json_object_iter_set(json_t *object, void *iter, json_t *value)
|
||||
{
|
||||
return json_object_iter_set_new(object, iter, json_incref(value));
|
||||
}
|
||||
|
||||
size_t json_array_size(const json_t *array);
|
||||
json_t *json_array_get(const json_t *array, size_t index);
|
||||
int json_array_set_new(json_t *array, size_t index, json_t *value);
|
||||
int json_array_append_new(json_t *array, json_t *value);
|
||||
int json_array_insert_new(json_t *array, size_t index, json_t *value);
|
||||
int json_array_remove(json_t *array, size_t index);
|
||||
int json_array_clear(json_t *array);
|
||||
int json_array_extend(json_t *array, json_t *other);
|
||||
|
||||
static JSON_INLINE
|
||||
int json_array_set(json_t *array, size_t ind, json_t *value)
|
||||
{
|
||||
return json_array_set_new(array, ind, json_incref(value));
|
||||
}
|
||||
|
||||
static JSON_INLINE
|
||||
int json_array_append(json_t *array, json_t *value)
|
||||
{
|
||||
return json_array_append_new(array, json_incref(value));
|
||||
}
|
||||
|
||||
static JSON_INLINE
|
||||
int json_array_insert(json_t *array, size_t ind, json_t *value)
|
||||
{
|
||||
return json_array_insert_new(array, ind, json_incref(value));
|
||||
}
|
||||
|
||||
const char *json_string_value(const json_t *string);
|
||||
size_t json_string_length(const json_t *string);
|
||||
json_int_t json_integer_value(const json_t *integer);
|
||||
double json_real_value(const json_t *real);
|
||||
double json_number_value(const json_t *json);
|
||||
|
||||
int json_string_set(json_t *string, const char *value);
|
||||
int json_string_setn(json_t *string, const char *value, size_t len);
|
||||
int json_string_set_nocheck(json_t *string, const char *value);
|
||||
int json_string_setn_nocheck(json_t *string, const char *value, size_t len);
|
||||
int json_integer_set(json_t *integer, json_int_t value);
|
||||
int json_real_set(json_t *real, double value);
|
||||
|
||||
/* pack, unpack */
|
||||
|
||||
json_t *json_pack(const char *fmt, ...);
|
||||
json_t *json_pack_ex(json_error_t *error, size_t flags, const char *fmt, ...);
|
||||
json_t *json_vpack_ex(json_error_t *error, size_t flags, const char *fmt, va_list ap);
|
||||
|
||||
#define JSON_VALIDATE_ONLY 0x1
|
||||
#define JSON_STRICT 0x2
|
||||
|
||||
int json_unpack(json_t *root, const char *fmt, ...);
|
||||
int json_unpack_ex(json_t *root, json_error_t *error, size_t flags, const char *fmt, ...);
|
||||
int json_vunpack_ex(json_t *root, json_error_t *error, size_t flags, const char *fmt, va_list ap);
|
||||
|
||||
|
||||
/* equality */
|
||||
|
||||
int json_equal(json_t *value1, json_t *value2);
|
||||
|
||||
|
||||
/* copying */
|
||||
|
||||
json_t *json_copy(json_t *value);
|
||||
json_t *json_deep_copy(const json_t *value);
|
||||
|
||||
|
||||
/* decoding */
|
||||
|
||||
#define JSON_REJECT_DUPLICATES 0x1
|
||||
#define JSON_DISABLE_EOF_CHECK 0x2
|
||||
#define JSON_DECODE_ANY 0x4
|
||||
#define JSON_DECODE_INT_AS_REAL 0x8
|
||||
#define JSON_ALLOW_NUL 0x10
|
||||
|
||||
typedef size_t (*json_load_callback_t)(void *buffer, size_t buflen, void *data);
|
||||
|
||||
json_t *json_loads(const char *input, size_t flags, json_error_t *error);
|
||||
json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error);
|
||||
json_t *json_loadf(FILE *input, size_t flags, json_error_t *error);
|
||||
json_t *json_loadfd(int input, size_t flags, json_error_t *error);
|
||||
json_t *json_load_file(const char *path, size_t flags, json_error_t *error);
|
||||
json_t *json_load_callback(json_load_callback_t callback, void *data, size_t flags, json_error_t *error);
|
||||
|
||||
|
||||
/* encoding */
|
||||
|
||||
#define JSON_MAX_INDENT 0x1F
|
||||
#define JSON_INDENT(n) ((n) & JSON_MAX_INDENT)
|
||||
#define JSON_COMPACT 0x20
|
||||
#define JSON_ENSURE_ASCII 0x40
|
||||
#define JSON_SORT_KEYS 0x80
|
||||
#define JSON_PRESERVE_ORDER 0x100
|
||||
#define JSON_ENCODE_ANY 0x200
|
||||
#define JSON_ESCAPE_SLASH 0x400
|
||||
#define JSON_REAL_PRECISION(n) (((n) & 0x1F) << 11)
|
||||
#define JSON_EMBED 0x10000
|
||||
|
||||
typedef int (*json_dump_callback_t)(const char *buffer, size_t size, void *data);
|
||||
|
||||
char *json_dumps(const json_t *json, size_t flags);
|
||||
size_t json_dumpb(const json_t *json, char *buffer, size_t size, size_t flags);
|
||||
int json_dumpf(const json_t *json, FILE *output, size_t flags);
|
||||
int json_dumpfd(const json_t *json, int output, size_t flags);
|
||||
int json_dump_file(const json_t *json, const char *path, size_t flags);
|
||||
int json_dump_callback(const json_t *json, json_dump_callback_t callback, void *data, size_t flags);
|
||||
|
||||
/* custom memory allocation */
|
||||
|
||||
typedef void *(*json_malloc_t)(size_t);
|
||||
typedef void (*json_free_t)(void *);
|
||||
|
||||
void json_set_alloc_funcs(json_malloc_t malloc_fn, json_free_t free_fn);
|
||||
void json_get_alloc_funcs(json_malloc_t *malloc_fn, json_free_t *free_fn);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
43
compat/jansson/jansson_config.h
Normal file
43
compat/jansson/jansson_config.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*
|
||||
*
|
||||
* This file specifies a part of the site-specific configuration for
|
||||
* Jansson, namely those things that affect the public API in
|
||||
* jansson.h.
|
||||
*
|
||||
* The configure script copies this file to jansson_config.h and
|
||||
* replaces @var@ substitutions by values that fit your system. If you
|
||||
* cannot run the configure script, you can do the value substitution
|
||||
* by hand.
|
||||
*/
|
||||
|
||||
#ifndef JANSSON_CONFIG_H
|
||||
#define JANSSON_CONFIG_H
|
||||
|
||||
/* If your compiler supports the inline keyword in C, JSON_INLINE is
|
||||
defined to `inline', otherwise empty. In C++, the inline is always
|
||||
supported. */
|
||||
#ifdef __cplusplus
|
||||
#define JSON_INLINE inline
|
||||
#else
|
||||
#define JSON_INLINE inline
|
||||
#endif
|
||||
|
||||
/* If your compiler supports the `long long` type and the strtoll()
|
||||
library function, JSON_INTEGER_IS_LONG_LONG is defined to 1,
|
||||
otherwise to 0. */
|
||||
#define JSON_INTEGER_IS_LONG_LONG 1
|
||||
|
||||
/* If locale.h and localeconv() are available, define to 1,
|
||||
otherwise to 0. */
|
||||
#define JSON_HAVE_LOCALECONV 1
|
||||
|
||||
/* Maximum recursion depth for parsing JSON input.
|
||||
This limits the depth of e.g. array-within-array constructions. */
|
||||
#define JSON_PARSER_MAX_DEPTH 2048
|
||||
|
||||
#endif
|
||||
109
compat/jansson/jansson_private.h
Normal file
109
compat/jansson/jansson_private.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef JANSSON_PRIVATE_H
|
||||
#define JANSSON_PRIVATE_H
|
||||
|
||||
#include "jansson_private_config.h"
|
||||
#include <stddef.h>
|
||||
#include "jansson.h"
|
||||
#include "hashtable.h"
|
||||
#include "strbuffer.h"
|
||||
|
||||
/* Recover a pointer to the enclosing `type_` object from `ptr_`, a pointer
   to its `member_` field, by stepping back offsetof(type_, member_) bytes.
   `ptr_` is parenthesised so that an expression argument (for example a
   conditional) is cast to char * as a whole instead of only its first
   operand. */
#define container_of(ptr_, type_, member_)  \
    ((type_ *)((char *)(ptr_) - offsetof(type_, member_)))
|
||||
|
||||
/* On some platforms, max() may already be defined */
|
||||
#ifndef max
|
||||
#define max(a, b) ((a) > (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
/* va_copy is a C99 feature. In C89 implementations, it's sometimes
|
||||
available as __va_copy. If not, memcpy() should do the trick. */
|
||||
#ifndef va_copy
|
||||
#ifdef __va_copy
|
||||
#define va_copy __va_copy
|
||||
#else
|
||||
#define va_copy(a, b) memcpy(&(a), &(b), sizeof(va_list))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
json_t json;
|
||||
hashtable_t hashtable;
|
||||
int visited;
|
||||
} json_object_t;
|
||||
|
||||
typedef struct {
|
||||
json_t json;
|
||||
size_t size;
|
||||
size_t entries;
|
||||
json_t **table;
|
||||
int visited;
|
||||
} json_array_t;
|
||||
|
||||
typedef struct {
|
||||
json_t json;
|
||||
char *value;
|
||||
size_t length;
|
||||
} json_string_t;
|
||||
|
||||
typedef struct {
|
||||
json_t json;
|
||||
double value;
|
||||
} json_real_t;
|
||||
|
||||
typedef struct {
|
||||
json_t json;
|
||||
json_int_t value;
|
||||
} json_integer_t;
|
||||
|
||||
#define json_to_object(json_) container_of(json_, json_object_t, json)
|
||||
#define json_to_array(json_) container_of(json_, json_array_t, json)
|
||||
#define json_to_string(json_) container_of(json_, json_string_t, json)
|
||||
#define json_to_real(json_) container_of(json_, json_real_t, json)
|
||||
#define json_to_integer(json_) container_of(json_, json_integer_t, json)
|
||||
|
||||
/* Create a string by taking ownership of an existing buffer */
|
||||
json_t *jsonp_stringn_nocheck_own(const char *value, size_t len);
|
||||
|
||||
/* Error message formatting */
|
||||
void jsonp_error_init(json_error_t *error, const char *source);
|
||||
void jsonp_error_set_source(json_error_t *error, const char *source);
|
||||
void jsonp_error_set(json_error_t *error, int line, int column,
|
||||
size_t position, const char *msg, ...);
|
||||
void jsonp_error_vset(json_error_t *error, int line, int column,
|
||||
size_t position, const char *msg, va_list ap);
|
||||
|
||||
/* Locale independent string<->double conversions */
|
||||
int jsonp_strtod(strbuffer_t *strbuffer, double *out);
|
||||
int jsonp_dtostr(char *buffer, size_t size, double value, int prec);
|
||||
|
||||
/* Wrappers for custom memory functions */
|
||||
/* Allocation entry points used inside the library; per the section
   heading these wrap the custom memory functions (see
   json_set_alloc_funcs() in jansson.h). */
void* jsonp_malloc(size_t size);
void jsonp_free(void *ptr);

/* String duplication helpers. jsonp_strndup() takes an explicit length,
   jsonp_strdup() only the source string. Each is declared exactly once. */
char *jsonp_strndup(const char *str, size_t length);
char *jsonp_strdup(const char *str);
|
||||
|
||||
|
||||
/* Windows compatibility */
|
||||
/* On Windows builds, map snprintf/vsnprintf onto the underscore-prefixed
   variants where the conditions below say the plain names are missing. */
#if defined(_WIN32) || defined(WIN32)
|
||||
# if defined(_MSC_VER) /* MS compiler */
|
||||
# if (_MSC_VER < 1900) && !defined(snprintf) /* no snprintf macro defined yet and _MSC_VER is below 1900 */
|
||||
# define snprintf _snprintf
|
||||
# endif
|
||||
# if (_MSC_VER < 1500) && !defined(vsnprintf) /* no vsnprintf macro defined yet and _MSC_VER is below 1500 */
|
||||
# define vsnprintf(b,c,f,a) _vsnprintf(b,c,f,a)
|
||||
# endif
|
||||
# else /* Other Windows compiler, old definition */
|
||||
# define snprintf _snprintf
|
||||
# define vsnprintf _vsnprintf
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
159
compat/jansson/jansson_private_config.h
Normal file
159
compat/jansson/jansson_private_config.h
Normal file
@@ -0,0 +1,159 @@
|
||||
/* jansson_private_config.h. Generated from jansson_private_config.h.in by configure. */
|
||||
/* jansson_private_config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Define to 1 if gcc's __atomic builtins are available */
|
||||
#define HAVE_ATOMIC_BUILTINS 1
|
||||
|
||||
/* Define to 1 if you have the `close' function. */
|
||||
#define HAVE_CLOSE 1
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <endian.h> header file. */
|
||||
/* #undef HAVE_ENDIAN_H */
|
||||
|
||||
/* Define to 1 if you have the <fcntl.h> header file. */
|
||||
#define HAVE_FCNTL_H 1
|
||||
|
||||
/* Define to 1 if you have the `getpid' function. */
|
||||
#define HAVE_GETPID 1
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#define HAVE_GETTIMEOFDAY 1
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the `localeconv' function. */
|
||||
#define HAVE_LOCALECONV 1
|
||||
|
||||
/* Define to 1 if you have the <locale.h> header file. */
|
||||
#define HAVE_LOCALE_H 1
|
||||
|
||||
/* Define to 1 if the system has the type 'long long int'. */
|
||||
#define HAVE_LONG_LONG_INT 1
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define to 1 if you have the `open' function. */
|
||||
#define HAVE_OPEN 1
|
||||
|
||||
/* Define to 1 if you have the `read' function. */
|
||||
#define HAVE_READ 1
|
||||
|
||||
/* Define to 1 if you have the <sched.h> header file. */
|
||||
#define HAVE_SCHED_H 1
|
||||
|
||||
/* Define to 1 if you have the `sched_yield' function. */
|
||||
#define HAVE_SCHED_YIELD 1
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the `strtoll' function. */
|
||||
#define HAVE_STRTOLL 1
|
||||
|
||||
/* Define to 1 if gcc's __sync builtins are available */
|
||||
#define HAVE_SYNC_BUILTINS 1
|
||||
|
||||
/* Define to 1 if you have the <sys/param.h> header file. */
|
||||
#define HAVE_SYS_PARAM_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||
#define HAVE_SYS_TIME_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* Define to 1 if the system has the type 'unsigned long long int'. */
|
||||
#define HAVE_UNSIGNED_LONG_LONG_INT 1
|
||||
|
||||
/* Number of buckets new object hashtables contain is 2 raised to this power.
|
||||
E.g. 3 -> 2^3 = 8. */
|
||||
#define INITIAL_HASHTABLE_ORDER 3
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#define LT_OBJDIR ".libs/"
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "jansson"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT "petri@digip.org"
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "jansson"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "jansson 2.9"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "jansson"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "2.9"
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Define to 1 if /dev/urandom should be used for seeding the hash function */
|
||||
#define USE_URANDOM 1
|
||||
|
||||
/* Define to 1 if CryptGenRandom should be used for seeding the hash function
|
||||
*/
|
||||
#define USE_WINDOWS_CRYPTOAPI 1
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "2.9"
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
/* #undef _UINT32_T */
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
/* #undef _UINT8_T */
|
||||
|
||||
/* Define to `__inline__' or `__inline' if that's what the C compiler
|
||||
calls it, or to nothing if 'inline' is not supported under any name. */
|
||||
#ifndef __cplusplus
|
||||
/* #undef inline */
|
||||
#endif
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 32 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef int32_t */
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 16 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef uint16_t */
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef uint32_t */
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 8 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef uint8_t */
|
||||
1153
compat/jansson/load.c
Normal file
1153
compat/jansson/load.c
Normal file
File diff suppressed because it is too large
Load Diff
381
compat/jansson/lookup3.h
Normal file
381
compat/jansson/lookup3.h
Normal file
@@ -0,0 +1,381 @@
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
lookup3.c, by Bob Jenkins, May 2006, Public Domain.
|
||||
|
||||
These are functions for producing 32-bit hashes for hash table lookup.
|
||||
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
|
||||
are externally useful functions. Routines to test the hash are included
|
||||
if SELF_TEST is defined. You can use this free for any purpose. It's in
|
||||
the public domain. It has no warranty.
|
||||
|
||||
You probably want to use hashlittle(). hashlittle() and hashbig()
|
||||
hash byte arrays. hashlittle() is faster than hashbig() on
|
||||
little-endian machines. Intel and AMD are little-endian machines.
|
||||
On second thought, you probably want hashlittle2(), which is identical to
|
||||
hashlittle() except it returns two 32-bit hashes for the price of one.
|
||||
You could implement hashbig2() if you wanted but I haven't bothered here.
|
||||
|
||||
If you want to find a hash of, say, exactly 7 integers, do
|
||||
a = i1; b = i2; c = i3;
|
||||
mix(a,b,c);
|
||||
a += i4; b += i5; c += i6;
|
||||
mix(a,b,c);
|
||||
a += i7;
|
||||
final(a,b,c);
|
||||
then use c as the hash value. If you have a variable length array of
|
||||
4-byte integers to hash, use hashword(). If you have a byte array (like
|
||||
a character string), use hashlittle(). If you have several byte arrays, or
|
||||
a mix of things, see the comments above hashlittle().
|
||||
|
||||
Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
|
||||
then mix those integers. This is fast (you can do a lot more thorough
|
||||
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
|
||||
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <jansson_private_config.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h> /* defines uint32_t etc */
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SYS_PARAM_H
|
||||
#include <sys/param.h> /* attempt to define endianness */
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ENDIAN_H
|
||||
# include <endian.h> /* attempt to define endianness */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* My best guess at if you are big-endian or little-endian. This may
|
||||
* need adjustment.
|
||||
*/
|
||||
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
|
||||
__BYTE_ORDER == __LITTLE_ENDIAN) || \
|
||||
(defined(i386) || defined(__i386__) || defined(__i486__) || \
|
||||
defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
|
||||
# define HASH_LITTLE_ENDIAN 1
|
||||
# define HASH_BIG_ENDIAN 0
|
||||
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
|
||||
__BYTE_ORDER == __BIG_ENDIAN) || \
|
||||
(defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
|
||||
# define HASH_LITTLE_ENDIAN 0
|
||||
# define HASH_BIG_ENDIAN 1
|
||||
#else
|
||||
# define HASH_LITTLE_ENDIAN 0
|
||||
# define HASH_BIG_ENDIAN 0
|
||||
#endif
|
||||
|
||||
#define hashsize(n) ((uint32_t)1<<(n))
|
||||
#define hashmask(n) (hashsize(n)-1)
|
||||
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
mix -- mix 3 32-bit values reversibly.
|
||||
|
||||
This is reversible, so any information in (a,b,c) before mix() is
|
||||
still in (a,b,c) after mix().
|
||||
|
||||
If four pairs of (a,b,c) inputs are run through mix(), or through
|
||||
mix() in reverse, there are at least 32 bits of the output that
|
||||
are sometimes the same for one pair and different for another pair.
|
||||
This was tested for:
|
||||
* pairs that differed by one bit, by two bits, in any combination
|
||||
of top bits of (a,b,c), or in any combination of bottom bits of
|
||||
(a,b,c).
|
||||
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
||||
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
||||
is commonly produced by subtraction) look like a single 1-bit
|
||||
difference.
|
||||
* the base values were pseudorandom, all zero but one bit set, or
|
||||
all zero plus a counter that starts at zero.
|
||||
|
||||
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
|
||||
satisfy this are
|
||||
4 6 8 16 19 4
|
||||
9 15 3 18 27 15
|
||||
14 9 3 7 17 3
|
||||
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
|
||||
for "differ" defined as + with a one-bit base and a two-bit delta. I
|
||||
used http://burtleburtle.net/bob/hash/avalanche.html to choose
|
||||
the operations, constants, and arrangements of the variables.
|
||||
|
||||
This does not achieve avalanche. There are input bits of (a,b,c)
|
||||
that fail to affect some output bits of (a,b,c), especially of a. The
|
||||
most thoroughly mixed value is c, but it doesn't really even achieve
|
||||
avalanche in c.
|
||||
|
||||
This allows some parallelism. Read-after-writes are good at doubling
|
||||
the number of bits affected, so the goal of mixing pulls in the opposite
|
||||
direction as the goal of parallelism. I did what I could. Rotates
|
||||
seem to cost as much as shifts on every machine I could lay my hands
|
||||
on, and rotates are much kinder to the top and bottom bits, so I used
|
||||
rotates.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
#define mix(a,b,c) \
|
||||
{ \
|
||||
a -= c; a ^= rot(c, 4); c += b; \
|
||||
b -= a; b ^= rot(a, 6); a += c; \
|
||||
c -= b; c ^= rot(b, 8); b += a; \
|
||||
a -= c; a ^= rot(c,16); c += b; \
|
||||
b -= a; b ^= rot(a,19); a += c; \
|
||||
c -= b; c ^= rot(b, 4); b += a; \
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
final -- final mixing of 3 32-bit values (a,b,c) into c
|
||||
|
||||
Pairs of (a,b,c) values differing in only a few bits will usually
|
||||
produce values of c that look totally different. This was tested for
|
||||
* pairs that differed by one bit, by two bits, in any combination
|
||||
of top bits of (a,b,c), or in any combination of bottom bits of
|
||||
(a,b,c).
|
||||
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
||||
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
||||
is commonly produced by subtraction) look like a single 1-bit
|
||||
difference.
|
||||
* the base values were pseudorandom, all zero but one bit set, or
|
||||
all zero plus a counter that starts at zero.
|
||||
|
||||
These constants passed:
|
||||
14 11 25 16 4 14 24
|
||||
12 14 25 16 4 14 24
|
||||
and these came close:
|
||||
4 8 15 26 3 22 24
|
||||
10 8 15 26 3 22 24
|
||||
11 8 15 26 3 22 24
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
#define final(a,b,c) \
|
||||
{ \
|
||||
c ^= b; c -= rot(b,14); \
|
||||
a ^= c; a -= rot(c,11); \
|
||||
b ^= a; b -= rot(a,25); \
|
||||
c ^= b; c -= rot(b,16); \
|
||||
a ^= c; a -= rot(c,4); \
|
||||
b ^= a; b -= rot(a,14); \
|
||||
c ^= b; c -= rot(b,24); \
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
hashlittle() -- hash a variable-length key into a 32-bit value
|
||||
k : the key (the unaligned variable-length array of bytes)
|
||||
length : the length of the key, counting by bytes
|
||||
initval : can be any 4-byte value
|
||||
Returns a 32-bit value. Every bit of the key affects every bit of
|
||||
the return value. Two keys differing by one or two bits will have
|
||||
totally different hash values.
|
||||
|
||||
The best hash table sizes are powers of 2. There is no need to do
|
||||
mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
||||
use a bitmask. For example, if you need only 10 bits, do
|
||||
h = (h & hashmask(10));
|
||||
In which case, the hash table should have hashsize(10) elements.
|
||||
|
||||
If you are hashing n strings (uint8_t **)k, do it like this:
|
||||
for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
|
||||
|
||||
By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
|
||||
code any way you wish, private, educational, or commercial. It's free.
|
||||
|
||||
Use for hash table lookup, or anything where one collision in 2^^32 is
|
||||
acceptable. Do NOT use for cryptographic purposes.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
|
||||
{
|
||||
uint32_t a,b,c; /* internal state */
|
||||
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
|
||||
|
||||
/* Set up the internal state */
|
||||
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
|
||||
|
||||
u.ptr = key;
|
||||
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
|
||||
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
|
||||
|
||||
/* Detect Valgrind or AddressSanitizer */
|
||||
#ifdef VALGRIND
|
||||
# define NO_MASKING_TRICK 1
|
||||
#else
|
||||
# if defined(__has_feature) /* Clang */
|
||||
# if __has_feature(address_sanitizer) /* is ASAN enabled? */
|
||||
# define NO_MASKING_TRICK 1
|
||||
# endif
|
||||
# else
|
||||
# if defined(__SANITIZE_ADDRESS__) /* GCC 4.8.x, is ASAN enabled? */
|
||||
# define NO_MASKING_TRICK 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef NO_MASKING_TRICK
|
||||
const uint8_t *k8;
|
||||
#endif
|
||||
|
||||
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
b += k[1];
|
||||
c += k[2];
|
||||
mix(a,b,c);
|
||||
length -= 12;
|
||||
k += 3;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
/*
|
||||
* "k[2]&0xffffff" actually reads beyond the end of the string, but
|
||||
* then masks off the part it's not allowed to read. Because the
|
||||
* string is aligned, the masked-off tail is in the same word as the
|
||||
* rest of the string. Every machine with memory protection I've seen
|
||||
* does it on word boundaries, so is OK with this. But VALGRIND will
|
||||
* still catch it and complain. The masking trick does make the hash
|
||||
* noticeably faster for short strings (like English words).
|
||||
*/
|
||||
#ifndef NO_MASKING_TRICK
|
||||
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
|
||||
case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
|
||||
case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
|
||||
case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
|
||||
case 8 : b+=k[1]; a+=k[0]; break;
|
||||
case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
|
||||
case 6 : b+=k[1]&0xffff; a+=k[0]; break;
|
||||
case 5 : b+=k[1]&0xff; a+=k[0]; break;
|
||||
case 4 : a+=k[0]; break;
|
||||
case 3 : a+=k[0]&0xffffff; break;
|
||||
case 2 : a+=k[0]&0xffff; break;
|
||||
case 1 : a+=k[0]&0xff; break;
|
||||
case 0 : return c; /* zero length strings require no mixing */
|
||||
}
|
||||
|
||||
#else /* make valgrind happy */
|
||||
|
||||
k8 = (const uint8_t *)k;
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
|
||||
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
|
||||
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
|
||||
case 9 : c+=k8[8]; /* fall through */
|
||||
case 8 : b+=k[1]; a+=k[0]; break;
|
||||
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
|
||||
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
|
||||
case 5 : b+=k8[4]; /* fall through */
|
||||
case 4 : a+=k[0]; break;
|
||||
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
|
||||
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
|
||||
case 1 : a+=k8[0]; break;
|
||||
case 0 : return c;
|
||||
}
|
||||
|
||||
#endif /* !valgrind */
|
||||
|
||||
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
|
||||
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
|
||||
const uint8_t *k8;
|
||||
|
||||
/*--------------- all but last block: aligned reads and different mixing */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0] + (((uint32_t)k[1])<<16);
|
||||
b += k[2] + (((uint32_t)k[3])<<16);
|
||||
c += k[4] + (((uint32_t)k[5])<<16);
|
||||
mix(a,b,c);
|
||||
length -= 12;
|
||||
k += 6;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
k8 = (const uint8_t *)k;
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
|
||||
b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
|
||||
case 10: c+=k[4];
|
||||
b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 9 : c+=k8[8]; /* fall through */
|
||||
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
|
||||
case 6 : b+=k[2];
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 5 : b+=k8[4]; /* fall through */
|
||||
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
|
||||
case 2 : a+=k[0];
|
||||
break;
|
||||
case 1 : a+=k8[0];
|
||||
break;
|
||||
case 0 : return c; /* zero length requires no mixing */
|
||||
}
|
||||
|
||||
} else { /* need to read the key one byte at a time */
|
||||
const uint8_t *k = (const uint8_t *)key;
|
||||
|
||||
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
a += ((uint32_t)k[1])<<8;
|
||||
a += ((uint32_t)k[2])<<16;
|
||||
a += ((uint32_t)k[3])<<24;
|
||||
b += k[4];
|
||||
b += ((uint32_t)k[5])<<8;
|
||||
b += ((uint32_t)k[6])<<16;
|
||||
b += ((uint32_t)k[7])<<24;
|
||||
c += k[8];
|
||||
c += ((uint32_t)k[9])<<8;
|
||||
c += ((uint32_t)k[10])<<16;
|
||||
c += ((uint32_t)k[11])<<24;
|
||||
mix(a,b,c);
|
||||
length -= 12;
|
||||
k += 12;
|
||||
}
|
||||
|
||||
/*-------------------------------- last block: affect all 32 bits of (c) */
|
||||
switch(length) /* all the case statements fall through */
|
||||
{
|
||||
case 12: c+=((uint32_t)k[11])<<24;
|
||||
case 11: c+=((uint32_t)k[10])<<16;
|
||||
case 10: c+=((uint32_t)k[9])<<8;
|
||||
case 9 : c+=k[8];
|
||||
case 8 : b+=((uint32_t)k[7])<<24;
|
||||
case 7 : b+=((uint32_t)k[6])<<16;
|
||||
case 6 : b+=((uint32_t)k[5])<<8;
|
||||
case 5 : b+=k[4];
|
||||
case 4 : a+=((uint32_t)k[3])<<24;
|
||||
case 3 : a+=((uint32_t)k[2])<<16;
|
||||
case 2 : a+=((uint32_t)k[1])<<8;
|
||||
case 1 : a+=k[0];
|
||||
break;
|
||||
case 0 : return c;
|
||||
}
|
||||
}
|
||||
|
||||
final(a,b,c);
|
||||
return c;
|
||||
}
|
||||
69
compat/jansson/memory.c
Normal file
69
compat/jansson/memory.c
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
* Copyright (c) 2011-2012 Basile Starynkevitch <basile@starynkevitch.net>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "jansson.h"
|
||||
#include "jansson_private.h"
|
||||
|
||||
/* C89 allows these to be macros */
|
||||
#undef malloc
|
||||
#undef free
|
||||
|
||||
/* memory function pointers */
|
||||
static json_malloc_t do_malloc = malloc;
|
||||
static json_free_t do_free = free;
|
||||
|
||||
void *jsonp_malloc(size_t size)
|
||||
{
|
||||
if(!size)
|
||||
return NULL;
|
||||
|
||||
return (*do_malloc)(size);
|
||||
}
|
||||
|
||||
void jsonp_free(void *ptr)
|
||||
{
|
||||
if(!ptr)
|
||||
return;
|
||||
|
||||
(*do_free)(ptr);
|
||||
}
|
||||
|
||||
/* Duplicate the NUL-terminated string `str` using jsonp_strndup().
   Returns NULL if the copy cannot be allocated. */
char *jsonp_strdup(const char *str)
{
    size_t len = strlen(str);

    return jsonp_strndup(str, len);
}
|
||||
|
||||
/* Copy the first `len` bytes of `str` into a freshly allocated buffer and
   append a terminating NUL.  Returns NULL if jsonp_malloc() fails. */
char *jsonp_strndup(const char *str, size_t len)
{
    char *copy;

    copy = jsonp_malloc(len + 1);
    if(copy) {
        memcpy(copy, str, len);
        copy[len] = '\0';
    }

    return copy;
}
|
||||
|
||||
/* Install the allocation and deallocation hooks used by jsonp_malloc()
   and jsonp_free().  The pointers are stored as given, without checks. */
void json_set_alloc_funcs(json_malloc_t malloc_fn, json_free_t free_fn)
{
    do_free = free_fn;
    do_malloc = malloc_fn;
}
|
||||
|
||||
/* Report the currently installed hooks.  Either output pointer may be NULL,
   in which case that hook is simply not reported. */
void json_get_alloc_funcs(json_malloc_t *malloc_fn, json_free_t *free_fn)
{
    if(free_fn)
        *free_fn = do_free;

    if(malloc_fn)
        *malloc_fn = do_malloc;
}
|
||||
871
compat/jansson/pack_unpack.c
Normal file
871
compat/jansson/pack_unpack.c
Normal file
@@ -0,0 +1,871 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
* Copyright (c) 2011-2012 Graeme Smecher <graeme.smecher@mail.mcgill.ca>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "jansson.h"
|
||||
#include "jansson_private.h"
|
||||
#include "utf.h"
|
||||
|
||||
/* One character read from a pack/unpack format string, together with the
   position counters the scanner had when it was read. */
typedef struct {
    int line;       /* scanner line; 0 marks an unused look-ahead slot */
    int column;     /* scanner column */
    size_t pos;     /* scanner position counter */
    char token;     /* the format character itself ('\0' at end of format) */
} token_t;
|
||||
|
||||
typedef struct {
|
||||
const char *start;
|
||||
const char *fmt;
|
||||
token_t prev_token;
|
||||
token_t token;
|
||||
token_t next_token;
|
||||
json_error_t *error;
|
||||
size_t flags;
|
||||
int line;
|
||||
int column;
|
||||
size_t pos;
|
||||
} scanner_t;
|
||||
|
||||
#define token(scanner) ((scanner)->token.token)
|
||||
|
||||
/* Names used in validation error messages; indexed by json_typeof(). */
static const char * const type_names[] = {
    "object",
    "array",
    "string",
    "integer",
    "real",
    "true",
    "false",
    "null"
};

/* Printable type name of the JSON value `x`. */
#define type_name(x) type_names[json_typeof(x)]

/* Format characters that unpack_array() accepts as the start of a value. */
static const char unpack_value_starters[] = "{[siIbfFOon";
|
||||
|
||||
/* Prepare `s` to scan `fmt` from its first character.  All three token
   slots are zeroed; counting starts at line 1, column 0, position 0. */
static void scanner_init(scanner_t *s, json_error_t *error,
                         size_t flags, const char *fmt)
{
    memset(&s->next_token, 0, sizeof(s->next_token));
    memset(&s->token, 0, sizeof(s->token));
    memset(&s->prev_token, 0, sizeof(s->prev_token));

    s->start = fmt;
    s->fmt = fmt;
    s->flags = flags;
    s->error = error;

    s->pos = 0;
    s->column = 0;
    s->line = 1;
}
|
||||
|
||||
/*
 * Advance the scanner to the next significant format character.
 *
 * The current token is saved as prev_token.  If prev_token() pushed a
 * token back earlier, that token is replayed and nothing is read from the
 * format string.  Otherwise spaces, tabs, newlines, ',' and ':' are
 * skipped (updating the line/column/pos counters) and the next character
 * becomes the current token; '\0' signals the end of the format string.
 *
 * Once the terminating NUL has been reached the read cursor stays on it,
 * so further calls keep returning '\0' instead of reading memory beyond
 * the end of the format string.
 */
static void next_token(scanner_t *s)
{
    const char *t;
    s->prev_token = s->token;

    /* A pushed-back token has a non-zero line; replay it exactly once. */
    if(s->next_token.line) {
        s->token = s->next_token;
        s->next_token.line = 0;
        return;
    }

    t = s->fmt;
    s->column++;
    s->pos++;

    /* skip space and ignored chars */
    while(*t == ' ' || *t == '\t' || *t == '\n' || *t == ',' || *t == ':') {
        if(*t == '\n') {
            s->line++;
            s->column = 1;
        }
        else
            s->column++;

        s->pos++;
        t++;
    }

    s->token.token = *t;
    s->token.line = s->line;
    s->token.column = s->column;
    s->token.pos = s->pos;

    /* Do not step over the terminator: a later scan must see it again
       rather than start one byte past the end of the string. */
    if(*t)
        t++;
    s->fmt = t;
}
|
||||
|
||||
/* Undo one next_token(): the current token is parked in the look-ahead
   slot (to be replayed by the next next_token() call) and the previously
   seen token becomes current again. */
static void prev_token(scanner_t *s)
{
    token_t current = s->token;

    s->token = s->prev_token;
    s->next_token = current;
}
|
||||
|
||||
/* Record a printf-style error message in the scanner's error object,
   tagged with the position of the current token and the given source. */
static void set_error(scanner_t *s, const char *source, const char *fmt, ...)
{
    const token_t *where = &s->token;
    va_list args;

    va_start(args, fmt);
    jsonp_error_vset(s->error, where->line, where->column, where->pos,
                     fmt, args);
    va_end(args);

    jsonp_error_set_source(s->error, source);
}
|
||||
|
||||
static json_t *pack(scanner_t *s, va_list *ap);
|
||||
|
||||
|
||||
/* ours will be set to 1 if jsonp_free() must be called for the result
|
||||
afterwards */
|
||||
static char *read_string(scanner_t *s, va_list *ap,
|
||||
const char *purpose, size_t *out_len, int *ours)
|
||||
{
|
||||
char t;
|
||||
strbuffer_t strbuff;
|
||||
const char *str;
|
||||
size_t length;
|
||||
|
||||
next_token(s);
|
||||
t = token(s);
|
||||
prev_token(s);
|
||||
|
||||
if(t != '#' && t != '%' && t != '+') {
|
||||
/* Optimize the simple case */
|
||||
str = va_arg(*ap, const char *);
|
||||
|
||||
if(!str) {
|
||||
set_error(s, "<args>", "NULL string argument");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
length = strlen(str);
|
||||
|
||||
if(!utf8_check_string(str, length)) {
|
||||
set_error(s, "<args>", "Invalid UTF-8 %s", purpose);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*out_len = length;
|
||||
*ours = 0;
|
||||
return (char *)str;
|
||||
}
|
||||
|
||||
strbuffer_init(&strbuff);
|
||||
|
||||
while(1) {
|
||||
str = va_arg(*ap, const char *);
|
||||
if(!str) {
|
||||
set_error(s, "<args>", "NULL string argument");
|
||||
strbuffer_close(&strbuff);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
next_token(s);
|
||||
|
||||
if(token(s) == '#') {
|
||||
length = va_arg(*ap, int);
|
||||
}
|
||||
else if(token(s) == '%') {
|
||||
length = va_arg(*ap, size_t);
|
||||
}
|
||||
else {
|
||||
prev_token(s);
|
||||
length = strlen(str);
|
||||
}
|
||||
|
||||
if(strbuffer_append_bytes(&strbuff, str, length) == -1) {
|
||||
set_error(s, "<internal>", "Out of memory");
|
||||
strbuffer_close(&strbuff);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
next_token(s);
|
||||
if(token(s) != '+') {
|
||||
prev_token(s);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!utf8_check_string(strbuff.value, strbuff.length)) {
|
||||
set_error(s, "<args>", "Invalid UTF-8 %s", purpose);
|
||||
strbuffer_close(&strbuff);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*out_len = strbuff.length;
|
||||
*ours = 1;
|
||||
return strbuffer_steal_value(&strbuff);
|
||||
}
|
||||
|
||||
/* Build a JSON object from a "{...}" format: a sequence of 's' keys, each
   followed by a value format.  Returns NULL (after releasing the partial
   object) on any format, argument or insertion error. */
static json_t *pack_object(scanner_t *s, va_list *ap)
{
    json_t *object = json_object();

    for(next_token(s); token(s) != '}'; next_token(s)) {
        json_t *value;
        char *key;
        size_t key_len;
        int key_is_ours;
        int failed;

        if(!token(s)) {
            set_error(s, "<format>", "Unexpected end of format string");
            goto error;
        }

        if(token(s) != 's') {
            set_error(s, "<format>", "Expected format 's', got '%c'", token(s));
            goto error;
        }

        key = read_string(s, ap, "object key", &key_len, &key_is_ours);
        if(!key)
            goto error;

        next_token(s);

        value = pack(s, ap);
        failed = !value;

        if(!failed && json_object_set_new_nocheck(object, key, value)) {
            set_error(s, "<internal>", "Unable to add key \"%s\"", key);
            failed = 1;
        }

        /* A concatenated key was allocated by read_string(). */
        if(key_is_ours)
            jsonp_free(key);

        if(failed)
            goto error;
    }

    return object;

error:
    json_decref(object);
    return NULL;
}
|
||||
|
||||
/* Build a JSON array from a "[...]" format by packing each element in
   turn.  Returns NULL (after releasing the partial array) on error. */
static json_t *pack_array(scanner_t *s, va_list *ap)
{
    json_t *array = json_array();

    for(next_token(s); token(s) != ']'; next_token(s)) {
        json_t *element;

        if(!token(s)) {
            set_error(s, "<format>", "Unexpected end of format string");
            goto error;
        }

        element = pack(s, ap);
        if(!element)
            goto error;

        if(json_array_append_new(array, element)) {
            set_error(s, "<internal>", "Unable to append to array");
            goto error;
        }
    }

    return array;

error:
    json_decref(array);
    return NULL;
}
|
||||
|
||||
/*
 * Pack an 's' format (optionally "s?") into a JSON string.
 *
 * With "s?", a NULL string argument produces a JSON null and no error is
 * recorded.  Every other failure reported by read_string() -- invalid
 * UTF-8, allocation failure, or a NULL argument without '?' -- makes this
 * function return NULL so that the caller reports the error instead of
 * silently substituting null.
 */
static json_t *pack_string(scanner_t *s, va_list *ap)
{
    char *str;
    size_t len;
    int ours;
    int nullable;

    next_token(s);
    nullable = token(s) == '?';
    if(!nullable)
        prev_token(s);

    if(nullable) {
        /* Look at the string argument without consuming it, so that the
           "argument is NULL" case can be told apart from real errors. */
        va_list peek;
        const char *first;

        va_copy(peek, *ap);
        first = va_arg(peek, const char *);
        va_end(peek);

        if(!first) {
            /* Consume the NULL argument, exactly as read_string() would. */
            (void)va_arg(*ap, const char *);
            return json_null();
        }
    }

    str = read_string(s, ap, "string", &len, &ours);
    if(!str)
        return NULL;

    if(ours)
        return jsonp_stringn_nocheck_own(str, len);

    return json_stringn_nocheck(str, len);
}
|
||||
|
||||
/* Pack one value according to the current format character, pulling its
   argument(s) from `ap`.  Returns NULL, with an error set, on an unknown
   format character. */
static json_t *pack(scanner_t *s, va_list *ap)
{
    switch(token(s)) {
        case '{':
            return pack_object(s, ap);

        case '[':
            return pack_array(s, ap);

        case 's': /* string */
            return pack_string(s, ap);

        case 'n': /* null */
            return json_null();

        case 'b': /* boolean */
        {
            int flag = va_arg(*ap, int);
            return flag ? json_true() : json_false();
        }

        case 'i': /* integer from int */
        {
            int number = va_arg(*ap, int);
            return json_integer(number);
        }

        case 'I': /* integer from json_int_t */
        {
            json_int_t number = va_arg(*ap, json_int_t);
            return json_integer(number);
        }

        case 'f': /* real */
        {
            double number = va_arg(*ap, double);
            return json_real(number);
        }

        case 'O': /* a json_t object; increments refcount */
        case 'o': /* a json_t object; doesn't increment refcount */
        {
            /* Remember which variant this is before scanning ahead. */
            int take_reference = token(s) == 'O';
            int nullable;
            json_t *json;

            next_token(s);
            nullable = token(s) == '?';
            if(!nullable)
                prev_token(s);

            json = va_arg(*ap, json_t *);
            if(!json && nullable)
                return json_null();

            return take_reference ? json_incref(json) : json;
        }

        default:
            set_error(s, "<format>", "Unexpected format character '%c'",
                      token(s));
            return NULL;
    }
}
|
||||
|
||||
static int unpack(scanner_t *s, json_t *root, va_list *ap);
|
||||
|
||||
static int unpack_object(scanner_t *s, json_t *root, va_list *ap)
|
||||
{
|
||||
int ret = -1;
|
||||
int strict = 0;
|
||||
int gotopt = 0;
|
||||
|
||||
/* Use a set (emulated by a hashtable) to check that all object
|
||||
keys are accessed. Checking that the correct number of keys
|
||||
were accessed is not enough, as the same key can be unpacked
|
||||
multiple times.
|
||||
*/
|
||||
hashtable_t key_set;
|
||||
|
||||
if(hashtable_init(&key_set)) {
|
||||
set_error(s, "<internal>", "Out of memory");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(root && !json_is_object(root)) {
|
||||
set_error(s, "<validation>", "Expected object, got %s",
|
||||
type_name(root));
|
||||
goto out;
|
||||
}
|
||||
next_token(s);
|
||||
|
||||
while(token(s) != '}') {
|
||||
const char *key;
|
||||
json_t *value;
|
||||
int opt = 0;
|
||||
|
||||
if(strict != 0) {
|
||||
set_error(s, "<format>", "Expected '}' after '%c', got '%c'",
|
||||
(strict == 1 ? '!' : '*'), token(s));
|
||||
goto out;
|
||||
}
|
||||
|
||||
if(!token(s)) {
|
||||
set_error(s, "<format>", "Unexpected end of format string");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if(token(s) == '!' || token(s) == '*') {
|
||||
strict = (token(s) == '!' ? 1 : -1);
|
||||
next_token(s);
|
||||
continue;
|
||||
}
|
||||
|
||||
if(token(s) != 's') {
|
||||
set_error(s, "<format>", "Expected format 's', got '%c'", token(s));
|
||||
goto out;
|
||||
}
|
||||
|
||||
key = va_arg(*ap, const char *);
|
||||
if(!key) {
|
||||
set_error(s, "<args>", "NULL object key");
|
||||
goto out;
|
||||
}
|
||||
|
||||
next_token(s);
|
||||
|
||||
if(token(s) == '?') {
|
||||
opt = gotopt = 1;
|
||||
next_token(s);
|
||||
}
|
||||
|
||||
if(!root) {
|
||||
/* skipping */
|
||||
value = NULL;
|
||||
}
|
||||
else {
|
||||
value = json_object_get(root, key);
|
||||
if(!value && !opt) {
|
||||
set_error(s, "<validation>", "Object item not found: %s", key);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if(unpack(s, value, ap))
|
||||
goto out;
|
||||
|
||||
hashtable_set(&key_set, key, json_null());
|
||||
next_token(s);
|
||||
}
|
||||
|
||||
if(strict == 0 && (s->flags & JSON_STRICT))
|
||||
strict = 1;
|
||||
|
||||
if(root && strict == 1) {
|
||||
/* We need to check that all non optional items have been parsed */
|
||||
const char *key;
|
||||
int have_unrecognized_keys = 0;
|
||||
strbuffer_t unrecognized_keys;
|
||||
json_t *value;
|
||||
long unpacked = 0;
|
||||
if (gotopt) {
|
||||
/* We have optional keys, we need to iter on each key */
|
||||
json_object_foreach(root, key, value) {
|
||||
if(!hashtable_get(&key_set, key)) {
|
||||
unpacked++;
|
||||
|
||||
/* Save unrecognized keys for the error message */
|
||||
if (!have_unrecognized_keys) {
|
||||
strbuffer_init(&unrecognized_keys);
|
||||
have_unrecognized_keys = 1;
|
||||
} else {
|
||||
strbuffer_append_bytes(&unrecognized_keys, ", ", 2);
|
||||
}
|
||||
strbuffer_append_bytes(&unrecognized_keys, key, strlen(key));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* No optional keys, we can just compare the number of items */
|
||||
unpacked = (long)json_object_size(root) - (long)key_set.size;
|
||||
}
|
||||
if (unpacked) {
|
||||
if (!gotopt) {
|
||||
/* Save unrecognized keys for the error message */
|
||||
json_object_foreach(root, key, value) {
|
||||
if(!hashtable_get(&key_set, key)) {
|
||||
if (!have_unrecognized_keys) {
|
||||
strbuffer_init(&unrecognized_keys);
|
||||
have_unrecognized_keys = 1;
|
||||
} else {
|
||||
strbuffer_append_bytes(&unrecognized_keys, ", ", 2);
|
||||
}
|
||||
strbuffer_append_bytes(&unrecognized_keys, key, strlen(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
set_error(s, "<validation>",
|
||||
"%li object item(s) left unpacked: %s",
|
||||
unpacked, strbuffer_value(&unrecognized_keys));
|
||||
strbuffer_close(&unrecognized_keys);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
hashtable_close(&key_set);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Unpack a "[...]" format against `root` (NULL means "skip").  Element
 * formats are matched against consecutive array indices.  '!' or '*'
 * before the closing bracket switches strict checking on or off;
 * otherwise JSON_STRICT in the scanner flags decides.  In strict mode the
 * array must contain exactly as many items as were unpacked.
 * Returns 0 on success and -1 with an error set on failure.
 */
static int unpack_array(scanner_t *s, json_t *root, va_list *ap)
{
    size_t index = 0;
    int strict = 0;     /* 1 after '!', -1 after '*', 0 if unspecified */

    if(root && !json_is_array(root)) {
        set_error(s, "<validation>", "Expected array, got %s", type_name(root));
        return -1;
    }
    next_token(s);

    while(token(s) != ']') {
        json_t *element;

        /* Nothing but ']' may follow '!' or '*'. */
        if(strict != 0) {
            set_error(s, "<format>", "Expected ']' after '%c', got '%c'",
                      (strict == 1 ? '!' : '*'),
                      token(s));
            return -1;
        }

        if(!token(s)) {
            set_error(s, "<format>", "Unexpected end of format string");
            return -1;
        }

        if(token(s) == '!' || token(s) == '*') {
            strict = (token(s) == '!' ? 1 : -1);
            next_token(s);
            continue;
        }

        if(!strchr(unpack_value_starters, token(s))) {
            set_error(s, "<format>", "Unexpected format character '%c'",
                      token(s));
            return -1;
        }

        if(root) {
            element = json_array_get(root, index);
            if(!element) {
                set_error(s, "<validation>", "Array index %lu out of range",
                          (unsigned long)index);
                return -1;
            }
        }
        else {
            /* skipping */
            element = NULL;
        }

        if(unpack(s, element, ap))
            return -1;

        next_token(s);
        index++;
    }

    if(strict == 0 && (s->flags & JSON_STRICT))
        strict = 1;

    if(root && strict == 1 && index != json_array_size(root)) {
        long diff = (long)json_array_size(root) - (long)index;
        set_error(s, "<validation>", "%li array item(s) left unpacked", diff);
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Unpack one value according to the current format character.
 *
 * `root` is the JSON value to read, or NULL when the value is being
 * skipped; in that case the type check is bypassed and, unless
 * JSON_VALIDATE_ONLY is set, the target pointer argument is still consumed
 * but never written.  With JSON_VALIDATE_ONLY no target arguments are
 * consumed at all.  Returns 0 on success and -1 with an error set on a
 * type mismatch, a NULL string target, or an unknown format character.
 */
static int unpack(scanner_t *s, json_t *root, va_list *ap)
{
    const int store = !(s->flags & JSON_VALIDATE_ONLY);

    switch(token(s))
    {
        case '{':
            return unpack_object(s, root, ap);

        case '[':
            return unpack_array(s, root, ap);

        case 's':
            if(root && !json_is_string(root)) {
                set_error(s, "<validation>", "Expected string, got %s",
                          type_name(root));
                return -1;
            }

            if(store) {
                const char **str_target = va_arg(*ap, const char **);
                size_t *len_target = NULL;

                if(!str_target) {
                    set_error(s, "<args>", "NULL string argument");
                    return -1;
                }

                /* "s%" additionally stores the string length. */
                next_token(s);
                if(token(s) != '%') {
                    prev_token(s);
                }
                else {
                    len_target = va_arg(*ap, size_t *);
                    if(!len_target) {
                        set_error(s, "<args>", "NULL string length argument");
                        return -1;
                    }
                }

                if(root) {
                    *str_target = json_string_value(root);
                    if(len_target)
                        *len_target = json_string_length(root);
                }
            }
            return 0;

        case 'i':
            if(root && !json_is_integer(root)) {
                set_error(s, "<validation>", "Expected integer, got %s",
                          type_name(root));
                return -1;
            }

            if(store) {
                int *out = va_arg(*ap, int*);
                if(root)
                    *out = (int)json_integer_value(root);
            }
            return 0;

        case 'I':
            if(root && !json_is_integer(root)) {
                set_error(s, "<validation>", "Expected integer, got %s",
                          type_name(root));
                return -1;
            }

            if(store) {
                json_int_t *out = va_arg(*ap, json_int_t*);
                if(root)
                    *out = json_integer_value(root);
            }
            return 0;

        case 'b':
            if(root && !json_is_boolean(root)) {
                set_error(s, "<validation>", "Expected true or false, got %s",
                          type_name(root));
                return -1;
            }

            if(store) {
                int *out = va_arg(*ap, int*);
                if(root)
                    *out = json_is_true(root);
            }
            return 0;

        case 'f':
            if(root && !json_is_real(root)) {
                set_error(s, "<validation>", "Expected real, got %s",
                          type_name(root));
                return -1;
            }

            if(store) {
                double *out = va_arg(*ap, double*);
                if(root)
                    *out = json_real_value(root);
            }
            return 0;

        case 'F':
            if(root && !json_is_number(root)) {
                set_error(s, "<validation>", "Expected real or integer, got %s",
                          type_name(root));
                return -1;
            }

            if(store) {
                double *out = va_arg(*ap, double*);
                if(root)
                    *out = json_number_value(root);
            }
            return 0;

        case 'O':
        case 'o':
            if(store) {
                json_t **out;

                /* Only the 'O' variant takes a new reference. */
                if(root && token(s) == 'O')
                    json_incref(root);

                out = va_arg(*ap, json_t**);
                if(root)
                    *out = root;
            }
            return 0;

        case 'n':
            /* Never assign, just validate */
            if(root && !json_is_null(root)) {
                set_error(s, "<validation>", "Expected null, got %s",
                          type_name(root));
                return -1;
            }
            return 0;

        default:
            set_error(s, "<format>", "Unexpected format character '%c'",
                      token(s));
            return -1;
    }
}
|
||||
|
||||
/* Build a JSON value from `fmt` and the arguments in `ap`.  Returns NULL,
   with `error` filled in, if the format is NULL or empty, if packing
   fails, or if anything follows the first complete value in the format. */
json_t *json_vpack_ex(json_error_t *error, size_t flags,
                      const char *fmt, va_list ap)
{
    scanner_t s;
    va_list args;
    json_t *result;

    if(!fmt || !*fmt) {
        jsonp_error_init(error, "<format>");
        jsonp_error_set(error, -1, -1, 0, "NULL or empty format string");
        return NULL;
    }
    jsonp_error_init(error, NULL);

    scanner_init(&s, error, flags, fmt);
    next_token(&s);

    /* Work on a copy so the caller's va_list is left untouched. */
    va_copy(args, ap);
    result = pack(&s, &args);
    va_end(args);

    if(result) {
        next_token(&s);
        if(token(&s)) {
            json_decref(result);
            set_error(&s, "<format>", "Garbage after format string");
            result = NULL;
        }
    }

    return result;
}
|
||||
|
||||
/* Variadic front end for json_vpack_ex(). */
json_t *json_pack_ex(json_error_t *error, size_t flags, const char *fmt, ...)
{
    va_list args;
    json_t *result;

    va_start(args, fmt);
    result = json_vpack_ex(error, flags, fmt, args);
    va_end(args);

    return result;
}
|
||||
|
||||
/* Variadic front end for json_vpack_ex() with no error object and no
   flags. */
json_t *json_pack(const char *fmt, ...)
{
    va_list args;
    json_t *result;

    va_start(args, fmt);
    result = json_vpack_ex(NULL, 0, fmt, args);
    va_end(args);

    return result;
}
|
||||
|
||||
/* Unpack `root` according to `fmt`, storing results through the pointers
   in `ap`.  Returns 0 on success; returns -1 with `error` filled in if
   `root` is NULL, the format is NULL or empty, unpacking fails, or
   anything follows the first complete value in the format. */
int json_vunpack_ex(json_t *root, json_error_t *error, size_t flags,
                    const char *fmt, va_list ap)
{
    scanner_t s;
    va_list args;
    int failed;

    if(!root) {
        jsonp_error_init(error, "<root>");
        jsonp_error_set(error, -1, -1, 0, "NULL root value");
        return -1;
    }

    if(!fmt || !*fmt) {
        jsonp_error_init(error, "<format>");
        jsonp_error_set(error, -1, -1, 0, "NULL or empty format string");
        return -1;
    }
    jsonp_error_init(error, NULL);

    scanner_init(&s, error, flags, fmt);
    next_token(&s);

    /* Work on a copy so the caller's va_list is left untouched. */
    va_copy(args, ap);
    failed = unpack(&s, root, &args) != 0;
    va_end(args);

    if(failed)
        return -1;

    next_token(&s);
    if(token(&s)) {
        set_error(&s, "<format>", "Garbage after format string");
        return -1;
    }

    return 0;
}
|
||||
|
||||
/* Variadic front end for json_vunpack_ex(). */
int json_unpack_ex(json_t *root, json_error_t *error, size_t flags, const char *fmt, ...)
{
    va_list args;
    int status;

    va_start(args, fmt);
    status = json_vunpack_ex(root, error, flags, fmt, args);
    va_end(args);

    return status;
}
|
||||
|
||||
/* Variadic front end for json_vunpack_ex() with no error object and no
   flags. */
int json_unpack(json_t *root, const char *fmt, ...)
{
    va_list args;
    int status;

    va_start(args, fmt);
    status = json_vunpack_ex(root, NULL, 0, fmt, args);
    va_end(args);

    return status;
}
|
||||
111
compat/jansson/strbuffer.c
Normal file
111
compat/jansson/strbuffer.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "jansson_private.h"
|
||||
#include "strbuffer.h"
|
||||
|
||||
#define STRBUFFER_MIN_SIZE 16
|
||||
#define STRBUFFER_FACTOR 2
|
||||
#define STRBUFFER_SIZE_MAX ((size_t)-1)
|
||||
|
||||
/* Set up `strbuff` as an empty string with STRBUFFER_MIN_SIZE bytes of
   storage.  Returns 0 on success, -1 if the allocation fails. */
int strbuffer_init(strbuffer_t *strbuff)
{
    strbuff->length = 0;
    strbuff->size = STRBUFFER_MIN_SIZE;
    strbuff->value = jsonp_malloc(strbuff->size);

    if(strbuff->value) {
        /* initialize to empty */
        strbuff->value[0] = '\0';
        return 0;
    }

    return -1;
}
|
||||
|
||||
/* Release the buffer's storage (if it still has any) and reset every
   field to zero/NULL. */
void strbuffer_close(strbuffer_t *strbuff)
{
    char *storage = strbuff->value;

    if(storage)
        jsonp_free(storage);

    strbuff->value = NULL;
    strbuff->length = 0;
    strbuff->size = 0;
}
|
||||
|
||||
/* Truncate the buffer to the empty string; the storage is kept. */
void strbuffer_clear(strbuffer_t *strbuff)
{
    strbuff->value[0] = '\0';
    strbuff->length = 0;
}
|
||||
|
||||
const char *strbuffer_value(const strbuffer_t *strbuff)
|
||||
{
|
||||
return strbuff->value;
|
||||
}
|
||||
|
||||
/* Hand the buffer's storage to the caller: the stored pointer is returned
   and the buffer's own pointer is set to NULL.  The length and size fields
   are left as they were. */
char *strbuffer_steal_value(strbuffer_t *strbuff)
{
    char *stolen;

    stolen = strbuff->value;
    strbuff->value = NULL;

    return stolen;
}
|
||||
|
||||
/* Append a single byte; returns whatever strbuffer_append_bytes()
   returns (0 on success, -1 on failure). */
int strbuffer_append_byte(strbuffer_t *strbuff, char byte)
{
    const char *one = &byte;

    return strbuffer_append_bytes(strbuff, one, 1);
}
|
||||
|
||||
/*
 * Append `size` bytes from `data` and keep the contents NUL-terminated.
 *
 * When the free space is not strictly larger than `size` (one byte is
 * needed for the terminator) the storage is replaced by a new allocation
 * of max(size * STRBUFFER_FACTOR, length + size + 1) bytes.  Returns 0 on
 * success and -1 if the new size would overflow size_t or the allocation
 * fails; in both failure cases the buffer is left unchanged.
 */
int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, size_t size)
{
    size_t available = strbuff->size - strbuff->length;

    if(size >= available)
    {
        size_t grown_size;
        char *grown;

        /* avoid integer overflow */
        if(strbuff->size > STRBUFFER_SIZE_MAX / STRBUFFER_FACTOR)
            return -1;
        if(size > STRBUFFER_SIZE_MAX - 1)
            return -1;
        if(strbuff->length > STRBUFFER_SIZE_MAX - 1 - size)
            return -1;

        grown_size = max(strbuff->size * STRBUFFER_FACTOR,
                         strbuff->length + size + 1);

        grown = jsonp_malloc(grown_size);
        if(!grown)
            return -1;

        /* Carry the existing contents over, then swap the storage. */
        memcpy(grown, strbuff->value, strbuff->length);
        jsonp_free(strbuff->value);

        strbuff->size = grown_size;
        strbuff->value = grown;
    }

    memcpy(strbuff->value + strbuff->length, data, size);
    strbuff->length += size;
    strbuff->value[strbuff->length] = '\0';

    return 0;
}
|
||||
|
||||
/* Remove and return the last byte of the buffer, keeping the contents
   NUL-terminated.  Returns '\0' when the buffer is empty. */
char strbuffer_pop(strbuffer_t *strbuff)
{
    char last;

    if(strbuff->length == 0)
        return '\0';

    strbuff->length--;
    last = strbuff->value[strbuff->length];
    strbuff->value[strbuff->length] = '\0';

    return last;
}
|
||||
34
compat/jansson/strbuffer.h
Normal file
34
compat/jansson/strbuffer.h
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef STRBUFFER_H
#define STRBUFFER_H

#include <stdlib.h>

/* Growable, NUL-terminated byte buffer. */
typedef struct {
    char *value;
    size_t length;   /* bytes used */
    size_t size;     /* bytes allocated */
} strbuffer_t;

/* Initialize to an empty string; returns 0 on success, -1 on failure. */
int strbuffer_init(strbuffer_t *strbuff);

/* Free the storage and reset all fields. */
void strbuffer_close(strbuffer_t *strbuff);

/* Truncate to the empty string without freeing the storage. */
void strbuffer_clear(strbuffer_t *strbuff);

/* Read-only pointer to the current contents. */
const char *strbuffer_value(const strbuffer_t *strbuff);

/* Steal the value and close the strbuffer */
char *strbuffer_steal_value(strbuffer_t *strbuff);

/* Append one byte or `size` bytes; both return 0 on success, -1 on failure. */
int strbuffer_append_byte(strbuffer_t *strbuff, char byte);
int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, size_t size);

/* Remove and return the last byte ('\0' if the buffer is empty). */
char strbuffer_pop(strbuffer_t *strbuff);

#endif
|
||||
145
compat/jansson/strconv.c
Normal file
145
compat/jansson/strconv.c
Normal file
@@ -0,0 +1,145 @@
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#ifdef __MINGW32__
|
||||
#undef __NO_ISOCEXT /* ensure stdlib.h will declare prototypes for mingw own 'strtod' replacement, called '__strtod' */
|
||||
#endif
|
||||
#include "jansson_private.h"
|
||||
#include "strbuffer.h"
|
||||
|
||||
/* need jansson_private_config.h to get the correct snprintf */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <jansson_private_config.h>
|
||||
#endif
|
||||
|
||||
#ifdef __MINGW32__
|
||||
#define strtod __strtod
|
||||
#endif
|
||||
|
||||
#if JSON_HAVE_LOCALECONV
|
||||
#include <locale.h>
|
||||
|
||||
/*
|
||||
- This code assumes that the decimal separator is exactly one
|
||||
character.
|
||||
|
||||
- If setlocale() is called by another thread between the call to
|
||||
localeconv() and the call to sprintf() or strtod(), the result may
|
||||
be wrong. setlocale() is not thread-safe and should not be used
|
||||
this way. Multi-threaded programs should use uselocale() instead.
|
||||
*/
|
||||
|
||||
/* Replace the first '.' in the buffer with the first character of the
   current locale's decimal point. Nothing is changed when the locale
   already uses '.'. */
static void to_locale(strbuffer_t *strbuffer)
{
    const char *separator = localeconv()->decimal_point;
    char *dot;

    if(*separator != '.') {
        dot = strchr(strbuffer->value, '.');
        if(dot != NULL)
            *dot = *separator;
    }
}
|
||||
|
||||
/* Inverse of to_locale(): replace the first occurrence of the current
   locale's decimal point character in buffer with '.'. Nothing is
   changed when the locale already uses '.'. */
static void from_locale(char *buffer)
{
    const char *separator = localeconv()->decimal_point;
    char *found;

    if(*separator != '.') {
        found = strchr(buffer, *separator);
        if(found != NULL)
            *found = '.';
    }
}
|
||||
#endif
|
||||
|
||||
/* Convert the whole contents of strbuffer to a double.
   When JSON_HAVE_LOCALECONV is set, the buffer is first rewritten in
   place so that its decimal point matches the current locale.
   The entire buffer is expected to be consumed by strtod(); this is
   checked with an assert.
   Returns 0 and stores the result in *out, or -1 (leaving *out
   untouched) when strtod() reports an overflow. */
int jsonp_strtod(strbuffer_t *strbuffer, double *out)
{
    char *parse_end;
    double parsed;

#if JSON_HAVE_LOCALECONV
    to_locale(strbuffer);
#endif

    /* errno must be cleared first so ERANGE can be attributed to strtod */
    errno = 0;
    parsed = strtod(strbuffer->value, &parse_end);
    assert(parse_end == strbuffer->value + strbuffer->length);

    if(errno == ERANGE && (parsed == HUGE_VAL || parsed == -HUGE_VAL)) {
        /* Overflow */
        return -1;
    }

    *out = parsed;
    return 0;
}
|
||||
|
||||
/* Format value into buffer (capacity: size bytes) as a JSON real.
   precision is the number of significant digits passed to "%.*g";
   0 selects 17.
   The output always contains a '.' or an 'e' so that it is decoded as a
   real again, and the exponent is normalized by dropping a leading '+'
   and leading zeros.
   Returns the length of the text written, excluding the terminating
   NUL, or -1 when formatting fails or the buffer is too small. On
   success the buffer is NUL-terminated at the returned length. */
int jsonp_dtostr(char *buffer, size_t size, double value, int precision)
{
    int ret;
    char *start, *end;
    size_t length;

    if(precision == 0)
        precision = 17;

    ret = snprintf(buffer, size, "%.*g", precision, value);
    if(ret < 0)
        return -1;

    length = (size_t)ret;
    if(length >= size)
        return -1;

#if JSON_HAVE_LOCALECONV
    from_locale(buffer);
#endif

    /* Make sure there's a dot or 'e' in the output. Otherwise
       a real is converted to an integer when decoding */
    if(strchr(buffer, '.') == NULL &&
       strchr(buffer, 'e') == NULL)
    {
        if(length + 3 >= size) {
            /* No space to append ".0" */
            return -1;
        }
        buffer[length] = '.';
        buffer[length + 1] = '0';
        buffer[length + 2] = '\0';
        length += 2;
    }

    /* Remove leading '+' from positive exponent. Also remove leading
       zeros from exponents (added by some printf() implementations) */
    start = strchr(buffer, 'e');
    if(start) {
        start++;            /* first character after 'e': the sign */
        end = start + 1;    /* first exponent digit */

        /* a '-' sign is kept; only the zeros after it are dropped */
        if(*start == '-')
            start++;

        while(*end == '0')
            end++;

        if(end != start) {
            /* The "+ 1" moves the terminating NUL together with the
               remaining digits, so the buffer stays a valid C string
               of exactly `length` characters. */
            memmove(start, end, length - (size_t)(end - buffer) + 1);
            length -= (size_t)(end - start);
        }
    }

    return (int)length;
}
|
||||
187
compat/jansson/utf.c
Normal file
187
compat/jansson/utf.c
Normal file
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "utf.h"
|
||||
|
||||
/* Encode codepoint as UTF-8 into buffer, which must have room for up
   to 4 bytes. The number of bytes written is stored in *size.
   Returns 0 on success, or -1 (buffer and *size untouched) when the
   code point is negative or above 0x10FFFF. */
int utf8_encode(int32_t codepoint, char *buffer, size_t *size)
{
    if(codepoint < 0 || codepoint > 0x10FFFF)
        return -1;

    if(codepoint < 0x80) {
        /* 1 byte: 0xxxxxxx */
        buffer[0] = (char)codepoint;
        *size = 1;
        return 0;
    }

    if(codepoint < 0x800) {
        /* 2 bytes: 110xxxxx 10xxxxxx */
        buffer[0] = (char)(0xC0 | (codepoint >> 6));
        buffer[1] = (char)(0x80 | (codepoint & 0x3F));
        *size = 2;
        return 0;
    }

    if(codepoint < 0x10000) {
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        buffer[0] = (char)(0xE0 | (codepoint >> 12));
        buffer[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
        buffer[2] = (char)(0x80 | (codepoint & 0x3F));
        *size = 3;
        return 0;
    }

    /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    buffer[0] = (char)(0xF0 | (codepoint >> 18));
    buffer[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
    buffer[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
    buffer[3] = (char)(0x80 | (codepoint & 0x3F));
    *size = 4;
    return 0;
}
|
||||
|
||||
/* Classify the first byte of a UTF-8 sequence.
   Returns the total length of the sequence it starts (1 to 4), or 0
   when the byte cannot start a valid sequence. */
size_t utf8_check_first(char byte)
{
    unsigned char lead = (unsigned char)byte;

    /* ASCII */
    if(lead < 0x80)
        return 1;

    /* 0x80..0xBF: continuation byte (second, third or fourth byte of a
       multi-byte sequence); 0xC0, 0xC1: overlong encoding of an ASCII
       byte */
    if(lead < 0xC2)
        return 0;

    /* 0xC2..0xDF: 2-byte sequence */
    if(lead < 0xE0)
        return 2;

    /* 0xE0..0xEF: 3-byte sequence */
    if(lead < 0xF0)
        return 3;

    /* 0xF0..0xF4: 4-byte sequence */
    if(lead < 0xF5)
        return 4;

    /* 0xF5 and above: restricted (start of 4-, 5- or 6-byte sequence)
       or invalid UTF-8 */
    return 0;
}
|
||||
|
||||
/* Validate a complete multi-byte UTF-8 sequence of `size` bytes
   (2, 3 or 4) starting at buffer.
   Returns 1 when the sequence is valid and, if codepoint is non-NULL,
   stores the decoded value there. Returns 0 (codepoint untouched) for
   any other size, a non-continuation byte, a value above 0x10FFFF, a
   UTF-16 surrogate half, or an overlong encoding. */
size_t utf8_check_full(const char *buffer, size_t size, int32_t *codepoint)
{
    unsigned char byte = (unsigned char)buffer[0];
    int32_t decoded;
    int32_t smallest;   /* lowest value that needs `size` bytes */
    size_t pos;

    switch(size) {
        case 2:
            decoded = byte & 0x1F;
            smallest = 0x80;
            break;
        case 3:
            decoded = byte & 0xF;
            smallest = 0x800;
            break;
        case 4:
            decoded = byte & 0x7;
            smallest = 0x10000;
            break;
        default:
            return 0;
    }

    for(pos = 1; pos < size; pos++) {
        byte = (unsigned char)buffer[pos];

        /* continuation bytes look like 10xxxxxx */
        if((byte & 0xC0) != 0x80)
            return 0;

        decoded = (decoded << 6) + (byte & 0x3F);
    }

    /* not in Unicode range */
    if(decoded > 0x10FFFF)
        return 0;

    /* invalid code point (UTF-16 surrogate halves) */
    if(decoded >= 0xD800 && decoded <= 0xDFFF)
        return 0;

    /* overlong encoding */
    if(decoded < smallest)
        return 0;

    if(codepoint)
        *codepoint = decoded;

    return 1;
}
|
||||
|
||||
/* Decode the UTF-8 sequence at the start of buffer (bufsize bytes
   available).
   Returns a pointer just past the decoded sequence and, if codepoint is
   non-NULL, stores the decoded value there. Returns buffer itself when
   bufsize is 0 (codepoint untouched), and NULL when the sequence is
   invalid or does not fit in bufsize bytes. */
const char *utf8_iterate(const char *buffer, size_t bufsize, int32_t *codepoint)
{
    int32_t decoded;
    size_t width;

    if(bufsize == 0)
        return buffer;

    width = utf8_check_first(buffer[0]);
    if(width == 0)
        return NULL;

    if(width > 1) {
        /* multi-byte: must fit in the buffer and be fully valid */
        if(width > bufsize || !utf8_check_full(buffer, width, &decoded))
            return NULL;
    }
    else
        decoded = (unsigned char)buffer[0];

    if(codepoint != NULL)
        *codepoint = decoded;

    return buffer + width;
}
|
||||
|
||||
/* Check that the `length` bytes at string form valid UTF-8.
   Returns 1 when every sequence is valid and complete, 0 otherwise.
   An empty input is valid. */
int utf8_check_string(const char *string, size_t length)
{
    size_t pos = 0;

    while(pos < length) {
        size_t width = utf8_check_first(string[pos]);

        if(width == 0)
            return 0;

        if(width > 1) {
            /* sequence would run past the end of the input */
            if(width > length - pos)
                return 0;

            if(!utf8_check_full(&string[pos], width, NULL))
                return 0;
        }

        pos += width;
    }

    return 1;
}
|
||||
27
compat/jansson/utf.h
Normal file
27
compat/jansson/utf.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
|
||||
*
|
||||
* Jansson is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the MIT license. See LICENSE for details.
|
||||
*/
|
||||
|
||||
#ifndef UTF_H
|
||||
#define UTF_H
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <jansson_private_config.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
int utf8_encode(int32_t codepoint, char *buffer, size_t *size);
|
||||
|
||||
size_t utf8_check_first(char byte);
|
||||
size_t utf8_check_full(const char *buffer, size_t size, int32_t *codepoint);
|
||||
const char *utf8_iterate(const char *buffer, size_t size, int32_t *codepoint);
|
||||
|
||||
int utf8_check_string(const char *string, size_t length);
|
||||
|
||||
#endif
|
||||
1045
compat/jansson/value.c
Normal file
1045
compat/jansson/value.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,19 +3,17 @@ project (cpuid C)
|
||||
|
||||
add_definitions(/DVERSION="0.4.0")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
|
||||
|
||||
set(HEADERS
|
||||
libcpuid.h
|
||||
libcpuid_types.h
|
||||
libcpuid_constants.h
|
||||
libcpuid_internal.h
|
||||
amd_code_t.h
|
||||
intel_code_t.h
|
||||
recog_amd.h
|
||||
recog_intel.h
|
||||
asm-bits.h
|
||||
libcpuid_util.h
|
||||
amd_code_t.h
|
||||
intel_code_t.h
|
||||
recog_amd.h
|
||||
recog_intel.h
|
||||
asm-bits.h
|
||||
libcpuid_util.h
|
||||
)
|
||||
|
||||
set(SOURCES
|
||||
@@ -26,13 +24,7 @@ set(SOURCES
|
||||
libcpuid_util.c
|
||||
)
|
||||
|
||||
if (CMAKE_CL_64)
|
||||
enable_language(ASM_MASM)
|
||||
set(SOURCES_ASM masm-x64.asm)
|
||||
endif()
|
||||
|
||||
add_library(cpuid STATIC
|
||||
${HEADERS}
|
||||
${SOURCES}
|
||||
${SOURCES_ASM}
|
||||
)
|
||||
@@ -46,6 +46,11 @@ int set_error(cpu_error_t err)
|
||||
return (int) err;
|
||||
}
|
||||
|
||||
static void raw_data_t_constructor(struct cpu_raw_data_t* raw)
|
||||
{
|
||||
memset(raw, 0, sizeof(struct cpu_raw_data_t));
|
||||
}
|
||||
|
||||
static void cpu_id_t_constructor(struct cpu_id_t* id)
|
||||
{
|
||||
memset(id, 0, sizeof(struct cpu_id_t));
|
||||
@@ -55,6 +60,29 @@ static void cpu_id_t_constructor(struct cpu_id_t* id)
|
||||
id->sse_size = -1;
|
||||
}
|
||||
|
||||
/*
 * Try to interpret one "token=value" pair as an entry of a register array.
 *
 * expected_token - name of the array this call is looking for
 * token          - the token as read, expected in the form "name[index]"
 * value          - text holding four hexadecimal numbers (eax ebx ecx edx)
 * array          - destination; array[index][0..3] receives the four values
 * limit          - number of rows in array; index must be in [0, limit)
 * recognized     - in/out flag: when already nonzero the call does nothing;
 *                  it is set to 1 as soon as token starts with expected_token
 *
 * Returns 1 when there is nothing to report (token already recognized, token
 * does not start with expected_token, or the entry was stored), and 0 when
 * token starts with expected_token but its index or value could not be used.
 */
static int parse_token(const char* expected_token, const char *token,
                       const char *value, uint32_t array[][4], int limit, int *recognized)
{
	/* %x requires pointers to unsigned int */
	unsigned int veax, vebx, vecx, vedx;
	size_t prefix_len;
	int index;

	if (*recognized) return 1; /* already recognized */
	prefix_len = strlen(expected_token);
	if (strncmp(token, expected_token, prefix_len)) return 1; /* not what we search for */
	*recognized = 1;
	/* The prefix is already known to match, so only the "[index]" suffix
	 * needs parsing. This avoids building a format string in a fixed-size
	 * buffer, which could overflow for a long expected_token. */
	if (1 == sscanf(token + prefix_len, "[%d]", &index) && index >= 0 && index < limit) {
		if (4 == sscanf(value, "%x%x%x%x", &veax, &vebx, &vecx, &vedx)) {
			array[index][0] = veax;
			array[index][1] = vebx;
			array[index][2] = vecx;
			array[index][3] = vedx;
			return 1;
		}
	}
	return 0;
}
|
||||
|
||||
/* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */
|
||||
#ifdef __APPLE__
|
||||
#include <unistd.h>
|
||||
@@ -89,7 +117,7 @@ static int get_total_cpus(void)
|
||||
#if defined linux || defined __linux__ || defined __sun
|
||||
#include <sys/sysinfo.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
return sysconf(_SC_NPROCESSORS_ONLN);
|
||||
@@ -221,42 +249,42 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
|
||||
|
||||
static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str)
|
||||
{
|
||||
int i;
|
||||
cpu_vendor_t vendor = VENDOR_UNKNOWN;
|
||||
const struct { cpu_vendor_t vendor; char match[16]; }
|
||||
matchtable[NUM_CPU_VENDORS] = {
|
||||
/* source: http://www.sandpile.org/ia32/cpuid.htm */
|
||||
{ VENDOR_INTEL , "GenuineIntel" },
|
||||
{ VENDOR_AMD , "AuthenticAMD" },
|
||||
{ VENDOR_CYRIX , "CyrixInstead" },
|
||||
{ VENDOR_NEXGEN , "NexGenDriven" },
|
||||
{ VENDOR_TRANSMETA , "GenuineTMx86" },
|
||||
{ VENDOR_UMC , "UMC UMC UMC " },
|
||||
{ VENDOR_CENTAUR , "CentaurHauls" },
|
||||
{ VENDOR_RISE , "RiseRiseRise" },
|
||||
{ VENDOR_SIS , "SiS SiS SiS " },
|
||||
{ VENDOR_NSC , "Geode by NSC" },
|
||||
};
|
||||
int i;
|
||||
cpu_vendor_t vendor = VENDOR_UNKNOWN;
|
||||
const struct { cpu_vendor_t vendor; char match[16]; }
|
||||
matchtable[NUM_CPU_VENDORS] = {
|
||||
/* source: http://www.sandpile.org/ia32/cpuid.htm */
|
||||
{ VENDOR_INTEL , "GenuineIntel" },
|
||||
{ VENDOR_AMD , "AuthenticAMD" },
|
||||
{ VENDOR_CYRIX , "CyrixInstead" },
|
||||
{ VENDOR_NEXGEN , "NexGenDriven" },
|
||||
{ VENDOR_TRANSMETA , "GenuineTMx86" },
|
||||
{ VENDOR_UMC , "UMC UMC UMC " },
|
||||
{ VENDOR_CENTAUR , "CentaurHauls" },
|
||||
{ VENDOR_RISE , "RiseRiseRise" },
|
||||
{ VENDOR_SIS , "SiS SiS SiS " },
|
||||
{ VENDOR_NSC , "Geode by NSC" },
|
||||
};
|
||||
|
||||
memcpy(vendor_str + 0, &raw_vendor[1], 4);
|
||||
memcpy(vendor_str + 4, &raw_vendor[3], 4);
|
||||
memcpy(vendor_str + 8, &raw_vendor[2], 4);
|
||||
vendor_str[12] = 0;
|
||||
memcpy(vendor_str + 0, &raw_vendor[1], 4);
|
||||
memcpy(vendor_str + 4, &raw_vendor[3], 4);
|
||||
memcpy(vendor_str + 8, &raw_vendor[2], 4);
|
||||
vendor_str[12] = 0;
|
||||
|
||||
/* Determine vendor: */
|
||||
for (i = 0; i < NUM_CPU_VENDORS; i++)
|
||||
if (!strcmp(vendor_str, matchtable[i].match)) {
|
||||
vendor = matchtable[i].vendor;
|
||||
break;
|
||||
}
|
||||
return vendor;
|
||||
/* Determine vendor: */
|
||||
for (i = 0; i < NUM_CPU_VENDORS; i++)
|
||||
if (!strcmp(vendor_str, matchtable[i].match)) {
|
||||
vendor = matchtable[i].vendor;
|
||||
break;
|
||||
}
|
||||
return vendor;
|
||||
}
|
||||
|
||||
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int i, j, basic, xmodel, xfamily, ext;
|
||||
char brandstr[64] = {0};
|
||||
data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
|
||||
data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
|
||||
|
||||
if (data->vendor == VENDOR_UNKNOWN)
|
||||
return set_error(ERR_CPU_UNKN);
|
||||
@@ -274,7 +302,7 @@ static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* dat
|
||||
data->ext_model = data->model + (xmodel << 4);
|
||||
}
|
||||
ext = raw->ext_cpuid[0][0] - 0x8000000;
|
||||
|
||||
|
||||
/* obtain the brand string, if present: */
|
||||
if (ext >= 4) {
|
||||
for (i = 0; i < 3; i++)
|
||||
@@ -292,6 +320,27 @@ static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* dat
|
||||
return set_error(ERR_OK);
|
||||
}
|
||||
|
||||
static void make_list_from_string(const char* csv, struct cpu_list_t* list)
|
||||
{
|
||||
int i, n, l, last;
|
||||
l = (int) strlen(csv);
|
||||
n = 0;
|
||||
for (i = 0; i < l; i++) if (csv[i] == ',') n++;
|
||||
n++;
|
||||
list->num_entries = n;
|
||||
list->names = (char**) malloc(sizeof(char*) * n);
|
||||
last = -1;
|
||||
n = 0;
|
||||
for (i = 0; i <= l; i++) if (i == l || csv[i] == ',') {
|
||||
list->names[n] = (char*) malloc(i - last);
|
||||
memcpy(list->names[n], &csv[last + 1], i - last - 1);
|
||||
list->names[n][i - last - 1] = '\0';
|
||||
n++;
|
||||
last = i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Interface: */
|
||||
|
||||
int cpuid_get_total_cpus(void)
|
||||
@@ -610,6 +610,39 @@ void cpu_exec_cpuid_ext(uint32_t* regs);
|
||||
*/
|
||||
int cpuid_get_raw_data(struct cpu_raw_data_t* data);
|
||||
|
||||
/**
|
||||
* @brief Writes the raw CPUID data to a text file
|
||||
* @param data - a pointer to cpu_raw_data_t structure
|
||||
* @param filename - the path of the file, where the serialized data should be
|
||||
* written. If empty, stdout will be used.
|
||||
* @note This is intended primarily for debugging. On some processor, which is
|
||||
* not currently supported or not completely recognized by cpu_identify,
|
||||
* one can still successfully get the raw data and write it to a file.
|
||||
* libcpuid developers can later import this file and debug the detection
|
||||
* code as if running on the actual hardware.
|
||||
* The file is simple text format of "something=value" pairs. Version info
|
||||
* is also written, but the format is intended to be neither backward-
|
||||
* nor forward compatible.
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpuid_serialize_raw_data(struct cpu_raw_data_t* data, const char* filename);
|
||||
|
||||
/**
|
||||
* @brief Reads raw CPUID data from file
|
||||
* @param data - a pointer to cpu_raw_data_t structure. The deserialized data will
|
||||
* be written here.
|
||||
* @param filename - the path of the file, containing the serialized raw data.
|
||||
* If empty, stdin will be used.
|
||||
* @note This function may fail, if the file is created by different version of
|
||||
* the library. Also, see the notes on cpuid_serialize_raw_data.
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpuid_deserialize_raw_data(struct cpu_raw_data_t* data, const char* filename);
|
||||
|
||||
/**
|
||||
* @brief Identifies the CPU
|
||||
* @param raw - Input - a pointer to the raw CPUID data, which is obtained
|
||||
@@ -635,6 +668,222 @@ int cpuid_get_raw_data(struct cpu_raw_data_t* data);
|
||||
*/
|
||||
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data);
|
||||
|
||||
/**
|
||||
* @brief Returns the short textual representation of a CPU flag
|
||||
* @param feature - the feature, whose textual representation is wanted.
|
||||
* @returns a constant string like "fpu", "tsc", "sse2", etc.
|
||||
* @note the names of the returned flags are compatible with those from
|
||||
* /proc/cpuinfo in Linux, with the exception of `tm_amd'
|
||||
*/
|
||||
const char* cpu_feature_str(cpu_feature_t feature);
|
||||
|
||||
/**
|
||||
* @brief Returns textual description of the last error
|
||||
*
|
||||
* libcpuid stores an `errno'-style error status, whose description
|
||||
* can be obtained with this function.
|
||||
* @note This function is not thread-safe
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
const char* cpuid_error(void);
|
||||
|
||||
/**
|
||||
* @brief Executes RDTSC
|
||||
*
|
||||
* The RDTSC (ReaD Time Stamp Counter) instruction gives access to an
|
||||
* internal 64-bit counter, which usually increments at each clock cycle.
|
||||
* This can be used for various timing routines, and as a very precise
|
||||
* clock source. It is set to zero on system startup. Beware that may not
|
||||
* increment at the same frequency as the CPU. Consecutive calls of RDTSC
|
||||
* are, however, guaranteed to return monotonically-increasing values.
|
||||
*
|
||||
* @param result - a pointer to a 64-bit unsigned integer, where the TSC value
|
||||
* will be stored
|
||||
*
|
||||
* @note If 100% compatibility is a concern, you must first check if the
|
||||
* RDTSC instruction is present (if it is not, your program will crash
|
||||
* with "invalid opcode" exception). Only some very old processors (i486,
|
||||
* early AMD K5 and some Cyrix CPUs) lack that instruction - they should
|
||||
* have become exceedingly rare these days. To verify RDTSC presence,
|
||||
* run cpu_identify() and check flags[CPU_FEATURE_TSC].
|
||||
*
|
||||
* @note The monotonically increasing nature of the TSC may be violated
|
||||
* on SMP systems, if their TSC clocks run at different rate. If the OS
|
||||
* doesn't account for that, the TSC drift may become arbitrary large.
|
||||
*/
|
||||
void cpu_rdtsc(uint64_t* result);
|
||||
|
||||
/**
|
||||
* @brief Store TSC and timing info
|
||||
*
|
||||
* This function stores the current TSC value and current
|
||||
* time info from a precise OS-specific clock source in the cpu_mark_t
|
||||
* structure. The sys_clock field contains time with microsecond resolution.
|
||||
* The values can later be used to measure time intervals, number of clocks,
|
||||
* FPU frequency, etc.
|
||||
* @see cpu_rdtsc
|
||||
*
|
||||
* @param mark [out] - a pointer to a cpu_mark_t structure
|
||||
*/
|
||||
void cpu_tsc_mark(struct cpu_mark_t* mark);
|
||||
|
||||
/**
|
||||
* @brief Calculate TSC and timing difference
|
||||
*
|
||||
* @param mark - input/output: a pointer to a cpu_mark_t structure, which has
|
||||
* already been initialized by cpu_tsc_mark. The difference in
|
||||
* TSC and time will be written here.
|
||||
*
|
||||
* This function calculates the TSC and time difference, by obtaining the
|
||||
* current TSC and timing values and subtracting the contents of the `mark'
|
||||
* structure from them. Results are written in the same structure.
|
||||
*
|
||||
* Example:
|
||||
* @code
|
||||
* ...
|
||||
* struct cpu_mark_t mark;
|
||||
* cpu_tsc_mark(&mark);
|
||||
* foo();
|
||||
* cpu_tsc_unmark(&mark);
|
||||
* printf("Foo finished. Executed in %llu cycles and %llu usecs\n",
|
||||
* mark.tsc, mark.sys_clock);
|
||||
* ...
|
||||
* @endcode
|
||||
*/
|
||||
void cpu_tsc_unmark(struct cpu_mark_t* mark);
|
||||
|
||||
/**
|
||||
* @brief Calculates the CPU clock
|
||||
*
|
||||
* @param mark - pointer to a cpu_mark_t structure, which has been initialized
|
||||
* with cpu_tsc_mark and later `stopped' with cpu_tsc_unmark.
|
||||
*
|
||||
* @note For reliable results, the marked time interval should be at least about
|
||||
* 10 ms.
|
||||
*
|
||||
* @returns the CPU clock frequency, in MHz. Due to measurement error, it will
|
||||
* differ from the true value in a few least-significant bits. Accuracy depends
|
||||
* on the timing interval - the more, the better. If the timing interval is
|
||||
* insufficient, the result is -1. Also, see the comment on cpu_clock_measure
|
||||
* for additional issues and pitfalls in using RDTSC for CPU frequency
|
||||
* measurements.
|
||||
*/
|
||||
int cpu_clock_by_mark(struct cpu_mark_t* mark);
|
||||
|
||||
/**
|
||||
* @brief Returns the CPU clock, as reported by the OS
|
||||
*
|
||||
* This function uses OS-specific functions to obtain the CPU clock. It may
|
||||
* differ from the true clock for several reasons:<br><br>
|
||||
*
|
||||
* i) The CPU might be in some power saving state, while the OS reports its
|
||||
* full-power frequency, or vice-versa.<br>
|
||||
* ii) In some cases you can raise or lower the CPU frequency with overclocking
|
||||
* utilities and the OS will not notice.
|
||||
*
|
||||
* @returns the CPU clock frequency in MHz. If the OS is not (yet) supported
|
||||
* or lacks the necessary reporting machinery, the return value is -1
|
||||
*/
|
||||
int cpu_clock_by_os(void);
|
||||
|
||||
/**
|
||||
* @brief Measure the CPU clock frequency
|
||||
*
|
||||
* @param millis - How much time to waste in the busy-wait cycle. In millisecs.
|
||||
* Useful values 10 - 1000
|
||||
* @param quad_check - Do a more thorough measurement if nonzero
|
||||
* (see the explanation).
|
||||
*
|
||||
* The function performs a busy-wait cycle for the given time and calculates
|
||||
* the CPU frequency by the difference of the TSC values. The accuracy of the
|
||||
* calculation depends on the length of the busy-wait cycle: more is better,
|
||||
* but 100ms should be enough for most purposes.
|
||||
*
|
||||
* While this will calculate the CPU frequency correctly in most cases, there are
|
||||
* several reasons why it might be incorrect:<br>
|
||||
*
|
||||
* i) RDTSC doesn't guarantee it will run at the same clock as the CPU.
|
||||
* Apparently there aren't CPUs at the moment, but still, there's no
|
||||
* guarantee.<br>
|
||||
* ii) The CPU might be in a low-frequency power saving mode, and the CPU
|
||||
* might be switched to higher frequency at any time. If this happens
|
||||
* during the measurement, the result can be anywhere between the
|
||||
* low and high frequencies. Also, if you're interested in the
|
||||
* high frequency value only, this function might return the low one
|
||||
* instead.<br>
|
||||
* iii) On SMP systems exhibiting TSC drift (see \ref cpu_rdtsc)
|
||||
*
|
||||
* the quad_check option will run four consecutive measurements and
|
||||
* then return the average of the two most-consistent results. The total
|
||||
* runtime of the function will still be `millis' - consider using
|
||||
* a bit more time for the timing interval.
|
||||
*
|
||||
* Finally, for benchmarking / CPU intensive applications, the best strategy is
|
||||
* to use the cpu_tsc_mark() / cpu_tsc_unmark() / cpu_clock_by_mark() method.
|
||||
* Begin by mark()-ing about one second after application startup (allowing the
|
||||
* power-saving manager to kick in and raise the frequency during that time),
|
||||
* then unmark() just before application finishing. The result will most
|
||||
* accurately represent at what frequency your app was running.
|
||||
*
|
||||
* @returns the CPU clock frequency in MHz (within some measurement error
|
||||
* margin). If RDTSC is not supported, the result is -1.
|
||||
*/
|
||||
int cpu_clock_measure(int millis, int quad_check);
|
||||
|
||||
/**
|
||||
* @brief Measure the CPU clock frequency using instruction-counting
|
||||
*
|
||||
* @param millis - how much time to allocate for each run, in milliseconds
|
||||
* @param runs - how many runs to perform
|
||||
*
|
||||
* The function performs a busy-wait cycle using a known number of "heavy" (SSE)
|
||||
* instructions. These instructions run at (more or less guaranteed) 1 IPC rate,
|
||||
* so by running a busy loop for a fixed amount of time, and measuring the
|
||||
* amount of instructions done, the CPU clock is accurately measured.
|
||||
*
|
||||
* Of course, this function is still affected by the power-saving schemes, so
|
||||
* the warnings as of cpu_clock_measure() still apply. However, this function is
|
||||
* immune to problems with detection, related to the Intel Nehalem's "Turbo"
|
||||
* mode, where the internal clock is raised, but the RDTSC rate is unaffected.
|
||||
*
|
||||
* The function will run for about (millis * runs) milliseconds.
|
||||
* You can make only a single busy-wait run (runs == 1); however, this can
|
||||
* be affected by task scheduling (which will break the counting), so allowing
|
||||
* more than one run is recommended. As run length is not imperative for
|
||||
* accurate readings (e.g., 50ms is sufficient), you can afford a lot of short
|
||||
* runs, e.g. 10 runs of 50ms or 20 runs of 25ms.
|
||||
*
|
||||
* Recommended values - millis = 50, runs = 4. For more robustness,
|
||||
* increase the number of runs.
|
||||
*
|
||||
* NOTE: on Bulldozer and later CPUs, the busy-wait cycle runs at 1.4 IPC, thus
|
||||
* the results are skewed. This is corrected internally by dividing the resulting
|
||||
* value by 1.4.
|
||||
* However, this only occurs if the thread is executed on a single CMT
|
||||
* module - if there are other threads competing for resources, the results are
|
||||
* unpredictable. Make sure you run cpu_clock_by_ic() on a CPU that is free from
|
||||
* competing threads, or if there are such threads, they shouldn't exceed the
|
||||
* number of modules. On a Bulldozer X8, that means 4 threads.
|
||||
*
|
||||
* @returns the CPU clock frequency in MHz (within some measurement error
|
||||
* margin). If SSE is not supported, the result is -1. If the input parameters
|
||||
* are incorrect, or some other internal fault is detected, the result is -2.
|
||||
*/
|
||||
int cpu_clock_by_ic(int millis, int runs);
|
||||
|
||||
/**
|
||||
* @brief Get the CPU clock frequency (all-in-one method)
|
||||
*
|
||||
* This is an all-in-one method for getting the CPU clock frequency.
|
||||
* It tries to use the OS for that. If the OS doesn't have this info, it
|
||||
* uses cpu_clock_measure with 200ms time interval and quadruple checking.
|
||||
*
|
||||
* @returns the CPU clock frequency in MHz. If every possible method fails,
|
||||
* the result is -1.
|
||||
*/
|
||||
int cpu_clock(void);
|
||||
|
||||
|
||||
/**
|
||||
* @brief The return value of cpuid_get_epc().
|
||||
* @details
|
||||
@@ -667,6 +916,230 @@ struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw);
|
||||
*/
|
||||
const char* cpuid_lib_version(void);
|
||||
|
||||
typedef void (*libcpuid_warn_fn_t) (const char *msg);
|
||||
/**
|
||||
* @brief Sets the warning print function
|
||||
*
|
||||
* In some cases, the internal libcpuid machinery would like to emit useful
|
||||
* debug warnings. By default, these warnings are written to stderr. However,
|
||||
* you can set a custom function that will receive those warnings.
|
||||
*
|
||||
* @param warn_fun - the warning function you want to set. If NULL, warnings
|
||||
* are disabled. The function takes const char* argument.
|
||||
*
|
||||
* @returns the current warning function. You can use the return value to
|
||||
* keep the previous warning function and restore it at your discretion.
|
||||
*/
|
||||
libcpuid_warn_fn_t cpuid_set_warn_function(libcpuid_warn_fn_t warn_fun);
|
||||
|
||||
/**
|
||||
* @brief Sets the verbosiness level
|
||||
*
|
||||
* When the verbosiness level is above zero, some functions might print
|
||||
* diagnostic information about what they are doing. The higher the level is,
|
||||
* the more detail is printed. Level zero is guaranteed to omit all such
|
||||
* output. The output is written using the same machinery as the warnings,
|
||||
* @see cpuid_set_warn_function()
|
||||
*
|
||||
* @param level the desired verbosiness level. Useful values 0..2 inclusive
|
||||
*/
|
||||
void cpuid_set_verbosiness_level(int level);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Obtains the CPU vendor from CPUID from the current CPU
|
||||
* @note The result is cached.
|
||||
* @returns VENDOR_UNKNOWN if failed, otherwise the CPU vendor type.
|
||||
* @see cpu_vendor_t
|
||||
*/
|
||||
cpu_vendor_t cpuid_get_vendor(void);
|
||||
|
||||
/**
|
||||
* @brief a structure that holds a list of processor names
|
||||
*/
|
||||
struct cpu_list_t {
	/** How many names the list holds */
	int num_entries;
	/** Array of num_entries pointers, one per name */
	char **names;
};
|
||||
|
||||
/**
|
||||
* @brief Gets a list of all known CPU names from a specific vendor.
|
||||
*
|
||||
* This function compiles a list of all known CPU (code)names
|
||||
* (i.e. the possible values of cpu_id_t::cpu_codename) for the given vendor.
|
||||
*
|
||||
* There are about 100 entries for Intel and AMD, and a few for the other
|
||||
* vendors. The list is written out in approximate chronological introduction
|
||||
* order of the parts.
|
||||
*
|
||||
* @param vendor the vendor to be queried
|
||||
* @param list [out] the resulting list will be written here.
|
||||
* NOTE: As the memory is dynamically allocated, be sure to call
|
||||
* cpuid_free_cpu_list() after you're done with the data
|
||||
* @see cpu_list_t
|
||||
*/
|
||||
void cpuid_get_cpu_list(cpu_vendor_t vendor, struct cpu_list_t* list);
|
||||
|
||||
/**
|
||||
* @brief Frees a CPU list
|
||||
*
|
||||
* This function deletes all the memory associated with a CPU list, as obtained
|
||||
* by cpuid_get_cpu_list()
|
||||
*
|
||||
* @param list - the list to be free()'d.
|
||||
*/
|
||||
void cpuid_free_cpu_list(struct cpu_list_t* list);
|
||||
|
||||
struct msr_driver_t;
|
||||
/**
|
||||
* @brief Starts/opens a driver, needed to read MSRs (Model Specific Registers)
|
||||
*
|
||||
* On systems that support it, this function will create a temporary
|
||||
* system driver, that has privileges to execute the RDMSR instruction.
|
||||
* After the driver is created, you can read MSRs by calling \ref cpu_rdmsr
|
||||
*
|
||||
* @returns a handle to the driver on success, and NULL on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
struct msr_driver_t* cpu_msr_driver_open(void);
|
||||
|
||||
/**
|
||||
* @brief Similar to \ref cpu_msr_driver_open, but accept one parameter
|
||||
*
|
||||
* This function works on certain operating systems (GNU/Linux, FreeBSD)
|
||||
*
|
||||
* @param core_num specify the core number for MSR.
|
||||
* The first core number is 0.
|
||||
* The last core number is \ref cpuid_get_total_cpus - 1.
|
||||
*
|
||||
* @returns a handle to the driver on success, and NULL on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num);
|
||||
|
||||
/**
|
||||
* @brief Reads a Model-Specific Register (MSR)
|
||||
*
|
||||
* If the CPU has MSRs (as indicated by the CPU_FEATURE_MSR flag), you can
|
||||
* read a MSR with the given index by calling this function.
|
||||
*
|
||||
* There are several prerequisites you must do before reading MSRs:
|
||||
* 1) You must ensure the CPU has RDMSR. Check the CPU_FEATURE_MSR flag
|
||||
* in cpu_id_t::flags
|
||||
* 2) You must ensure that the CPU implements the specific MSR you intend to
|
||||
* read.
|
||||
* 3) You must open a MSR-reader driver. RDMSR is a privileged instruction and
|
||||
* needs ring-0 access in order to work. This temporary driver is created
|
||||
* by calling \ref cpu_msr_driver_open
|
||||
*
|
||||
* @param handle - a handle to the MSR reader driver, as created by
|
||||
* cpu_msr_driver_open
|
||||
* @param msr_index - the numeric ID of the MSR you want to read
|
||||
* @param result - a pointer to a 64-bit integer, where the MSR value is stored
|
||||
*
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpu_rdmsr(struct msr_driver_t* handle, uint32_t msr_index, uint64_t* result);
|
||||
|
||||
|
||||
typedef enum {
|
||||
INFO_MPERF, /*!< Maximum performance frequency clock. This
|
||||
is a counter, which increments as a
|
||||
proportion of the actual processor speed. */
|
||||
INFO_APERF, /*!< Actual performance frequency clock. This
|
||||
accumulates the core clock counts when the
|
||||
core is active. */
|
||||
INFO_MIN_MULTIPLIER, /*!< Minimum CPU:FSB ratio for this CPU,
|
||||
multiplied by 100. */
|
||||
INFO_CUR_MULTIPLIER, /*!< Current CPU:FSB ratio, multiplied by 100.
|
||||
e.g., a CPU:FSB value of 18.5 reads as
|
||||
"1850". */
|
||||
INFO_MAX_MULTIPLIER, /*!< Maximum CPU:FSB ratio for this CPU,
|
||||
multiplied by 100. */
|
||||
INFO_TEMPERATURE, /*!< The current core temperature in Celsius. */
|
||||
INFO_THROTTLING, /*!< 1 if the current logical processor is
|
||||
throttling. 0 if it is running normally. */
|
||||
INFO_VOLTAGE, /*!< The current core voltage in Volt,
|
||||
multiplied by 100. */
|
||||
INFO_BCLK, /*!< See \ref INFO_BUS_CLOCK. */
|
||||
INFO_BUS_CLOCK, /*!< The main bus clock in MHz,
|
||||
e.g., FSB/QPI/DMI/HT base clock,
|
||||
multiplied by 100. */
|
||||
} cpu_msrinfo_request_t;
|
||||
|
||||
/**
|
||||
* @brief Similar to \ref cpu_rdmsr, but extract a range of bits
|
||||
*
|
||||
* @param handle - a handle to the MSR reader driver, as created by
|
||||
* cpu_msr_driver_open
|
||||
* @param msr_index - the numeric ID of the MSR you want to read
|
||||
* @param highbit - the high bit of the range, must be less than 64
|
||||
* @param lowbit - the low bit of the range, must be greater than or equal to 0
|
||||
* @param result - a pointer to a 64-bit integer, where the MSR value is stored
|
||||
*
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpu_rdmsr_range(struct msr_driver_t* handle, uint32_t msr_index, uint8_t highbit,
|
||||
uint8_t lowbit, uint64_t* result);
|
||||
|
||||
/**
|
||||
* @brief Reads extended CPU information from Model-Specific Registers.
|
||||
* @param handle - a handle to an open MSR driver, @see cpu_msr_driver_open
|
||||
* @param which - which info field should be returned. A list of
|
||||
* available information entities is listed in the
|
||||
* cpu_msrinfo_request_t enum.
|
||||
* @retval - if the requested information is available for the current
|
||||
* processor model, the respective value is returned.
|
||||
* if no information is available, or the CPU doesn't support
|
||||
* the query, the special value CPU_INVALID_VALUE is returned
|
||||
* @note This function is not MT-safe. If you intend to call it from multiple
|
||||
* threads, guard it through a mutex or a similar primitive.
|
||||
*/
|
||||
int cpu_msrinfo(struct msr_driver_t* handle, cpu_msrinfo_request_t which);
|
||||
#define CPU_INVALID_VALUE 0x3fffffff
|
||||
|
||||
/**
|
||||
* @brief Writes the raw MSR data to a text file
|
||||
* @param handle - a pointer to msr_driver_t structure
|
||||
* @param filename - the path of the file, where the serialized data should be
|
||||
* written. If empty, stdout will be used.
|
||||
* @note This is intended primarily for debugging. On a processor that is
|
||||
* not currently supported or not completely recognized by cpu_identify,
|
||||
* one can still successfully get the raw data and write it to a file.
|
||||
* libcpuid developers can later import this file and debug the detection
|
||||
* code as if running on the actual hardware.
|
||||
* The file is simple text format of "something=value" pairs. Version info
|
||||
* is also written, but the format is intended to be neither backward-
|
||||
* nor forward compatible.
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int msr_serialize_raw_data(struct msr_driver_t* handle, const char* filename);
|
||||
|
||||
/**
|
||||
* @brief Closes an open MSR driver
|
||||
*
|
||||
* This function unloads the MSR driver opened by cpu_msr_driver_open and
|
||||
* frees any resources associated with it.
|
||||
*
|
||||
* @param handle - a handle to the MSR reader driver, as created by
|
||||
* cpu_msr_driver_open
|
||||
*
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpu_msr_driver_close(struct msr_driver_t* handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; /* extern "C" */
|
||||
#endif
|
||||
@@ -58,48 +58,6 @@ struct internal_id_info_t {
|
||||
int score; // detection (matchtable) score
|
||||
};
|
||||
|
||||
#define LBIT(x) (((long long) 1) << x)
|
||||
|
||||
enum _common_bits_t {
|
||||
_M_ = LBIT( 0 ),
|
||||
MOBILE_ = LBIT( 1 ),
|
||||
_MP_ = LBIT( 2 ),
|
||||
};
|
||||
|
||||
// additional detection bits for Intel CPUs:
|
||||
enum _intel_bits_t {
|
||||
PENTIUM_ = LBIT( 10 ),
|
||||
CELERON_ = LBIT( 11 ),
|
||||
CORE_ = LBIT( 12 ),
|
||||
_I_ = LBIT( 13 ),
|
||||
_3 = LBIT( 14 ),
|
||||
_5 = LBIT( 15 ),
|
||||
_7 = LBIT( 16 ),
|
||||
XEON_ = LBIT( 17 ),
|
||||
ATOM_ = LBIT( 18 ),
|
||||
};
|
||||
typedef enum _intel_bits_t intel_bits_t;
|
||||
|
||||
enum _amd_bits_t {
|
||||
ATHLON_ = LBIT( 10 ),
|
||||
_XP_ = LBIT( 11 ),
|
||||
DURON_ = LBIT( 12 ),
|
||||
SEMPRON_ = LBIT( 13 ),
|
||||
OPTERON_ = LBIT( 14 ),
|
||||
TURION_ = LBIT( 15 ),
|
||||
_LV_ = LBIT( 16 ),
|
||||
_64_ = LBIT( 17 ),
|
||||
_X2 = LBIT( 18 ),
|
||||
_X3 = LBIT( 19 ),
|
||||
_X4 = LBIT( 20 ),
|
||||
_X6 = LBIT( 21 ),
|
||||
_FX = LBIT( 22 ),
|
||||
_APU_ = LBIT( 23 ),
|
||||
};
|
||||
typedef enum _amd_bits_t amd_bits_t;
|
||||
|
||||
|
||||
|
||||
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data,
|
||||
struct internal_id_info_t* internal);
|
||||
|
||||
218
compat/libcpuid/libcpuid_util.c
Normal file
218
compat/libcpuid/libcpuid_util.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
|
||||
int _current_verboselevel;
|
||||
|
||||
void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < count; i++)
|
||||
if (reg & (1u << matchtable[i].bit))
|
||||
data->flags[matchtable[i].feature] = 1;
|
||||
}
|
||||
|
||||
/* Default warning sink: writes the message to stderr exactly as given. */
static void default_warn(const char *msg)
{
	fputs(msg, stderr);
}
|
||||
|
||||
libcpuid_warn_fn_t _warn_fun = default_warn;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define vsnprintf _vsnprintf
|
||||
#endif
|
||||
void warnf(const char* format, ...)
|
||||
{
|
||||
char buff[1024];
|
||||
va_list va;
|
||||
if (!_warn_fun) return;
|
||||
va_start(va, format);
|
||||
vsnprintf(buff, sizeof(buff), format, va);
|
||||
va_end(va);
|
||||
_warn_fun(buff);
|
||||
}
|
||||
|
||||
void debugf(int verboselevel, const char* format, ...)
|
||||
{
|
||||
char buff[1024];
|
||||
va_list va;
|
||||
if (verboselevel > _current_verboselevel) return;
|
||||
va_start(va, format);
|
||||
vsnprintf(buff, sizeof(buff), format, va);
|
||||
va_end(va);
|
||||
_warn_fun(buff);
|
||||
}
|
||||
|
||||
/* Returns the number of bits that are set to 1 in `mask'. */
static int popcount64(uint64_t mask)
{
	int count;

	/* Each pass clears the lowest set bit, so the loop body runs exactly
	 * once per set bit. */
	for (count = 0; mask != 0; count++)
		mask &= mask - 1;

	return count;
}
|
||||
|
||||
static int score(const struct match_entry_t* entry, const struct cpu_id_t* data,
|
||||
int brand_code, uint64_t bits, int model_code)
|
||||
{
|
||||
int res = 0;
|
||||
if (entry->family == data->family ) res += 2;
|
||||
if (entry->model == data->model ) res += 2;
|
||||
if (entry->stepping == data->stepping ) res += 2;
|
||||
if (entry->ext_family == data->ext_family) res += 2;
|
||||
if (entry->ext_model == data->ext_model ) res += 2;
|
||||
if (entry->ncores == data->num_cores ) res += 2;
|
||||
if (entry->l2cache == data->l2_cache ) res += 1;
|
||||
if (entry->l3cache == data->l3_cache ) res += 1;
|
||||
if (entry->brand_code == brand_code ) res += 2;
|
||||
if (entry->model_code == model_code ) res += 2;
|
||||
|
||||
res += popcount64(entry->model_bits & bits) * 2;
|
||||
return res;
|
||||
}
|
||||
|
||||
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_id_t* data, int brand_code, uint64_t bits,
|
||||
int model_code)
|
||||
{
|
||||
int bestscore = -1;
|
||||
int bestindex = 0;
|
||||
int i, t;
|
||||
|
||||
debugf(3, "Matching cpu f:%d, m:%d, s:%d, xf:%d, xm:%d, ncore:%d, l2:%d, bcode:%d, bits:%llu, code:%d\n",
|
||||
data->family, data->model, data->stepping, data->ext_family,
|
||||
data->ext_model, data->num_cores, data->l2_cache, brand_code, (unsigned long long) bits, model_code);
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
t = score(&matchtable[i], data, brand_code, bits, model_code);
|
||||
debugf(3, "Entry %d, `%s', score %d\n", i, matchtable[i].name, t);
|
||||
if (t > bestscore) {
|
||||
debugf(2, "Entry `%s' selected - best score so far (%d)\n", matchtable[i].name, t);
|
||||
bestscore = t;
|
||||
bestindex = i;
|
||||
}
|
||||
}
|
||||
strcpy(data->cpu_codename, matchtable[bestindex].name);
|
||||
return bestscore;
|
||||
}
|
||||
|
||||
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_list_t* list)
|
||||
{
|
||||
int i, j, n, good;
|
||||
n = 0;
|
||||
list->names = (char**) malloc(sizeof(char*) * count);
|
||||
for (i = 0; i < count; i++) {
|
||||
if (strstr(matchtable[i].name, "Unknown")) continue;
|
||||
good = 1;
|
||||
for (j = n - 1; j >= 0; j--)
|
||||
if (!strcmp(list->names[j], matchtable[i].name)) {
|
||||
good = 0;
|
||||
break;
|
||||
}
|
||||
if (!good) continue;
|
||||
#if defined(_MSC_VER)
|
||||
list->names[n++] = _strdup(matchtable[i].name);
|
||||
#else
|
||||
list->names[n++] = strdup(matchtable[i].name);
|
||||
#endif
|
||||
}
|
||||
list->num_entries = n;
|
||||
}
|
||||
|
||||
/*
 * Matches a single subject character `c' against the pattern element at
 * the start of `p'.
 *
 * Supported pattern elements:
 *   - a literal character, which matches itself;
 *   - '.', which matches any character;
 *   - '#', which matches any decimal digit;
 *   - "[set]", which matches any one of the characters listed in the set.
 *
 * Returns the number of pattern characters consumed by the element
 * (1, or the full length of a "[set]" element including both brackets),
 * or -1 when `c' is the terminating NUL, the element does not match, or a
 * '[' has no closing ']'.
 */
static int xmatch_entry(char c, const char* p)
{
	int i, j;
	if (c == 0) return -1;
	if (c == p[0]) return 1;
	if (p[0] == '.') return 1;
	/* isdigit() is only defined for values representable as unsigned char
	 * (or EOF); a negative plain char must be converted first. */
	if (p[0] == '#' && isdigit((unsigned char) c)) return 1;
	if (p[0] == '[') {
		/* Locate the closing bracket; j ends up as its index. */
		j = 1;
		while (p[j] && p[j] != ']') j++;
		if (!p[j]) return -1;
		for (i = 1; i < j; i++)
			if (p[i] == c) return j + 1;
	}
	return -1;
}
|
||||
|
||||
/*
 * Searches string `s' for the first place where pattern `p' matches.
 *
 * Each start offset in `s' is tried in turn; from there the pattern is
 * consumed element by element with xmatch_entry(), one subject character
 * per element.
 *
 * Returns the 1-based offset in `s' where the match starts, or 0 when the
 * pattern does not match anywhere.
 */
int match_pattern(const char* s, const char* p)
{
	const int subject_len = (int) strlen(s);
	const int pattern_len = (int) strlen(p);
	int start;

	for (start = 0; start < subject_len; start++) {
		int pat_pos, consumed, step;

		/* Quick reject: the first pattern element must match here. */
		if (xmatch_entry(s[start], p) == -1)
			continue;

		pat_pos = 0;
		consumed = 0;
		while (pat_pos < pattern_len) {
			step = xmatch_entry(s[start + consumed], p + pat_pos);
			if (step == -1)
				break;
			consumed++;
			pat_pos += step;
		}

		/* The whole pattern was consumed: report the match position. */
		if (pat_pos == pattern_len)
			return start + 1;
	}

	return 0;
}
|
||||
|
||||
struct cpu_id_t* get_cached_cpuid(void)
|
||||
{
|
||||
static int initialized = 0;
|
||||
static struct cpu_id_t id;
|
||||
if (initialized) return &id;
|
||||
if (cpu_identify(NULL, &id))
|
||||
memset(&id, 0, sizeof(id));
|
||||
initialized = 1;
|
||||
return &id;
|
||||
}
|
||||
|
||||
/* Returns 1 when every bit set in `mask' is also set in `bits', else 0. */
int match_all(uint64_t bits, uint64_t mask)
{
	/* Bits requested by the mask that `bits' does not provide. */
	const uint64_t missing = mask & ~bits;

	return missing == 0;
}
|
||||
|
||||
/*
 * Prints which bits a mask consists of, in the form
 * "LBIT(a) + LBIT(b) + ...", followed by a newline.
 *
 * The output goes through debugf() at verbosity `debuglevel', so it
 * appears only when the current verbosity is at least that level.
 */
void debug_print_lbits(int debuglevel, uint64_t mask)
{
	/* `first' suppresses the " + " separator before the first printed bit. */
	int i, first = 1;
	for (i = 0; i < 64; i++) {
		if (!(mask & (((uint64_t) 1) << i))) continue;
		if (first) first = 0;
		else debugf(debuglevel, " + ");
		debugf(debuglevel, "LBIT(%d)", i);
	}
	debugf(debuglevel, "\n");
}
|
||||
@@ -28,6 +28,8 @@
|
||||
|
||||
#define COUNT_OF(array) (sizeof(array) / sizeof(array[0]))
|
||||
|
||||
/* Builds a long long mask with only bit `x' set. The argument is
 * parenthesized so that expressions using operators of lower precedence
 * than `<<' (e.g. LBIT(a | b)) expand as intended. */
#define LBIT(x) (((long long) 1) << (x))
|
||||
|
||||
struct feature_map_t {
|
||||
unsigned bit;
|
||||
cpu_feature_t feature;
|
||||
@@ -48,6 +50,20 @@ struct match_entry_t {
|
||||
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_id_t* data, int brand_code, uint64_t bits,
|
||||
int model_code);
|
||||
|
||||
void warnf(const char* format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
#endif
|
||||
;
|
||||
void debugf(int verboselevel, const char* format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 2, 3)))
|
||||
#endif
|
||||
;
|
||||
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_list_t* list);
|
||||
|
||||
/*
|
||||
* Seek for a pattern in `haystack'.
|
||||
* Pattern may be a fixed string, or contain the special metacharacters
|
||||
@@ -70,9 +86,15 @@ struct cpu_id_t* get_cached_cpuid(void);
|
||||
/* returns true if all bits of mask are present in `bits'. */
|
||||
int match_all(uint64_t bits, uint64_t mask);
|
||||
|
||||
/* print what bits a mask consists of */
|
||||
void debug_print_lbits(int debuglevel, uint64_t mask);
|
||||
|
||||
/*
|
||||
* Sets the current errno
|
||||
*/
|
||||
int set_error(cpu_error_t err);
|
||||
|
||||
extern libcpuid_warn_fn_t _warn_fun;
|
||||
extern int _current_verboselevel;
|
||||
|
||||
#endif /* __LIBCPUID_UTIL_H__ */
|
||||
549
compat/libcpuid/recog_amd.c
Normal file
549
compat/libcpuid/recog_amd.c
Normal file
@@ -0,0 +1,549 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_amd.h"
|
||||
|
||||
const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = {
|
||||
#define CODE(x) { x, #x }
|
||||
#define CODE2(x, y) CODE(x)
|
||||
#include "amd_code_t.h"
|
||||
#undef CODE
|
||||
};
|
||||
|
||||
struct amd_code_and_bits_t {
|
||||
int code;
|
||||
uint64_t bits;
|
||||
};
|
||||
|
||||
enum _amd_bits_t {
|
||||
ATHLON_ = LBIT( 0 ),
|
||||
_XP_ = LBIT( 1 ),
|
||||
_M_ = LBIT( 2 ),
|
||||
_MP_ = LBIT( 3 ),
|
||||
MOBILE_ = LBIT( 4 ),
|
||||
DURON_ = LBIT( 5 ),
|
||||
SEMPRON_ = LBIT( 6 ),
|
||||
OPTERON_ = LBIT( 7 ),
|
||||
TURION_ = LBIT( 8 ),
|
||||
_LV_ = LBIT( 9 ),
|
||||
_64_ = LBIT( 10 ),
|
||||
_X2 = LBIT( 11 ),
|
||||
_X3 = LBIT( 12 ),
|
||||
_X4 = LBIT( 13 ),
|
||||
_X6 = LBIT( 14 ),
|
||||
_FX = LBIT( 15 ),
|
||||
};
|
||||
typedef enum _amd_bits_t amd_bits_t;
|
||||
|
||||
enum _amd_model_codes_t {
|
||||
// Only for Ryzen CPUs:
|
||||
_1400,
|
||||
_1500,
|
||||
_1600,
|
||||
};
|
||||
|
||||
|
||||
const struct match_entry_t cpudb_amd[] = {
|
||||
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD CPU" },
|
||||
|
||||
/* 486 and the likes */
|
||||
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 486" },
|
||||
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2" },
|
||||
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2WB" },
|
||||
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4" },
|
||||
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4WB" },
|
||||
|
||||
/* Pentia clones */
|
||||
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 586" },
|
||||
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
|
||||
|
||||
/* The K6 */
|
||||
{ 5, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
|
||||
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
|
||||
|
||||
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2" },
|
||||
{ 5, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-III" },
|
||||
{ 5, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
|
||||
{ 5, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
|
||||
{ 5, 12, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
|
||||
{ 5, 13, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2+" },
|
||||
|
||||
/* Athlon et al. */
|
||||
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
|
||||
{ 6, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
|
||||
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Duron (Spitfire)" },
|
||||
{ 6, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (ThunderBird)" },
|
||||
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon (Palomino)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Palomino)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Palomino)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP" },
|
||||
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon XP" },
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Morgan)" },
|
||||
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon XP (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Applebred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Thoroughbred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (T-Bred)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (T-Bred)" },
|
||||
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thorton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thorton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (Barton)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (Barton)" },
|
||||
|
||||
/* K8 Architecture */
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown K8" },
|
||||
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Unknown K9" },
|
||||
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown A64" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
|
||||
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
|
||||
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
|
||||
{ 15, 3, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_FX , 0, "Athlon FX" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX" },
|
||||
{ 15, 3, -1, 15, 35, 2, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX X2 (Toledo)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (512K)" },
|
||||
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (1024K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 128, -1, NC, SEMPRON_ , 0, "A64 Sempron (128K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 256, -1, NC, SEMPRON_ , 0, "A64 Sempron (256K)" },
|
||||
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, SEMPRON_ , 0, "A64 Sempron (512K)" },
|
||||
{ 15, -1, -1, 15, 0x4f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
|
||||
{ 15, -1, -1, 15, 0x2f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
|
||||
{ 15, -1, -1, 15, 0x1f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Winchester/512K)" },
|
||||
{ 15, -1, -1, 15, 0x0c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Newcastle/512K)" },
|
||||
{ 15, -1, -1, 15, 0x27, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
|
||||
{ 15, -1, -1, 15, 0x37, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
|
||||
{ 15, -1, -1, 15, 0x04, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/512K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x27, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x04, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/1024K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x4b, 2, 256, -1, NC, SEMPRON_ , 0, "Athlon 64 X2 (Windsor/256K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x23, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/512K)" },
|
||||
{ 15, -1, -1, 15, 0x4b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
|
||||
{ 15, -1, -1, 15, 0x43, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
|
||||
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Brisbane/512K)" },
|
||||
{ 15, -1, -1, 15, 0x2b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Manchester/512K)"},
|
||||
|
||||
{ 15, -1, -1, 15, 0x23, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x43, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/1024K)" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x08, 1, 128, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/128K)"},
|
||||
{ 15, -1, -1, 15, 0x08, 1, 256, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/256K)"},
|
||||
{ 15, -1, -1, 15, 0x0c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Paris)" },
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/128K)"},
|
||||
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/256K)"},
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/128K)"},
|
||||
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/256K)"},
|
||||
{ 15, -1, -1, 15, 0x2f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
|
||||
{ 15, -1, -1, 15, 0x2f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
|
||||
{ 15, -1, -1, 15, 0x4f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
|
||||
{ 15, -1, -1, 15, 0x4f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
|
||||
{ 15, -1, -1, 15, 0x5f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
|
||||
{ 15, -1, -1, 15, 0x6b, 2, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/256K)"},
|
||||
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/512K)"},
|
||||
{ 15, -1, -1, 15, 0x7f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/256K)" },
|
||||
{ 15, -1, -1, 15, 0x7f, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/512K)" },
|
||||
{ 15, -1, -1, 15, 0x4c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/256K)"},
|
||||
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/512K)"},
|
||||
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, SEMPRON_ , 0, "Sempron Dual Core" },
|
||||
|
||||
{ 15, -1, -1, 15, 0x24, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/512K)" },
|
||||
{ 15, -1, -1, 15, 0x24, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/1024K)" },
|
||||
{ 15, -1, -1, 15, 0x48, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Taylor)" },
|
||||
{ 15, -1, -1, 15, 0x48, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Trinidad)" },
|
||||
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Richmond)" },
|
||||
{ 15, -1, -1, 15, 0x68, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/256K)" },
|
||||
{ 15, -1, -1, 15, 0x68, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/512K)" },
|
||||
{ 15, -1, -1, 17, 3, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/512K)" },
|
||||
{ 15, -1, -1, 17, 3, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/1024K)" },
|
||||
|
||||
/* K10 Architecture (2007) */
|
||||
{ 15, -1, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Unknown AMD Phenom" },
|
||||
{ 15, 2, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Phenom" },
|
||||
{ 15, 2, -1, 16, -1, 3, -1, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman)" },
|
||||
{ 15, 2, -1, 16, -1, 4, -1, -1, PHENOM, 0 , 0, "Phenom X4 (Agena)" },
|
||||
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/256K)" },
|
||||
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/512K)" },
|
||||
{ 15, 2, -1, 16, -1, 4, 128, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/128K)" },
|
||||
{ 15, 2, -1, 16, -1, 4, 256, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/256K)" },
|
||||
{ 15, 2, -1, 16, -1, 4, 512, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/512K)" },
|
||||
{ 15, 2, -1, 16, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon X2 (Kuma)" },
|
||||
/* Phenom II derivates: */
|
||||
{ 15, 4, -1, 16, -1, 4, -1, -1, NC, 0 , 0, "Phenom (Deneb-based)" },
|
||||
{ 15, 4, -1, 16, -1, 1, 1024, -1, NC, SEMPRON_ , 0, "Sempron (Sargas)" },
|
||||
{ 15, 4, -1, 16, -1, 2, 512, -1, PHENOM2, 0 , 0, "Phenom II X2 (Callisto)" },
|
||||
{ 15, 4, -1, 16, -1, 3, 512, -1, PHENOM2, 0 , 0, "Phenom II X3 (Heka)" },
|
||||
{ 15, 4, -1, 16, -1, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4" },
|
||||
{ 15, 4, -1, 16, 4, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
|
||||
{ 15, 5, -1, 16, 5, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
|
||||
{ 15, 4, -1, 16, 10, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Zosma)" },
|
||||
{ 15, 4, -1, 16, 10, 6, 512, -1, PHENOM2, 0 , 0, "Phenom II X6 (Thuban)" },
|
||||
/* Athlon II derivatives: */
|
||||
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_X2 , 0, "Athlon II (Champlain)" },
|
||||
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
|
||||
{ 15, 6, -1, 16, 6, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
|
||||
{ 15, 5, -1, 16, 5, 3, 512, -1, NC, ATHLON_|_64_|_X3 , 0, "Athlon II X3 (Rana)" },
|
||||
{ 15, 5, -1, 16, 5, 4, 512, -1, NC, ATHLON_|_64_|_X4 , 0, "Athlon II X4 (Propus)" },
|
||||
/* Llano APUs (2011): */
|
||||
{ 15, 1, -1, 18, 1, 2, -1, -1, FUSION_EA, 0 , 0, "Llano X2" },
|
||||
{ 15, 1, -1, 18, 1, 3, -1, -1, FUSION_EA, 0 , 0, "Llano X3" },
|
||||
{ 15, 1, -1, 18, 1, 4, -1, -1, FUSION_EA, 0 , 0, "Llano X4" },
|
||||
|
||||
/* Family 14h: Bobcat Architecture (2011) */
|
||||
{ 15, 2, -1, 20, -1, 1, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario" },
|
||||
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario (Dual-core)" },
|
||||
{ 15, 1, -1, 20, -1, 1, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate" },
|
||||
{ 15, 1, -1, 20, -1, 2, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate (Dual-core)" },
|
||||
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_Z, 0 , 0, "Brazos Desna (Dual-core)" },
|
||||
|
||||
/* Family 15h: Bulldozer Architecture (2011) */
|
||||
{ 15, -1, -1, 21, 0, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
|
||||
{ 15, -1, -1, 21, 1, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
|
||||
{ 15, -1, -1, 21, 1, 6, -1, -1, NC, 0 , 0, "Bulldozer X3" },
|
||||
{ 15, -1, -1, 21, 1, 8, -1, -1, NC, 0 , 0, "Bulldozer X4" },
|
||||
/* 2nd-gen, Piledriver core (2012): */
|
||||
{ 15, -1, -1, 21, 2, 4, -1, -1, NC, 0 , 0, "Vishera X2" },
|
||||
{ 15, -1, -1, 21, 2, 6, -1, -1, NC, 0 , 0, "Vishera X3" },
|
||||
{ 15, -1, -1, 21, 2, 8, -1, -1, NC, 0 , 0, "Vishera X4" },
|
||||
{ 15, 0, -1, 21, 16, 2, -1, -1, FUSION_A, 0 , 0, "Trinity X2" },
|
||||
{ 15, 0, -1, 21, 16, 4, -1, -1, FUSION_A, 0 , 0, "Trinity X4" },
|
||||
{ 15, 3, -1, 21, 19, 2, -1, -1, FUSION_A, 0 , 0, "Richland X2" },
|
||||
{ 15, 3, -1, 21, 19, 4, -1, -1, FUSION_A, 0 , 0, "Richland X4" },
|
||||
/* 3rd-gen, Steamroller core (2014): */
|
||||
{ 15, 0, -1, 21, 48, 2, -1, -1, FUSION_A, 0 , 0, "Kaveri X2" },
|
||||
{ 15, 0, -1, 21, 48, 4, -1, -1, FUSION_A, 0 , 0, "Kaveri X4" },
|
||||
{ 15, 8, -1, 21, 56, 4, -1, -1, FUSION_A, 0 , 0, "Godavari X4" },
|
||||
/* 4th-gen, Excavator core (2015): */
|
||||
{ 15, 1, -1, 21, 96, 2, -1, -1, FUSION_A, 0 , 0, "Carrizo X2" },
|
||||
{ 15, 1, -1, 21, 96, 4, -1, -1, FUSION_A, 0 , 0, "Carrizo X4" },
|
||||
{ 15, 5, -1, 21, 101, 2, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X2" },
|
||||
{ 15, 5, -1, 21, 101, 4, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X4" },
|
||||
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_A, 0 , 0, "Stoney Ridge X2" },
|
||||
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_E, 0 , 0, "Stoney Ridge X2" },
|
||||
|
||||
/* Family 16h: Jaguar Architecture (2013) */
|
||||
{ 15, 0, -1, 22, 0, 2, -1, -1, FUSION_A, 0 , 0, "Kabini X2" },
|
||||
{ 15, 0, -1, 22, 0, 4, -1, -1, FUSION_A, 0 , 0, "Kabini X4" },
|
||||
/* 2nd-gen, Puma core (2013): */
|
||||
{ 15, 0, -1, 22, 48, 2, -1, -1, FUSION_E, 0 , 0, "Mullins X2" },
|
||||
{ 15, 0, -1, 22, 48, 4, -1, -1, FUSION_A, 0 , 0, "Mullins X4" },
|
||||
|
||||
/* Family 17h: Zen Architecture (2017) */
|
||||
{ 15, -1, -1, 23, 1, 8, -1, -1, NC, 0 , 0, "Ryzen 7" },
|
||||
{ 15, -1, -1, 23, 1, 6, -1, -1, NC, 0 , _1600, "Ryzen 5" },
|
||||
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1500, "Ryzen 5" },
|
||||
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1400, "Ryzen 5" },
|
||||
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Ryzen 3" },
|
||||
//{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Raven Ridge" }, //TBA
|
||||
|
||||
/* Newer Opterons: */
|
||||
{ 15, 9, -1, 22, 9, 8, -1, -1, NC, OPTERON_ , 0, "Magny-Cours Opteron" },
|
||||
};
|
||||
|
||||
|
||||
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 20, CPU_FEATURE_NX },
|
||||
{ 22, CPU_FEATURE_MMXEXT },
|
||||
{ 25, CPU_FEATURE_FXSR_OPT },
|
||||
{ 30, CPU_FEATURE_3DNOWEXT },
|
||||
{ 31, CPU_FEATURE_3DNOW },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx81[] = {
|
||||
{ 1, CPU_FEATURE_CMP_LEGACY },
|
||||
{ 2, CPU_FEATURE_SVM },
|
||||
{ 5, CPU_FEATURE_ABM },
|
||||
{ 6, CPU_FEATURE_SSE4A },
|
||||
{ 7, CPU_FEATURE_MISALIGNSSE },
|
||||
{ 8, CPU_FEATURE_3DNOWPREFETCH },
|
||||
{ 9, CPU_FEATURE_OSVW },
|
||||
{ 10, CPU_FEATURE_IBS },
|
||||
{ 11, CPU_FEATURE_XOP },
|
||||
{ 12, CPU_FEATURE_SKINIT },
|
||||
{ 13, CPU_FEATURE_WDT },
|
||||
{ 16, CPU_FEATURE_FMA4 },
|
||||
{ 21, CPU_FEATURE_TBM },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx87[] = {
|
||||
{ 0, CPU_FEATURE_TS },
|
||||
{ 1, CPU_FEATURE_FID },
|
||||
{ 2, CPU_FEATURE_VID },
|
||||
{ 3, CPU_FEATURE_TTP },
|
||||
{ 4, CPU_FEATURE_TM_AMD },
|
||||
{ 5, CPU_FEATURE_STC },
|
||||
{ 6, CPU_FEATURE_100MHZSTEPS },
|
||||
{ 7, CPU_FEATURE_HWPSTATE },
|
||||
/* id 8 is handled in common */
|
||||
{ 9, CPU_FEATURE_CPB },
|
||||
{ 10, CPU_FEATURE_APERFMPERF },
|
||||
{ 11, CPU_FEATURE_PFI },
|
||||
{ 12, CPU_FEATURE_PA },
|
||||
};
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000001) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000007)
|
||||
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
||||
if (raw->ext_cpuid[0][0] >= 0x8000001a) {
|
||||
/* We have the extended info about SSE unit size */
|
||||
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
|
||||
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int l3_result;
|
||||
const int assoc_table[16] = {
|
||||
0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255
|
||||
};
|
||||
unsigned n = raw->ext_cpuid[0][0];
|
||||
|
||||
if (n >= 0x80000005) {
|
||||
data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff;
|
||||
data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff;
|
||||
data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff;
|
||||
data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff;
|
||||
}
|
||||
if (n >= 0x80000006) {
|
||||
data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff;
|
||||
data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf];
|
||||
data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff;
|
||||
|
||||
l3_result = (raw->ext_cpuid[6][3] >> 18);
|
||||
if (l3_result > 0) {
|
||||
l3_result = 512 * l3_result; /* AMD spec says it's a range,
|
||||
but we take the lower bound */
|
||||
data->l3_cache = l3_result;
|
||||
data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf];
|
||||
data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff;
|
||||
} else {
|
||||
data->l3_cache = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int logical_cpus = -1, num_cores = -1;
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
|
||||
if (raw->ext_cpuid[0][0] >= 8) {
|
||||
num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff);
|
||||
}
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_HT]) {
|
||||
if (num_cores > 1) {
|
||||
if (data->ext_family >= 23)
|
||||
num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8.
|
||||
data->num_cores = num_cores;
|
||||
data->num_logical_cpus = logical_cpus;
|
||||
} else {
|
||||
data->num_cores = 1;
|
||||
data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2);
|
||||
}
|
||||
} else {
|
||||
data->num_cores = data->num_logical_cpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns 1 if the brand string `bs` contains a Turion-style model name
 * such as "TL-60", otherwise 0.
 *
 * A match is: one of the known two-letter prefixes, a dash and exactly two
 * digits, where the prefix is not preceded by another upper-case letter and
 * the digits are not followed by another digit. Because the character
 * before the prefix is inspected, the prefix must start at offset 1 or
 * later in the string.
 */
static int amd_has_turion_modelname(const char *bs)
{
	/* We search for something like TL-60. Ahh, I miss regexes...*/
	const char* const prefixes[] = { "ML", "MT", "MK", "TK", "TL", "RM", "ZM" };
	const int len = (int) strlen(bs);
	int i;
	unsigned k;

	for (i = 3; i < len - 2; i++) {
		if (bs[i] != '-')
			continue;
		/*
		 * The <ctype.h> classifiers require a value representable as
		 * unsigned char (or EOF); a plain char may be negative for
		 * non-ASCII bytes, hence the casts.
		 */
		if (!isupper((unsigned char) bs[i-1]) ||
		    !isupper((unsigned char) bs[i-2]) ||
		     isupper((unsigned char) bs[i-3]))
			continue;
		/* bs[i+3] is at worst the terminating NUL, which is not a digit. */
		if (!isdigit((unsigned char) bs[i+1]) ||
		    !isdigit((unsigned char) bs[i+2]) ||
		     isdigit((unsigned char) bs[i+3]))
			continue;

		for (k = 0; k < sizeof(prefixes) / sizeof(prefixes[0]); k++)
			if (bs[i-2] == prefixes[k][0] && bs[i-1] == prefixes[k][1])
				return 1;
	}
	return 0;
}
|
||||
|
||||
static struct amd_code_and_bits_t decode_amd_codename_part1(const char *bs)
|
||||
{
|
||||
amd_code_t code = NC;
|
||||
uint64_t bits = 0;
|
||||
struct amd_code_and_bits_t result;
|
||||
|
||||
if (strstr(bs, "Dual Core") ||
|
||||
strstr(bs, "Dual-Core") ||
|
||||
strstr(bs, " X2 "))
|
||||
bits |= _X2;
|
||||
if (strstr(bs, " X4 ")) bits |= _X4;
|
||||
if (strstr(bs, " X3 ")) bits |= _X3;
|
||||
if (strstr(bs, "Opteron")) bits |= OPTERON_;
|
||||
if (strstr(bs, "Phenom")) {
|
||||
code = (strstr(bs, "II")) ? PHENOM2 : PHENOM;
|
||||
}
|
||||
if (amd_has_turion_modelname(bs)) {
|
||||
bits |= TURION_;
|
||||
}
|
||||
if (strstr(bs, "Athlon(tm)")) bits |= ATHLON_;
|
||||
if (strstr(bs, "Sempron(tm)")) bits |= SEMPRON_;
|
||||
if (strstr(bs, "Duron")) bits |= DURON_;
|
||||
if (strstr(bs, " 64 ")) bits |= _64_;
|
||||
if (strstr(bs, " FX")) bits |= _FX;
|
||||
if (strstr(bs, " MP")) bits |= _MP_;
|
||||
if (strstr(bs, "Athlon(tm) 64") || strstr(bs, "Athlon(tm) II X") || match_pattern(bs, "Athlon(tm) X#")) {
|
||||
bits |= ATHLON_ | _64_;
|
||||
}
|
||||
if (strstr(bs, "Turion")) bits |= TURION_;
|
||||
|
||||
if (strstr(bs, "mobile") || strstr(bs, "Mobile")) {
|
||||
bits |= MOBILE_;
|
||||
}
|
||||
|
||||
if (strstr(bs, "XP")) bits |= _XP_;
|
||||
if (strstr(bs, "XP-M")) bits |= _M_;
|
||||
if (strstr(bs, "(LV)")) bits |= _LV_;
|
||||
|
||||
if (match_pattern(bs, "C-##")) code = FUSION_C;
|
||||
if (match_pattern(bs, "E-###")) code = FUSION_E;
|
||||
if (match_pattern(bs, "Z-##")) code = FUSION_Z;
|
||||
if (match_pattern(bs, "E#-####") || match_pattern(bs, "A#-####")) code = FUSION_EA;
|
||||
|
||||
result.code = code;
|
||||
result.bits = bits;
|
||||
return result;
|
||||
}
|
||||
|
||||
static int decode_amd_ryzen_model_code(const char* bs)
|
||||
{
|
||||
const struct {
|
||||
int model_code;
|
||||
const char* match_str;
|
||||
} patterns[] = {
|
||||
{ _1600, "1600" },
|
||||
{ _1500, "1500" },
|
||||
{ _1400, "1400" },
|
||||
};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < COUNT_OF(patterns); i++)
|
||||
if (strstr(bs, patterns[i].match_str))
|
||||
return patterns[i].model_code;
|
||||
//
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void decode_amd_codename(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
struct amd_code_and_bits_t code_and_bits = decode_amd_codename_part1(data->brand_str);
|
||||
int i = 0;
|
||||
char* code_str = NULL;
|
||||
int model_code;
|
||||
|
||||
for (i = 0; i < COUNT_OF(amd_code_str); i++) {
|
||||
if (code_and_bits.code == amd_code_str[i].code) {
|
||||
code_str = amd_code_str[i].str;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (/*code == ATHLON_64_X2*/ match_all(code_and_bits.bits, ATHLON_|_64_|_X2) && data->l2_cache < 512) {
|
||||
code_and_bits.bits &= ~(ATHLON_ | _64_);
|
||||
code_and_bits.bits |= SEMPRON_;
|
||||
}
|
||||
if (code_str)
|
||||
debugf(2, "Detected AMD brand code: %d (%s)\n", code_and_bits.code, code_str);
|
||||
else
|
||||
debugf(2, "Detected AMD brand code: %d\n", code_and_bits.code);
|
||||
|
||||
if (code_and_bits.bits) {
|
||||
debugf(2, "Detected AMD bits: ");
|
||||
debug_print_lbits(2, code_and_bits.bits);
|
||||
}
|
||||
// is it Ryzen? if so, we need to detect discern between the four-core 1400/1500 (Ryzen 5) and the four-core Ryzen 3:
|
||||
model_code = (data->ext_family == 23) ? decode_amd_ryzen_model_code(data->brand_str) : 0;
|
||||
|
||||
internal->code.amd = code_and_bits.code;
|
||||
internal->bits = code_and_bits.bits;
|
||||
internal->score = match_cpu_codename(cpudb_amd, COUNT_OF(cpudb_amd), data, code_and_bits.code,
|
||||
code_and_bits.bits, model_code);
|
||||
}
|
||||
|
||||
/*
 * Entry point for AMD CPU identification.
 *
 * Runs the AMD decoding steps in order: feature flags, cache information,
 * core counts, and finally the code name. The order matters: the later
 * steps read fields (feature flags, L2 size) that the earlier ones fill in.
 *
 * Always returns 0.
 */
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
	load_amd_features(raw, data);
	decode_amd_cache_info(raw, data);
	decode_amd_number_of_cores(raw, data);
	decode_amd_codename(raw, data, internal);

	return 0;
}
|
||||
|
||||
void cpuid_get_list_amd(struct cpu_list_t* list)
|
||||
{
|
||||
generic_get_cpu_list(cpudb_amd, COUNT_OF(cpudb_amd), list);
|
||||
}
|
||||
@@ -27,5 +27,6 @@
|
||||
#define __RECOG_AMD_H__
|
||||
|
||||
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
|
||||
void cpuid_get_list_amd(struct cpu_list_t* list);
|
||||
|
||||
#endif /* __RECOG_AMD_H__ */
|
||||
935
compat/libcpuid/recog_intel.c
Normal file
935
compat/libcpuid/recog_intel.c
Normal file
@@ -0,0 +1,935 @@
|
||||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_intel.h"
|
||||
|
||||
/*
 * Table pairing every Intel brand code with its name as a string.
 *
 * The entries are generated by including intel_code_t.h with CODE(x)
 * defined to emit "{ x, "x" }"; CODE2(x, y) emits the same entry and
 * ignores y.
 * NOTE(review): only CODE is undefined afterwards, so CODE2 stays defined
 * for the rest of this file.
 */
const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = {
	#define CODE(x) { x, #x }
	#define CODE2(x, y) CODE(x)
	#include "intel_code_t.h"
	#undef CODE
};
|
||||
|
||||
/* A brand code together with the set of model bits parsed for one CPU. */
typedef struct {
	int      code;  /* brand code */
	uint64_t bits;  /* OR-ed model bits */
} intel_code_and_bits_t;
|
||||
|
||||
/*
 * Model codes used in the last numeric column of the cpudb_intel table.
 * The numeric values are spelled out; they are the same consecutive values
 * that implicit numbering after "_3000 = 100" produces.
 */
enum _intel_model_t {
	UNKNOWN = -1,
	_3000   = 100,
	_3100   = 101,
	_3200   = 102,
	X3200   = 103,
	_3300   = 104,
	X3300   = 105,
	_5100   = 106,
	_5200   = 107,
	_5300   = 108,
	_5400   = 109,
	_2xxx   = 110, /* Core i[357] 2xxx */
	_3xxx   = 111, /* Core i[357] 3xxx */
};
typedef enum _intel_model_t intel_model_t;
|
||||
|
||||
enum _intel_bits_t {
|
||||
PENTIUM_ = LBIT( 0 ),
|
||||
CELERON_ = LBIT( 1 ),
|
||||
MOBILE_ = LBIT( 2 ),
|
||||
CORE_ = LBIT( 3 ),
|
||||
_I_ = LBIT( 4 ),
|
||||
_M_ = LBIT( 5 ),
|
||||
_3 = LBIT( 6 ),
|
||||
_5 = LBIT( 7 ),
|
||||
_7 = LBIT( 8 ),
|
||||
XEON_ = LBIT( 9 ),
|
||||
_MP = LBIT( 10 ),
|
||||
ATOM_ = LBIT( 11 ),
|
||||
|
||||
};
|
||||
typedef enum _intel_bits_t intel_bits_t;
|
||||
|
||||
const struct match_entry_t cpudb_intel[] = {
|
||||
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Intel CPU" },
|
||||
|
||||
/* i486 */
|
||||
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown i486" },
|
||||
{ 4, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-25/33" },
|
||||
{ 4, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-50" },
|
||||
{ 4, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX" },
|
||||
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2" },
|
||||
{ 4, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SL" },
|
||||
{ 4, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX2" },
|
||||
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2 WriteBack" },
|
||||
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4" },
|
||||
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4 WriteBack" },
|
||||
|
||||
/* All Pentia:
|
||||
Pentium 1 */
|
||||
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium" },
|
||||
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium A-Step" },
|
||||
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.8u)" },
|
||||
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
|
||||
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium OverDrive" },
|
||||
{ 5, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
|
||||
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
|
||||
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium MMX (0.25u)" },
|
||||
|
||||
/* Pentium 2 / 3 / M / Conroe / whatsnext - all P6 based. */
|
||||
{ 6, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown P6" },
|
||||
{ 6, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
|
||||
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
|
||||
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Klamath)" },
|
||||
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Deschutes)" },
|
||||
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile Pentium II (Tonga)"},
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC,0 , 0, "Pentium II (Dixon)" },
|
||||
|
||||
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Klamath)" },
|
||||
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Drake)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Dixon)" },
|
||||
|
||||
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Covington)" },
|
||||
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Mendocino)" },
|
||||
|
||||
/* -------------------------------------------------- */
|
||||
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Katmai)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
|
||||
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Tualatin)" },
|
||||
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tanner)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
|
||||
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tualatin)" },
|
||||
|
||||
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Katmai)" },
|
||||
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
|
||||
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
|
||||
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Tualatin)" },
|
||||
|
||||
/* Netburst based (Pentium 4 and later)
|
||||
classic P4s */
|
||||
{ 15, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium 4" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "Unknown P-4 Celeron" },
|
||||
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Unknown Xeon" },
|
||||
|
||||
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
|
||||
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
|
||||
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Northwood)" },
|
||||
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
|
||||
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Cedar Mill)" },
|
||||
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
|
||||
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
|
||||
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Northwood)" },
|
||||
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
|
||||
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Cedar Mill)" },
|
||||
|
||||
/* server CPUs */
|
||||
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
|
||||
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
|
||||
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Prestonia)" },
|
||||
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Gallatin)" },
|
||||
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, IRWIN, XEON_ , 0, "Xeon (Irwindale)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Cranford)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, POTOMAC, XEON_ , 0, "Xeon (Potomac)" },
|
||||
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Dempsey)" },
|
||||
|
||||
/* Pentium Ds */
|
||||
{ 15, 4, 4, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (SmithField)" },
|
||||
{ 15, 4, 7, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
|
||||
{ 15, 6, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (Presler)" },
|
||||
|
||||
/* Celeron and Celeron Ds */
|
||||
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Willamette)" },
|
||||
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Northwood)" },
|
||||
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
|
||||
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
|
||||
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Cedar Mill)" },
|
||||
|
||||
/* -------------------------------------------------- */
|
||||
/* Intel Core microarchitecture - P6-based */
|
||||
|
||||
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium M" },
|
||||
{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Unknown Pentium M" },
|
||||
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Banias)" },
|
||||
{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Banias)" },
|
||||
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
|
||||
{ 6, 13, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Dothan)" },
|
||||
{ 6, 13, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Dothan)" },
|
||||
{ 6, 13, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
|
||||
|
||||
{ 6, 12, -1, -1, -1, -1, -1, -1, NC, ATOM_ , 0, "Unknown Atom" },
|
||||
{ 6, 12, -1, -1, -1, -1, -1, -1, DIAMONDVILLE,ATOM_, 0, "Atom (Diamondville)" },
|
||||
{ 6, 12, -1, -1, -1, -1, -1, -1, SILVERTHORNE,ATOM_, 0, "Atom (Silverthorne)" },
|
||||
{ 6, 12, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
|
||||
{ 6, 6, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
|
||||
{ 6, 12, -1, -1, -1, -1, -1, -1, PINEVIEW, ATOM_ , 0, "Atom (Pineview)" },
|
||||
|
||||
/* -------------------------------------------------- */
|
||||
|
||||
{ 6, 14, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Yonah" },
|
||||
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
|
||||
{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO, 0 , 0, "Yonah (Core Duo)" },
|
||||
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, MOBILE_, 0, "Yonah (Core Solo)" },
|
||||
{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO , MOBILE_, 0, "Yonah (Core Duo)" },
|
||||
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
|
||||
|
||||
{ 6, 15, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Core 2" },
|
||||
{ 6, 15, -1, -1, -1, 2, 4096, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo)" },
|
||||
{ 6, 15, -1, -1, -1, 2, 1024, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 1024K" },
|
||||
{ 6, 15, -1, -1, -1, 2, 512, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 512K" },
|
||||
{ 6, 15, -1, -1, -1, 4, -1, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
|
||||
{ 6, 15, -1, -1, -1, 4, 4096, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
|
||||
{ 6, 15, -1, -1, -1, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
|
||||
{ 6, 15, -1, -1, -1, 2, 2048, -1, CORE_DUO, 0 , 0, "Allendale (Core 2 Duo)" },
|
||||
{ 6, 15, -1, -1, -1, 2, -1, -1, MOBILE_CORE_DUO, 0, 0, "Merom (Core 2 Duo)" },
|
||||
{ 6, 15, -1, -1, -1, 2, 2048, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 2048K" },
|
||||
{ 6, 15, -1, -1, -1, 2, 4096, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 4096K" },
|
||||
|
||||
{ 6, 15, -1, -1, 15, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
|
||||
{ 6, 6, -1, -1, 22, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
|
||||
{ 6, 15, -1, -1, 15, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
|
||||
{ 6, 6, -1, -1, 22, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
|
||||
|
||||
|
||||
{ 6, 6, -1, -1, 22, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
|
||||
{ 6, 7, -1, -1, 23, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
|
||||
{ 6, 6, -1, -1, 22, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
|
||||
{ 6, 7, -1, -1, 23, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
|
||||
|
||||
{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_SOLO , 0, 0, "Unknown Core 45nm" },
|
||||
{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_DUO , 0, 0, "Unknown Core 45nm" },
|
||||
{ 6, 7, -1, -1, 23, 2, 1024, -1, WOLFDALE , 0, 0, "Celeron Wolfdale 1M" },
|
||||
{ 6, 7, -1, -1, 23, 2, 2048, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 2M" },
|
||||
{ 6, 7, -1, -1, 23, 2, 3072, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 3M" },
|
||||
{ 6, 7, -1, -1, 23, 2, 6144, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 6M" },
|
||||
{ 6, 7, -1, -1, 23, 1, -1, -1, MOBILE_CORE_DUO , 0, 0, "Penryn (Core 2 Duo)" },
|
||||
{ 6, 7, -1, -1, 23, 2, 1024, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo)" },
|
||||
{ 6, 7, -1, -1, 23, 2, 3072, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 3M" },
|
||||
{ 6, 7, -1, -1, 23, 2, 6144, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 6M" },
|
||||
{ 6, 7, -1, -1, 23, 4, 2048, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 2M"},
|
||||
{ 6, 7, -1, -1, 23, 4, 3072, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 3M"},
|
||||
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 6M"},
|
||||
|
||||
/* Core microarchitecture-based Xeons: */
|
||||
{ 6, 14, -1, -1, 14, 1, -1, -1, NC, XEON_ , 0, "Xeon LV" },
|
||||
{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _5100, "Xeon (Woodcrest)" },
|
||||
{ 6, 15, -1, -1, 15, 2, 2048, -1, NC, XEON_ , _3000, "Xeon (Conroe/2M)" },
|
||||
{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _3000, "Xeon (Conroe/4M)" },
|
||||
{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , X3200, "Xeon (Kentsfield)" },
|
||||
{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , _5300, "Xeon (Clovertown)" },
|
||||
{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _3100, "Xeon (Wolfdale)" },
|
||||
{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _5200, "Xeon (Wolfdale DP)" },
|
||||
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , _5400, "Xeon (Harpertown)" },
|
||||
{ 6, 7, -1, -1, 23, 4, 3072, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/3M)" },
|
||||
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/6M)" },
|
||||
|
||||
/* Nehalem CPUs (45nm): */
|
||||
{ 6, 10, -1, -1, 26, 4, -1, -1, GAINESTOWN, XEON_ , 0, "Gainestown (Xeon)" },
|
||||
{ 6, 10, -1, -1, 26, 4, -1, 4096, GAINESTOWN, XEON_ , 0, "Gainestown 4M (Xeon)" },
|
||||
{ 6, 10, -1, -1, 26, 4, -1, 8192, GAINESTOWN, XEON_ , 0, "Gainestown 8M (Xeon)" },
|
||||
{ 6, 10, -1, -1, 26, 4, -1, -1, NC, XEON_|_7 , 0, "Bloomfield (Xeon)" },
|
||||
{ 6, 10, -1, -1, 26, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Bloomfield (Core i7)" },
|
||||
{ 6, 10, -1, -1, 30, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Lynnfield (Core i7)" },
|
||||
{ 6, 5, -1, -1, 37, 4, -1, 8192, NC, CORE_|_I_|_5 , 0, "Lynnfield (Core i5)" },
|
||||
|
||||
/* Westmere CPUs (32nm): */
|
||||
{ 6, 5, -1, -1, 37, 2, -1, -1, NC, 0 , 0, "Unknown Core i3/i5" },
|
||||
{ 6, 12, -1, -1, 44, -1, -1, -1, WESTMERE, XEON_ , 0, "Westmere (Xeon)" },
|
||||
{ 6, 12, -1, -1, 44, -1, -1, 12288, WESTMERE, XEON_ , 0, "Gulftown (Xeon)" },
|
||||
{ 6, 12, -1, -1, 44, 4, -1, 12288, NC, CORE_|_I_|_7 , 0, "Gulftown (Core i7)" },
|
||||
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_5 , 0, "Clarkdale (Core i5)" },
|
||||
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_3 , 0, "Clarkdale (Core i3)" },
|
||||
{ 6, 5, -1, -1, 37, 2, -1, -1, NC, PENTIUM_ , 0, "Arrandale" },
|
||||
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_7 , 0, "Arrandale (Core i7)" },
|
||||
{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_5 , 0, "Arrandale (Core i5)" },
|
||||
{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_3 , 0, "Arrandale (Core i3)" },
|
||||
|
||||
/* Sandy Bridge CPUs (32nm): */
|
||||
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, 0 , 0, "Unknown Sandy Bridge" },
|
||||
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge (Xeon)" },
|
||||
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
|
||||
{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
|
||||
{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Sandy Bridge (Core i5)" },
|
||||
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge (Core i3)" },
|
||||
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, PENTIUM_ , 0, "Sandy Bridge (Pentium)" },
|
||||
{ 6, 10, -1, -1, 42, 1, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
|
||||
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
|
||||
{ 6, 13, -1, -1, 45, -1, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge-E" },
|
||||
{ 6, 13, -1, -1, 45, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge-E (Xeon)" },
|
||||
|
||||
/* Ivy Bridge CPUs (22nm): */
|
||||
{ 6, 10, -1, -1, 58, -1, -1, -1, NC, XEON_ , 0, "Ivy Bridge (Xeon)" },
|
||||
{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Ivy Bridge (Core i7)" },
|
||||
{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Ivy Bridge (Core i5)" },
|
||||
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Ivy Bridge (Core i3)" },
|
||||
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, PENTIUM_ , 0, "Ivy Bridge (Pentium)" },
|
||||
{ 6, 10, -1, -1, 58, 1, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
|
||||
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
|
||||
{ 6, 14, -1, -1, 62, -1, -1, -1, NC, 0 , 0, "Ivy Bridge-E" },
|
||||
|
||||
/* Haswell CPUs (22nm): */
|
||||
{ 6, 12, -1, -1, 60, -1, -1, -1, NC, XEON_ , 0, "Haswell (Xeon)" },
|
||||
{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
|
||||
{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
|
||||
{ 6, 6, -1, -1, 70, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
|
||||
{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
|
||||
{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
|
||||
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
|
||||
{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
|
||||
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
|
||||
{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
|
||||
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, PENTIUM_ , 0, "Haswell (Pentium)" },
|
||||
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
|
||||
{ 6, 12, -1, -1, 60, 1, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
|
||||
{ 6, 15, -1, -1, 63, -1, -1, -1, NC, 0 , 0, "Haswell-E" },
|
||||
|
||||
/* Broadwell CPUs (14nm): */
|
||||
{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell (Core i7)" },
|
||||
{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell (Core i5)" },
|
||||
{ 6, 13, -1, -1, 61, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
|
||||
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
|
||||
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-U (Core i5)" },
|
||||
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-U (Core i3)" },
|
||||
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, PENTIUM_ , 0, "Broadwell-U (Pentium)" },
|
||||
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CELERON_ , 0, "Broadwell-U (Celeron)" },
|
||||
{ 6, 13, -1, -1, 61, 2, -1, -1, NA, 0 , 0, "Broadwell-U (Core M)" },
|
||||
{ 6, 15, -1, -1, 79, -1, -1, -1, NC, XEON_ , 0, "Broadwell-E (Xeon)" },
|
||||
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-E (Core i3)" },
|
||||
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
|
||||
{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
|
||||
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
|
||||
{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
|
||||
|
||||
/* Skylake CPUs (14nm): */
|
||||
{ 6, 14, -1, -1, 94, -1, -1, -1, NC, XEON_ , 0, "Skylake (Xeon)" },
|
||||
{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Skylake (Core i7)" },
|
||||
{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Skylake (Core i5)" },
|
||||
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Skylake (Core i3)" },
|
||||
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
|
||||
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
|
||||
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
|
||||
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
|
||||
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_7 , 0, "Skylake (Core m7)" },
|
||||
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_5 , 0, "Skylake (Core m5)" },
|
||||
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Skylake (Core m3)" },
|
||||
|
||||
/* Kaby Lake CPUs (14nm): */
|
||||
{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Kaby Lake (Core i7)" },
|
||||
{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Kaby Lake (Core i5)" },
|
||||
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Kaby Lake (Core i3)" },
|
||||
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, PENTIUM_ , 0, "Kaby Lake (Pentium)" },
|
||||
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CELERON_ , 0, "Kaby Lake (Celeron)" },
|
||||
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Kaby Lake (Core m3)" },
|
||||
|
||||
/* Itaniums */
|
||||
{ 7, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Itanium" },
|
||||
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Itanium 2" },
|
||||
};
|
||||
|
||||
|
||||
static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx1[] = {
|
||||
{ 18, CPU_FEATURE_PN },
|
||||
{ 21, CPU_FEATURE_DTS },
|
||||
{ 22, CPU_FEATURE_ACPI },
|
||||
{ 27, CPU_FEATURE_SS },
|
||||
{ 29, CPU_FEATURE_TM },
|
||||
{ 30, CPU_FEATURE_IA64 },
|
||||
{ 31, CPU_FEATURE_PBE },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx1[] = {
|
||||
{ 2, CPU_FEATURE_DTS64 },
|
||||
{ 4, CPU_FEATURE_DS_CPL },
|
||||
{ 5, CPU_FEATURE_VMX },
|
||||
{ 6, CPU_FEATURE_SMX },
|
||||
{ 7, CPU_FEATURE_EST },
|
||||
{ 8, CPU_FEATURE_TM2 },
|
||||
{ 10, CPU_FEATURE_CID },
|
||||
{ 14, CPU_FEATURE_XTPR },
|
||||
{ 15, CPU_FEATURE_PDCM },
|
||||
{ 18, CPU_FEATURE_DCA },
|
||||
{ 21, CPU_FEATURE_X2APIC },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 20, CPU_FEATURE_XD },
|
||||
};
|
||||
const struct feature_map_t matchtable_ebx7[] = {
|
||||
{ 2, CPU_FEATURE_SGX },
|
||||
{ 4, CPU_FEATURE_HLE },
|
||||
{ 11, CPU_FEATURE_RTM },
|
||||
{ 16, CPU_FEATURE_AVX512F },
|
||||
{ 17, CPU_FEATURE_AVX512DQ },
|
||||
{ 18, CPU_FEATURE_RDSEED },
|
||||
{ 19, CPU_FEATURE_ADX },
|
||||
{ 26, CPU_FEATURE_AVX512PF },
|
||||
{ 27, CPU_FEATURE_AVX512ER },
|
||||
{ 28, CPU_FEATURE_AVX512CD },
|
||||
{ 29, CPU_FEATURE_SHA_NI },
|
||||
{ 30, CPU_FEATURE_AVX512BW },
|
||||
{ 31, CPU_FEATURE_AVX512VL },
|
||||
};
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
}
|
||||
// detect TSX/AVX512:
|
||||
if (raw->basic_cpuid[0][0] >= 7) {
|
||||
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Cache kinds distinguished by the Intel cache decoders in this file. */
typedef enum _cache_type_t {
    L1I = 0,    /* level-1 instruction cache */
    L1D = 1,    /* level-1 data cache */
    L2  = 2,
    L3  = 3,
    L4  = 4
} cache_type_t;
|
||||
|
||||
/*
 * Record one cache description in `data`, but only when `on` is non-zero.
 *
 * on       - gate; zero makes the call a no-op
 * cache    - which cache the values describe
 * size     - stored in the matching *_cache field
 * assoc    - stored in the matching *_assoc field (ignored for L1I)
 * linesize - stored in the matching *_cacheline field (ignored for L1I)
 *
 * Any other `cache` value leaves `data` untouched.
 */
static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data)
{
    if (on == 0)
        return;

    if (cache == L1I) {
        /* only the size is kept for the instruction cache */
        data->l1_instruction_cache = size;
    } else if (cache == L1D) {
        data->l1_data_cache = size;
        data->l1_assoc      = assoc;
        data->l1_cacheline  = linesize;
    } else if (cache == L2) {
        data->l2_cache     = size;
        data->l2_assoc     = assoc;
        data->l2_cacheline = linesize;
    } else if (cache == L3) {
        data->l3_cache     = size;
        data->l3_assoc     = assoc;
        data->l3_cacheline = linesize;
    } else if (cache == L4) {
        data->l4_cache     = size;
        data->l4_assoc     = assoc;
        data->l4_cacheline = linesize;
    }
}
|
||||
|
||||
static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
uint8_t f[256] = {0};
|
||||
int reg, off;
|
||||
uint32_t x;
|
||||
for (reg = 0; reg < 4; reg++) {
|
||||
x = raw->basic_cpuid[2][reg];
|
||||
if (x & 0x80000000) continue;
|
||||
for (off = 0; off < 4; off++) {
|
||||
f[x & 0xff] = 1;
|
||||
x >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
check_case(f[0x06], L1I, 8, 4, 32, data);
|
||||
check_case(f[0x08], L1I, 16, 4, 32, data);
|
||||
check_case(f[0x0A], L1D, 8, 2, 32, data);
|
||||
check_case(f[0x0C], L1D, 16, 4, 32, data);
|
||||
check_case(f[0x22], L3, 512, 4, 64, data);
|
||||
check_case(f[0x23], L3, 1024, 8, 64, data);
|
||||
check_case(f[0x25], L3, 2048, 8, 64, data);
|
||||
check_case(f[0x29], L3, 4096, 8, 64, data);
|
||||
check_case(f[0x2C], L1D, 32, 8, 64, data);
|
||||
check_case(f[0x30], L1I, 32, 8, 64, data);
|
||||
check_case(f[0x39], L2, 128, 4, 64, data);
|
||||
check_case(f[0x3A], L2, 192, 6, 64, data);
|
||||
check_case(f[0x3B], L2, 128, 2, 64, data);
|
||||
check_case(f[0x3C], L2, 256, 4, 64, data);
|
||||
check_case(f[0x3D], L2, 384, 6, 64, data);
|
||||
check_case(f[0x3E], L2, 512, 4, 64, data);
|
||||
check_case(f[0x41], L2, 128, 4, 32, data);
|
||||
check_case(f[0x42], L2, 256, 4, 32, data);
|
||||
check_case(f[0x43], L2, 512, 4, 32, data);
|
||||
check_case(f[0x44], L2, 1024, 4, 32, data);
|
||||
check_case(f[0x45], L2, 2048, 4, 32, data);
|
||||
check_case(f[0x46], L3, 4096, 4, 64, data);
|
||||
check_case(f[0x47], L3, 8192, 8, 64, data);
|
||||
check_case(f[0x4A], L3, 6144, 12, 64, data);
|
||||
check_case(f[0x4B], L3, 8192, 16, 64, data);
|
||||
check_case(f[0x4C], L3, 12288, 12, 64, data);
|
||||
check_case(f[0x4D], L3, 16384, 16, 64, data);
|
||||
check_case(f[0x4E], L2, 6144, 24, 64, data);
|
||||
check_case(f[0x60], L1D, 16, 8, 64, data);
|
||||
check_case(f[0x66], L1D, 8, 4, 64, data);
|
||||
check_case(f[0x67], L1D, 16, 4, 64, data);
|
||||
check_case(f[0x68], L1D, 32, 4, 64, data);
|
||||
/* The following four entries are trace cache. Intel does not
|
||||
* specify a cache-line size, so we use -1 instead
|
||||
*/
|
||||
check_case(f[0x70], L1I, 12, 8, -1, data);
|
||||
check_case(f[0x71], L1I, 16, 8, -1, data);
|
||||
check_case(f[0x72], L1I, 32, 8, -1, data);
|
||||
check_case(f[0x73], L1I, 64, 8, -1, data);
|
||||
|
||||
check_case(f[0x78], L2, 1024, 4, 64, data);
|
||||
check_case(f[0x79], L2, 128, 8, 64, data);
|
||||
check_case(f[0x7A], L2, 256, 8, 64, data);
|
||||
check_case(f[0x7B], L2, 512, 8, 64, data);
|
||||
check_case(f[0x7C], L2, 1024, 8, 64, data);
|
||||
check_case(f[0x7D], L2, 2048, 8, 64, data);
|
||||
check_case(f[0x7F], L2, 512, 2, 64, data);
|
||||
check_case(f[0x82], L2, 256, 8, 32, data);
|
||||
check_case(f[0x83], L2, 512, 8, 32, data);
|
||||
check_case(f[0x84], L2, 1024, 8, 32, data);
|
||||
check_case(f[0x85], L2, 2048, 8, 32, data);
|
||||
check_case(f[0x86], L2, 512, 4, 64, data);
|
||||
check_case(f[0x87], L2, 1024, 8, 64, data);
|
||||
|
||||
if (f[0x49]) {
|
||||
/* This flag is overloaded with two meanings. On Xeon MP
|
||||
* (family 0xf, model 0x6) this means L3 cache. On all other
|
||||
* CPUs (notably Conroe et al), this is L2 cache. In both cases
|
||||
* it means 4MB, 16-way associative, 64-byte line size.
|
||||
*/
|
||||
if (data->family == 0xf && data->model == 0x6) {
|
||||
data->l3_cache = 4096;
|
||||
data->l3_assoc = 16;
|
||||
data->l3_cacheline = 64;
|
||||
} else {
|
||||
data->l2_cache = 4096;
|
||||
data->l2_assoc = 16;
|
||||
data->l2_cacheline = 64;
|
||||
}
|
||||
}
|
||||
if (f[0x40]) {
|
||||
/* Again, a special flag. It means:
|
||||
* 1) If no L2 is specified, then CPU is w/o L2 (0 KB)
|
||||
* 2) If L2 is specified by other flags, then, CPU is w/o L3.
|
||||
*/
|
||||
if (data->l2_cache == -1) {
|
||||
data->l2_cache = 0;
|
||||
} else {
|
||||
data->l3_cache = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int ecx;
|
||||
int ways, partitions, linesize, sets, size, level, typenumber;
|
||||
cache_type_t type;
|
||||
for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) {
|
||||
typenumber = raw->intel_fn4[ecx][0] & 0x1f;
|
||||
if (typenumber == 0) break;
|
||||
level = (raw->intel_fn4[ecx][0] >> 5) & 0x7;
|
||||
if (level == 1 && typenumber == 1)
|
||||
type = L1D;
|
||||
else if (level == 1 && typenumber == 2)
|
||||
type = L1I;
|
||||
else if (level == 2 && typenumber == 3)
|
||||
type = L2;
|
||||
else if (level == 3 && typenumber == 3)
|
||||
type = L3;
|
||||
else if (level == 4 && typenumber == 3)
|
||||
type = L4;
|
||||
else {
|
||||
warnf("deterministic_cache: unknown level/typenumber combo (%d/%d), cannot\n", level, typenumber);
|
||||
warnf("deterministic_cache: recognize cache type\n");
|
||||
continue;
|
||||
}
|
||||
ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1;
|
||||
partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1;
|
||||
linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1;
|
||||
sets = raw->intel_fn4[ecx][2] + 1;
|
||||
size = ways * partitions * linesize * sets / 1024;
|
||||
check_case(1, type, size, ways, linesize, data);
|
||||
}
|
||||
}
|
||||
|
||||
static int decode_intel_extended_topology(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int i, level_type, num_smt = -1, num_core = -1;
|
||||
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
|
||||
level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8;
|
||||
switch (level_type) {
|
||||
case 0x01:
|
||||
num_smt = raw->intel_fn11[i][1] & 0xffff;
|
||||
break;
|
||||
case 0x02:
|
||||
num_core = raw->intel_fn11[i][1] & 0xffff;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (num_smt == -1 || num_core == -1) return 0;
|
||||
data->num_logical_cpus = num_core;
|
||||
data->num_cores = num_core / num_smt;
|
||||
// make sure num_cores is at least 1. In VMs, the CPUID instruction
|
||||
// is rigged and may give nonsensical results, but we should at least
|
||||
// avoid outputs like data->num_cores == 0.
|
||||
if (data->num_cores <= 0) data->num_cores = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int logical_cpus = -1, num_cores = -1;
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 11) {
|
||||
if (decode_intel_extended_topology(raw, data)) return;
|
||||
}
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
|
||||
if (raw->basic_cpuid[0][0] >= 4) {
|
||||
num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f);
|
||||
}
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_HT]) {
|
||||
if (num_cores > 1) {
|
||||
data->num_cores = num_cores;
|
||||
data->num_logical_cpus = logical_cpus;
|
||||
} else {
|
||||
data->num_cores = 1;
|
||||
data->num_logical_cpus = (logical_cpus >= 1 ? logical_cpus : 1);
|
||||
if (data->num_logical_cpus == 1)
|
||||
data->flags[CPU_FEATURE_HT] = 0;
|
||||
}
|
||||
} else {
|
||||
data->num_cores = data->num_logical_cpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
|
||||
{
|
||||
intel_code_t code = (intel_code_t) NC;
|
||||
intel_code_and_bits_t result;
|
||||
uint64_t bits = 0;
|
||||
int i = 0;
|
||||
const char* bs = data->brand_str;
|
||||
const char* s;
|
||||
const struct { intel_code_t c; const char *search; } matchtable[] = {
|
||||
{ PENTIUM_M, "Pentium(R) M" },
|
||||
{ CORE_SOLO, "Pentium(R) Dual CPU" },
|
||||
{ CORE_SOLO, "Pentium(R) Dual-Core" },
|
||||
{ PENTIUM_D, "Pentium(R) D" },
|
||||
{ CORE_SOLO, "Genuine Intel(R) CPU" },
|
||||
{ CORE_SOLO, "Intel(R) Core(TM)" },
|
||||
{ DIAMONDVILLE, "CPU [N ][23]## " },
|
||||
{ SILVERTHORNE, "CPU Z" },
|
||||
{ PINEVIEW, "CPU [ND][45]## " },
|
||||
{ CEDARVIEW, "CPU [ND]#### " },
|
||||
};
|
||||
|
||||
const struct { uint64_t bit; const char* search; } bit_matchtable[] = {
|
||||
{ XEON_, "Xeon" },
|
||||
{ _MP, " MP" },
|
||||
{ ATOM_, "Atom(TM) CPU" },
|
||||
{ MOBILE_, "Mobile" },
|
||||
{ CELERON_, "Celeron" },
|
||||
{ PENTIUM_, "Pentium" },
|
||||
};
|
||||
|
||||
for (i = 0; i < COUNT_OF(bit_matchtable); i++) {
|
||||
if (match_pattern(bs, bit_matchtable[i].search))
|
||||
bits |= bit_matchtable[i].bit;
|
||||
}
|
||||
|
||||
if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) {
|
||||
bits |= CORE_;
|
||||
i--;
|
||||
switch (bs[i + 9]) {
|
||||
case 'i': bits |= _I_; break;
|
||||
case 'm': bits |= _M_; break;
|
||||
}
|
||||
switch (bs[i + 10]) {
|
||||
case '3': bits |= _3; break;
|
||||
case '5': bits |= _5; break;
|
||||
case '7': bits |= _7; break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < COUNT_OF(matchtable); i++)
|
||||
if (match_pattern(bs, matchtable[i].search)) {
|
||||
code = matchtable[i].c;
|
||||
break;
|
||||
}
|
||||
debugf(2, "intel matchtable result is %d\n", code);
|
||||
if (bits & XEON_) {
|
||||
if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##"))
|
||||
bits |= _7;
|
||||
else if (match_pattern(bs, "[ELXW]55##"))
|
||||
code = GAINESTOWN;
|
||||
else if (match_pattern(bs, "[ELXW]56##"))
|
||||
code = WESTMERE;
|
||||
else if (data->l3_cache > 0 && data->family == 16)
|
||||
/* restrict by family, since later Xeons also have L3 ... */
|
||||
code = IRWIN;
|
||||
}
|
||||
if (match_all(bits, XEON_ + _MP) && data->l3_cache > 0)
|
||||
code = POTOMAC;
|
||||
if (code == CORE_SOLO) {
|
||||
s = strstr(bs, "CPU");
|
||||
if (s) {
|
||||
s += 3;
|
||||
while (*s == ' ') s++;
|
||||
if (*s == 'T')
|
||||
bits |= MOBILE_;
|
||||
}
|
||||
}
|
||||
if (code == CORE_SOLO) {
|
||||
switch (data->num_cores) {
|
||||
case 1: break;
|
||||
case 2:
|
||||
{
|
||||
code = CORE_DUO;
|
||||
if (data->num_logical_cpus > 2)
|
||||
code = DUAL_CORE_HT;
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
code = QUAD_CORE;
|
||||
if (data->num_logical_cpus > 4)
|
||||
code = QUAD_CORE_HT;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
code = MORE_THAN_QUADCORE; break;
|
||||
}
|
||||
}
|
||||
|
||||
if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) {
|
||||
if (data->ext_model < 23) {
|
||||
code = MEROM;
|
||||
} else {
|
||||
code = PENRYN;
|
||||
}
|
||||
}
|
||||
if (data->ext_model == 23 &&
|
||||
(code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) {
|
||||
code = WOLFDALE;
|
||||
}
|
||||
|
||||
result.code = code;
|
||||
result.bits = bits;
|
||||
return result;
|
||||
}
|
||||
|
||||
static intel_model_t get_model_code(struct cpu_id_t* data)
|
||||
{
|
||||
int i = 0;
|
||||
int l = (int) strlen(data->brand_str);
|
||||
const char *bs = data->brand_str;
|
||||
int mod_flags = 0, model_no = 0, ndigs = 0;
|
||||
/* If the CPU is a Core ix, then just return the model number generation: */
|
||||
if ((i = match_pattern(bs, "Core(TM) i[357]")) != 0) {
|
||||
i += 11;
|
||||
if (i + 4 >= l) return UNKNOWN;
|
||||
if (bs[i] == '2') return _2xxx;
|
||||
if (bs[i] == '3') return _3xxx;
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
/* For Core2-based Xeons: */
|
||||
while (i < l - 3) {
|
||||
if (bs[i] == 'C' && bs[i+1] == 'P' && bs[i+2] == 'U')
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
if (i >= l - 3) return UNKNOWN;
|
||||
i += 3;
|
||||
while (i < l - 4 && bs[i] == ' ') i++;
|
||||
if (i >= l - 4) return UNKNOWN;
|
||||
while (i < l - 4 && !isdigit(bs[i])) {
|
||||
if (bs[i] >= 'A' && bs[i] <= 'Z')
|
||||
mod_flags |= (1 << (bs[i] - 'A'));
|
||||
i++;
|
||||
}
|
||||
if (i >= l - 4) return UNKNOWN;
|
||||
while (isdigit(bs[i])) {
|
||||
ndigs++;
|
||||
model_no = model_no * 10 + (int) (bs[i] - '0');
|
||||
i++;
|
||||
}
|
||||
if (ndigs != 4) return UNKNOWN;
|
||||
#define HAVE(ch, flags) ((flags & (1 << ((int)(ch-'A')))) != 0)
|
||||
switch (model_no / 100) {
|
||||
case 30: return _3000;
|
||||
case 31: return _3100;
|
||||
case 32:
|
||||
{
|
||||
return (HAVE('X', mod_flags)) ? X3200 : _3200;
|
||||
}
|
||||
case 33:
|
||||
{
|
||||
return (HAVE('X', mod_flags)) ? X3300 : _3300;
|
||||
}
|
||||
case 51: return _5100;
|
||||
case 52: return _5200;
|
||||
case 53: return _5300;
|
||||
case 54: return _5400;
|
||||
default:
|
||||
return UNKNOWN;
|
||||
}
|
||||
#undef HAVE
|
||||
}
|
||||
|
||||
static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
struct cpu_epc_t epc;
|
||||
int i;
|
||||
|
||||
if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf
|
||||
if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS
|
||||
|
||||
// decode sub-leaf 0:
|
||||
if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1;
|
||||
if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1;
|
||||
if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2])
|
||||
data->sgx.present = 1;
|
||||
data->sgx.misc_select = raw->basic_cpuid[0x12][1];
|
||||
data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff;
|
||||
data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff;
|
||||
|
||||
// decode sub-leaf 1:
|
||||
data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32);
|
||||
data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32);
|
||||
|
||||
// decode higher-order subleafs, whenever present:
|
||||
data->sgx.num_epc_sections = -1;
|
||||
for (i = 0; i < 1000000; i++) {
|
||||
epc = cpuid_get_epc(i, raw);
|
||||
if (epc.length == 0) {
|
||||
debugf(2, "SGX: epc section request for %d returned null, no more EPC sections.\n", i);
|
||||
data->sgx.num_epc_sections = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (data->sgx.num_epc_sections == -1) {
|
||||
debugf(1, "SGX: warning: seems to be infinitude of EPC sections.\n");
|
||||
data->sgx.num_epc_sections = 1000000;
|
||||
}
|
||||
}
|
||||
|
||||
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw)
|
||||
{
|
||||
uint32_t regs[4];
|
||||
struct cpu_epc_t retval = {0, 0};
|
||||
if (raw && index < MAX_INTELFN12H_LEVEL - 2) {
|
||||
// this was queried already, use the data:
|
||||
memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs));
|
||||
} else {
|
||||
// query this ourselves:
|
||||
regs[0] = 0x12;
|
||||
regs[2] = 2 + index;
|
||||
regs[1] = regs[3] = 0;
|
||||
cpu_exec_cpuid_ext(regs);
|
||||
}
|
||||
|
||||
// decode values:
|
||||
if ((regs[0] & 0xf) == 0x1) {
|
||||
retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
|
||||
retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
|
||||
retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
|
||||
retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
intel_code_and_bits_t brand;
|
||||
intel_model_t model_code;
|
||||
int i;
|
||||
char* brand_code_str = NULL;
|
||||
|
||||
load_intel_features(raw, data);
|
||||
if (raw->basic_cpuid[0][0] >= 4) {
|
||||
/* Deterministic way is preferred, being more generic */
|
||||
decode_intel_deterministic_cache_info(raw, data);
|
||||
} else if (raw->basic_cpuid[0][0] >= 2) {
|
||||
decode_intel_oldstyle_cache_info(raw, data);
|
||||
}
|
||||
decode_intel_number_of_cores(raw, data);
|
||||
|
||||
brand = get_brand_code_and_bits(data);
|
||||
model_code = get_model_code(data);
|
||||
for (i = 0; i < COUNT_OF(intel_bcode_str); i++) {
|
||||
if (brand.code == intel_bcode_str[i].code) {
|
||||
brand_code_str = intel_bcode_str[i].str;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (brand_code_str)
|
||||
debugf(2, "Detected Intel brand code: %d (%s)\n", brand.code, brand_code_str);
|
||||
else
|
||||
debugf(2, "Detected Intel brand code: %d\n", brand.code);
|
||||
if (brand.bits) {
|
||||
debugf(2, "Detected Intel bits: ");
|
||||
debug_print_lbits(2, brand.bits);
|
||||
}
|
||||
debugf(2, "Detected Intel model code: %d\n", model_code);
|
||||
|
||||
internal->code.intel = brand.code;
|
||||
internal->bits = brand.bits;
|
||||
|
||||
if (data->flags[CPU_FEATURE_SGX]) {
|
||||
debugf(2, "SGX seems to be present, decoding...\n");
|
||||
// if SGX is indicated by the CPU, verify its presence:
|
||||
decode_intel_sgx_features(raw, data);
|
||||
}
|
||||
|
||||
internal->score = match_cpu_codename(cpudb_intel, COUNT_OF(cpudb_intel), data,
|
||||
brand.code, brand.bits, model_code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void cpuid_get_list_intel(struct cpu_list_t* list)
|
||||
{
|
||||
generic_get_cpu_list(cpudb_intel, COUNT_OF(cpudb_intel), list);
|
||||
}
|
||||
@@ -27,5 +27,6 @@
|
||||
#define __RECOG_INTEL_H__
|
||||
|
||||
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
|
||||
void cpuid_get_list_intel(struct cpu_list_t* list);
|
||||
|
||||
#endif /*__RECOG_INTEL_H__*/
|
||||
392
compat/winansi.c
Normal file
392
compat/winansi.c
Normal file
@@ -0,0 +1,392 @@
|
||||
/**
|
||||
* Old Git implementation of windows terminal colors (2009)
|
||||
* before use of a threaded wrapper.
|
||||
*/
|
||||
|
||||
#undef NOGDI
|
||||
#include <windows.h>
|
||||
#include <wingdi.h>
|
||||
#include <winreg.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <io.h>
|
||||
|
||||
#include "compat/winansi.h"
|
||||
/*
|
||||
* Copyright 2008 Peter Harris <git@peter.is-a-geek.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
Functions to be wrapped:
|
||||
*/
|
||||
#undef printf
|
||||
#undef fprintf
|
||||
#undef fputs
|
||||
#undef vfprintf
|
||||
/* TODO: write */
|
||||
|
||||
/*
|
||||
ANSI codes used by git: m, K
|
||||
|
||||
This file is git-specific. Therefore, this file does not attempt
|
||||
to implement any codes that are not used by git.
|
||||
*/
|
||||
|
||||
static HANDLE console;
|
||||
static WORD plain_attr;
|
||||
static WORD attr;
|
||||
static int negative;
|
||||
|
||||
static void init(void)
|
||||
{
|
||||
CONSOLE_SCREEN_BUFFER_INFO sbi;
|
||||
|
||||
static int initialized = 0;
|
||||
if (initialized)
|
||||
return;
|
||||
|
||||
console = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
if (console == INVALID_HANDLE_VALUE)
|
||||
console = NULL;
|
||||
|
||||
if (!console)
|
||||
return;
|
||||
|
||||
GetConsoleScreenBufferInfo(console, &sbi);
|
||||
attr = plain_attr = sbi.wAttributes;
|
||||
negative = 0;
|
||||
|
||||
initialized = 1;
|
||||
}
|
||||
|
||||
static int write_console(const char *str, int len)
|
||||
{
|
||||
/* convert utf-8 to utf-16, write directly to console */
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, str, len, NULL, 0);
|
||||
wchar_t *wbuf = (wchar_t *)alloca(wlen * sizeof(wchar_t));
|
||||
MultiByteToWideChar(CP_UTF8, 0, str, len, wbuf, wlen);
|
||||
|
||||
WriteConsoleW(console, wbuf, wlen, NULL, NULL);
|
||||
|
||||
/* return original (utf-8 encoded) length */
|
||||
return len;
|
||||
}
|
||||
|
||||
#define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE)
|
||||
#define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE)
|
||||
|
||||
static void set_console_attr(void)
|
||||
{
|
||||
WORD attributes = attr;
|
||||
if (negative) {
|
||||
attributes &= ~FOREGROUND_ALL;
|
||||
attributes &= ~BACKGROUND_ALL;
|
||||
|
||||
/* This could probably use a bitmask
|
||||
instead of a series of ifs */
|
||||
if (attr & FOREGROUND_RED)
|
||||
attributes |= BACKGROUND_RED;
|
||||
if (attr & FOREGROUND_GREEN)
|
||||
attributes |= BACKGROUND_GREEN;
|
||||
if (attr & FOREGROUND_BLUE)
|
||||
attributes |= BACKGROUND_BLUE;
|
||||
|
||||
if (attr & BACKGROUND_RED)
|
||||
attributes |= FOREGROUND_RED;
|
||||
if (attr & BACKGROUND_GREEN)
|
||||
attributes |= FOREGROUND_GREEN;
|
||||
if (attr & BACKGROUND_BLUE)
|
||||
attributes |= FOREGROUND_BLUE;
|
||||
}
|
||||
SetConsoleTextAttribute(console, attributes);
|
||||
}
|
||||
|
||||
static void erase_in_line(void)
|
||||
{
|
||||
CONSOLE_SCREEN_BUFFER_INFO sbi;
|
||||
DWORD dummy; /* Needed for Windows 7 (or Vista) regression */
|
||||
|
||||
if (!console)
|
||||
return;
|
||||
|
||||
GetConsoleScreenBufferInfo(console, &sbi);
|
||||
FillConsoleOutputCharacterA(console, ' ',
|
||||
sbi.dwSize.X - sbi.dwCursorPosition.X, sbi.dwCursorPosition,
|
||||
&dummy);
|
||||
}
|
||||
|
||||
|
||||
static const char *set_attr(const char *str)
|
||||
{
|
||||
const char *func;
|
||||
size_t len = strspn(str, "0123456789;");
|
||||
func = str + len;
|
||||
|
||||
switch (*func) {
|
||||
case 'm':
|
||||
do {
|
||||
long val = strtol(str, (char **)&str, 10);
|
||||
switch (val) {
|
||||
case 0: /* reset */
|
||||
attr = plain_attr;
|
||||
negative = 0;
|
||||
break;
|
||||
case 1: /* bold */
|
||||
attr |= FOREGROUND_INTENSITY;
|
||||
break;
|
||||
case 2: /* faint */
|
||||
case 22: /* normal */
|
||||
attr &= ~FOREGROUND_INTENSITY;
|
||||
break;
|
||||
case 3: /* italic */
|
||||
/* Unsupported */
|
||||
break;
|
||||
case 4: /* underline */
|
||||
case 21: /* double underline */
|
||||
/* Wikipedia says this flag does nothing */
|
||||
/* Furthermore, mingw doesn't define this flag
|
||||
attr |= COMMON_LVB_UNDERSCORE; */
|
||||
break;
|
||||
case 24: /* no underline */
|
||||
/* attr &= ~COMMON_LVB_UNDERSCORE; */
|
||||
break;
|
||||
case 5: /* slow blink */
|
||||
case 6: /* fast blink */
|
||||
/* We don't have blink, but we do have
|
||||
background intensity */
|
||||
attr |= BACKGROUND_INTENSITY;
|
||||
break;
|
||||
case 25: /* no blink */
|
||||
attr &= ~BACKGROUND_INTENSITY;
|
||||
break;
|
||||
case 7: /* negative */
|
||||
negative = 1;
|
||||
break;
|
||||
case 27: /* positive */
|
||||
negative = 0;
|
||||
break;
|
||||
case 8: /* conceal */
|
||||
case 28: /* reveal */
|
||||
/* Unsupported */
|
||||
break;
|
||||
case 30: /* Black */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
break;
|
||||
case 31: /* Red */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= FOREGROUND_RED;
|
||||
break;
|
||||
case 32: /* Green */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= FOREGROUND_GREEN;
|
||||
break;
|
||||
case 33: /* Yellow */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= FOREGROUND_RED | FOREGROUND_GREEN;
|
||||
break;
|
||||
case 34: /* Blue */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= FOREGROUND_BLUE;
|
||||
break;
|
||||
case 35: /* Magenta */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= FOREGROUND_RED | FOREGROUND_BLUE;
|
||||
break;
|
||||
case 36: /* Cyan */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= FOREGROUND_GREEN | FOREGROUND_BLUE;
|
||||
break;
|
||||
case 37: /* White */
|
||||
attr |= FOREGROUND_RED |
|
||||
FOREGROUND_GREEN |
|
||||
FOREGROUND_BLUE;
|
||||
break;
|
||||
case 38: /* Unknown */
|
||||
break;
|
||||
case 39: /* reset */
|
||||
attr &= ~FOREGROUND_ALL;
|
||||
attr |= (plain_attr & FOREGROUND_ALL);
|
||||
break;
|
||||
case 40: /* Black */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
break;
|
||||
case 41: /* Red */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= BACKGROUND_RED;
|
||||
break;
|
||||
case 42: /* Green */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= BACKGROUND_GREEN;
|
||||
break;
|
||||
case 43: /* Yellow */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= BACKGROUND_RED | BACKGROUND_GREEN;
|
||||
break;
|
||||
case 44: /* Blue */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= BACKGROUND_BLUE;
|
||||
break;
|
||||
case 45: /* Magenta */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= BACKGROUND_RED | BACKGROUND_BLUE;
|
||||
break;
|
||||
case 46: /* Cyan */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= BACKGROUND_GREEN | BACKGROUND_BLUE;
|
||||
break;
|
||||
case 47: /* White */
|
||||
attr |= BACKGROUND_RED |
|
||||
BACKGROUND_GREEN |
|
||||
BACKGROUND_BLUE;
|
||||
break;
|
||||
case 48: /* Unknown */
|
||||
break;
|
||||
case 49: /* reset */
|
||||
attr &= ~BACKGROUND_ALL;
|
||||
attr |= (plain_attr & BACKGROUND_ALL);
|
||||
break;
|
||||
default:
|
||||
/* Unsupported code */
|
||||
break;
|
||||
}
|
||||
str++;
|
||||
} while (*(str - 1) == ';');
|
||||
|
||||
set_console_attr();
|
||||
break;
|
||||
case 'K':
|
||||
erase_in_line();
|
||||
break;
|
||||
default:
|
||||
/* Unsupported code */
|
||||
break;
|
||||
}
|
||||
|
||||
return func + 1;
|
||||
}
|
||||
|
||||
/*
 * Write `str` to the console, interpreting "ESC [" control sequences
 * through set_attr() instead of printing them.
 *
 * `stream` is flushed first.  Returns the number of bytes of `str` that
 * were consumed (plain text written plus control sequences interpreted).
 * Processing stops early if write_console() reports a short write.
 */
static int ansi_emulate(const char *str, FILE *stream)
{
    int consumed = 0;

    fflush(stream);

    while (*str) {
        const char *esc = strstr(str, "\033[");
        const char *next;
        int plain_len;

        if (!esc) {
            /* no more control sequences: emit the remainder and finish */
            consumed += write_console(str, (int) strlen(str));
            return consumed;
        }

        /* text in front of the control sequence */
        plain_len = (int) (esc - str);
        if (plain_len) {
            int written = write_console(str, plain_len);
            consumed += written;
            if (written < plain_len)
                return consumed;
        }

        /* step over "ESC [" and interpret what follows */
        str = esc + 2;
        consumed += 2;

        next = set_attr(str);
        consumed += (int) (next - str);
        str = next;
    }
    return consumed;
}
|
||||
|
||||
int winansi_fputs(const char *str, FILE *stream)
|
||||
{
|
||||
int rv;
|
||||
|
||||
if (!isatty(fileno(stream)))
|
||||
return fputs(str, stream);
|
||||
|
||||
init();
|
||||
|
||||
if (!console)
|
||||
return fputs(str, stream);
|
||||
|
||||
rv = ansi_emulate(str, stream);
|
||||
|
||||
if (rv >= 0)
|
||||
return 0;
|
||||
else
|
||||
return EOF;
|
||||
}
|
||||
|
||||
int winansi_vfprintf(FILE *stream, const char *format, va_list list)
|
||||
{
|
||||
int len, rv;
|
||||
char small_buf[256] = { 0 };
|
||||
char *buf = small_buf;
|
||||
va_list cp;
|
||||
|
||||
if (!isatty(fileno(stream)))
|
||||
goto abort;
|
||||
|
||||
init();
|
||||
|
||||
if (!console)
|
||||
goto abort;
|
||||
|
||||
va_copy(cp, list);
|
||||
len = vsnprintf(small_buf, sizeof(small_buf), format, cp);
|
||||
#ifdef WIN32
|
||||
/* bug on long strings without that */
|
||||
if (len == -1)
|
||||
len = _vscprintf(format, cp);
|
||||
#endif
|
||||
va_end(cp);
|
||||
|
||||
if (len > sizeof(small_buf) - 1) {
|
||||
buf = malloc(len + 1);
|
||||
if (!buf)
|
||||
goto abort;
|
||||
|
||||
len = vsnprintf(buf, len + 1, format, list);
|
||||
#ifdef WIN32
|
||||
if (len == -1)
|
||||
len = _vscprintf(format, list);
|
||||
#endif
|
||||
}
|
||||
|
||||
rv = ansi_emulate(buf, stream);
|
||||
|
||||
if (buf != small_buf)
|
||||
free(buf);
|
||||
return rv;
|
||||
|
||||
abort:
|
||||
rv = vfprintf(stream, format, list);
|
||||
return rv;
|
||||
}
|
||||
|
||||
/*
 * fprintf() replacement: collects the variable arguments and forwards them
 * to winansi_vfprintf(), returning its result.
 */
int winansi_fprintf(FILE *stream, const char *format, ...)
{
    va_list args;
    int result;

    va_start(args, format);
    result = winansi_vfprintf(stream, format, args);
    va_end(args);

    return result;
}
|
||||
|
||||
/*
 * printf() replacement: same as winansi_fprintf() with stdout as the
 * stream; returns the result of winansi_vfprintf().
 */
int winansi_printf(const char *format, ...)
{
    va_list args;
    int result;

    va_start(args, format);
    result = winansi_vfprintf(stdout, format, args);
    va_end(args);

    return result;
}
|
||||
32
compat/winansi.h
Normal file
32
compat/winansi.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
 * ANSI emulation wrappers
 *
 * On Windows builds (WIN32 defined) this header declares the winansi_*
 * functions and redirects fputs / printf / fprintf / vfprintf to them by
 * macro.  On other platforms it expands to nothing.
 */
#ifdef WIN32
#include <windows.h>
#include <stdarg.h>     /* va_list, used by winansi_vfprintf() below */
#include <stddef.h>
#include <stdio.h>

#define isatty(fd) _isatty(fd)
#define fileno(fd) _fileno(fd)

#ifdef __cplusplus
extern "C" {
#endif
int winansi_fputs(const char *str, FILE *stream);
int winansi_printf(const char *format, ...);
int winansi_fprintf(FILE *stream, const char *format, ...);
int winansi_vfprintf(FILE *stream, const char *format, va_list list);
#ifdef __cplusplus
}
#endif

/* drop any earlier macro definition before installing the wrappers */
#undef fputs
#undef printf
#undef fprintf
#undef vfprintf

#define fputs winansi_fputs
#define printf winansi_printf
#define fprintf winansi_fprintf
#define vfprintf winansi_vfprintf

#endif
|
||||
111
cpu.c
Normal file
111
cpu.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cpuid.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
# include <libcpuid.h>
|
||||
#endif
|
||||
|
||||
#include "cpu.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
void cpu_init_common() {
|
||||
struct cpu_raw_data_t raw = { 0 };
|
||||
struct cpu_id_t data = { 0 };
|
||||
|
||||
cpuid_get_raw_data(&raw);
|
||||
cpu_identify(&raw, &data);
|
||||
|
||||
strncpy(cpu_info.brand, data.brand_str, sizeof(cpu_info.brand) - 1);
|
||||
|
||||
cpu_info.total_logical_cpus = data.total_logical_cpus;
|
||||
cpu_info.sockets = data.total_logical_cpus / data.num_logical_cpus;
|
||||
cpu_info.total_cores = data.num_cores * cpu_info.sockets;
|
||||
cpu_info.l3_cache = data.l3_cache > 0 ? data.l3_cache * cpu_info.sockets : 0;
|
||||
|
||||
// Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97
|
||||
if (data.vendor == VENDOR_AMD && data.l3_cache <= 0 && data.l2_assoc == 16 && data.ext_family >= 21) {
|
||||
cpu_info.l2_cache = data.l2_cache * (cpu_info.total_cores / 2) * cpu_info.sockets;
|
||||
}
|
||||
else {
|
||||
cpu_info.l2_cache = data.l2_cache > 0 ? data.l2_cache * cpu_info.total_cores * cpu_info.sockets : 0;
|
||||
}
|
||||
|
||||
|
||||
# ifdef __x86_64__
|
||||
cpu_info.flags |= CPU_FLAG_X86_64;
|
||||
# endif
|
||||
|
||||
if (data.flags[CPU_FEATURE_AES]) {
|
||||
cpu_info.flags |= CPU_FLAG_AES;
|
||||
}
|
||||
|
||||
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||
}
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
if (data.vendor == VENDOR_AMD) {
|
||||
cpu_info.assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
|
||||
}
|
||||
else if (data.vendor == VENDOR_INTEL) {
|
||||
cpu_info.assembly = ASM_INTEL;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
|
||||
if (cpu_info.total_logical_cpus == 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cache = cpu_info.l3_cache ? cpu_info.l3_cache : cpu_info.l2_cache;
|
||||
int count = 0;
|
||||
const int size = (algo ? 1024 : 2048) * (double_hash ? 2 : 1);
|
||||
|
||||
if (cache) {
|
||||
count = cache / size;
|
||||
}
|
||||
else {
|
||||
count = cpu_info.total_logical_cpus / 2;
|
||||
}
|
||||
|
||||
if (count > cpu_info.total_logical_cpus) {
|
||||
count = cpu_info.total_logical_cpus;
|
||||
}
|
||||
|
||||
if (((float) count / cpu_info.total_logical_cpus * 100) > max_cpu_usage) {
|
||||
count = ceil((float) cpu_info.total_logical_cpus * (max_cpu_usage / 100.0));
|
||||
}
|
||||
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
@@ -4,8 +4,9 @@
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -21,41 +22,34 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_H__
|
||||
#define __CRYPTONIGHT_H__
|
||||
#ifndef XMRIG_CPU_H
|
||||
#define XMRIG_CPU_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Host CPU description shared through the global `cpu_info` instance. */
struct cpu_info {
|
||||
/* Physical cores summed over all sockets. */
int total_cores;
|
||||
/* Logical CPUs (hardware threads) in the whole system. */
int total_logical_cpus;
|
||||
/* Bitwise OR of enum cpu_flags values. */
int flags;
|
||||
/* Number of CPU packages. */
int sockets;
|
||||
/* Aggregate L2 cache size; NOTE(review): unit presumably KB, confirm. */
int l2_cache;
|
||||
/* Aggregate L3 cache size, 0 when unknown; NOTE(review): unit presumably KB, confirm. */
int l3_cache;
|
||||
/* CPU brand string. */
char brand[64];
|
||||
/* Assembly variant selector (an ASM_* value). */
int assembly;
|
||||
};
|
||||
|
||||
extern struct cpu_info cpu_info;
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#include "align.h"
|
||||
|
||||
|
||||
#define MEMORY 2097152 /* 2 MiB */
|
||||
#define MEMORY_LITE 1048576 /* 1 MiB */
|
||||
|
||||
|
||||
struct cryptonight_ctx {
|
||||
VAR_ALIGN(16, uint8_t state0[200]);
|
||||
VAR_ALIGN(16, uint8_t state1[200]);
|
||||
VAR_ALIGN(16, uint8_t* memory);
|
||||
/* Feature bits stored in cpu_info.flags; each value is a distinct bit so they can be OR-ed together. */
enum cpu_flags {
|
||||
/* Set when built for x86-64. */
CPU_FLAG_X86_64 = 1,
|
||||
/* CPU reports AES instruction support. */
CPU_FLAG_AES = 2,
|
||||
/* CPU reports BMI2 instruction support. */
CPU_FLAG_BMI2 = 4
|
||||
};
|
||||
|
||||
|
||||
class Job;
|
||||
class JobResult;
|
||||
void cpu_init();
|
||||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage);
|
||||
int affine_to_cpu_mask(int id, unsigned long mask);
|
||||
|
||||
|
||||
class CryptoNight
|
||||
{
|
||||
public:
|
||||
static bool hash(const Job &job, JobResult &result, cryptonight_ctx *ctx);
|
||||
static bool init(int algo, int variant);
|
||||
static void hash(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx *ctx);
|
||||
|
||||
private:
|
||||
static bool selfTest(int algo);
|
||||
};
|
||||
|
||||
#endif /* __CRYPTONIGHT_H__ */
|
||||
#endif /* XMRIG_CPU_H */
|
||||
@@ -21,20 +21,14 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
|
||||
# define bit_AES (1 << 25)
|
||||
# define bit_BMI2 (1 << 8)
|
||||
#else
|
||||
# include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#include <cpuid.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#include "Cpu.h"
|
||||
#include "cpu.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#define VENDOR_ID (0)
|
||||
@@ -51,11 +45,6 @@
|
||||
#define EDX_Reg (3)
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
static inline void cpuid(int level, int output[4]) {
|
||||
__cpuid(output, level);
|
||||
}
|
||||
#else
|
||||
static inline void cpuid(int level, int output[4]) {
|
||||
int a, b, c, d;
|
||||
__cpuid_count(level, 0, a, b, c, d);
|
||||
@@ -65,11 +54,10 @@ static inline void cpuid(int level, int output[4]) {
|
||||
output[2] = c;
|
||||
output[3] = d;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static inline void cpu_brand_string(char* s) {
|
||||
int cpu_info[4] = { 0 };
|
||||
static void cpu_brand_string(char* s) {
|
||||
int32_t cpu_info[4] = { 0 };
|
||||
cpuid(VENDOR_ID, cpu_info);
|
||||
|
||||
if (cpu_info[EAX_Reg] >= 4) {
|
||||
@@ -82,52 +70,60 @@ static inline void cpu_brand_string(char* s) {
|
||||
}
|
||||
|
||||
|
||||
static inline bool has_aes_ni()
|
||||
static bool has_aes_ni()
|
||||
{
|
||||
int cpu_info[4] = { 0 };
|
||||
int32_t cpu_info[4] = { 0 };
|
||||
cpuid(PROCESSOR_INFO, cpu_info);
|
||||
|
||||
return cpu_info[ECX_Reg] & bit_AES;
|
||||
}
|
||||
|
||||
|
||||
static inline bool has_bmi2() {
|
||||
int cpu_info[4] = { 0 };
|
||||
static bool has_bmi2() {
|
||||
int32_t cpu_info[4] = { 0 };
|
||||
cpuid(EXTENDED_FEATURES, cpu_info);
|
||||
|
||||
return cpu_info[EBX_Reg] & bit_BMI2;
|
||||
}
|
||||
|
||||
|
||||
char Cpu::m_brand[64] = { 0 };
|
||||
int Cpu::m_flags = 0;
|
||||
int Cpu::m_l2_cache = 0;
|
||||
int Cpu::m_l3_cache = 0;
|
||||
int Cpu::m_sockets = 1;
|
||||
int Cpu::m_totalCores = 0;
|
||||
int Cpu::m_totalThreads = 0;
|
||||
void cpu_init_common() {
|
||||
cpu_info.sockets = 1;
|
||||
cpu_brand_string(cpu_info.brand);
|
||||
|
||||
|
||||
int Cpu::optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage)
|
||||
{
|
||||
int count = m_totalThreads / 2;
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
|
||||
|
||||
void Cpu::initCommon()
|
||||
{
|
||||
cpu_brand_string(m_brand);
|
||||
|
||||
# if defined(__x86_64__) || defined(_M_AMD64)
|
||||
m_flags |= X86_64;
|
||||
# ifdef __x86_64__
|
||||
cpu_info.flags |= CPU_FLAG_X86_64;
|
||||
# endif
|
||||
|
||||
if (has_aes_ni()) {
|
||||
m_flags |= AES;
|
||||
cpu_info.flags |= CPU_FLAG_AES;
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
char vendor[13] = { 0 };
|
||||
int32_t data[4] = { 0 };
|
||||
|
||||
cpuid(0, data);
|
||||
|
||||
memcpy(vendor + 0, &data[1], 4);
|
||||
memcpy(vendor + 4, &data[3], 4);
|
||||
memcpy(vendor + 8, &data[2], 4);
|
||||
|
||||
if (memcmp(vendor, "GenuineIntel", 12) == 0) {
|
||||
cpu_info.assembly = ASM_INTEL;
|
||||
}
|
||||
else if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
|
||||
cpu_info.assembly = ASM_RYZEN;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
if (has_bmi2()) {
|
||||
m_flags |= BMI2;
|
||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
|
||||
int count = cpu_info.total_logical_cpus / 2;
|
||||
return count < 1 ? 1 : count;
|
||||
}
|
||||
146
crypto/CryptonightR_gen.c
Normal file
146
crypto/CryptonightR_gen.c
Normal file
@@ -0,0 +1,146 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "crypto/asm/CryptonightR_template.h"
|
||||
#include "persistent_memory.h"
|
||||
|
||||
|
||||
static inline void add_code(uint8_t **p, void (*p1)(), void (*p2)())
|
||||
{
|
||||
const ptrdiff_t size = (const uint8_t*)(p2) - (const uint8_t*)(p1);
|
||||
if (size > 0) {
|
||||
memcpy(*p, (const void *) p1, size);
|
||||
*p += size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void add_random_math(uint8_t **p, const struct V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, enum Assembly ASM)
|
||||
{
|
||||
uint32_t prev_rot_src = (uint32_t)(-1);
|
||||
|
||||
for (int i = 0;; ++i) {
|
||||
const struct V4_Instruction inst = code[i];
|
||||
if (inst.opcode == RET) {
|
||||
break;
|
||||
}
|
||||
|
||||
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
|
||||
uint8_t dst_index = inst.dst_index;
|
||||
uint8_t src_index = inst.src_index;
|
||||
|
||||
const uint32_t a = inst.dst_index;
|
||||
const uint32_t b = inst.src_index;
|
||||
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
|
||||
|
||||
switch (inst.opcode) {
|
||||
case ROR:
|
||||
case ROL:
|
||||
if (b != prev_rot_src) {
|
||||
prev_rot_src = b;
|
||||
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (a == prev_rot_src) {
|
||||
prev_rot_src = (uint32_t)(-1);
|
||||
}
|
||||
|
||||
void_func begin = instructions[c];
|
||||
|
||||
if ((ASM = ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
|
||||
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
|
||||
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
|
||||
uint8_t* prefix = (uint8_t*) begin;
|
||||
|
||||
if (*prefix == 0x49) {
|
||||
**p = 0x41;
|
||||
*p += 1;
|
||||
}
|
||||
|
||||
begin = (void_func)(prefix + 1);
|
||||
}
|
||||
|
||||
add_code(p, begin, instructions[c + 1]);
|
||||
|
||||
if (inst.opcode == ADD) {
|
||||
*(uint32_t*)(*p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
|
||||
if (is_64_bit) {
|
||||
prev_rot_src = (uint32_t)(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = machine_code;
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(&p, CryptonightR_template_part1, CryptonightR_template_part2);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_template_part2, CryptonightR_template_part3);
|
||||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
|
||||
add_code(&p, CryptonightR_template_part3, CryptonightR_template_end);
|
||||
|
||||
flush_instruction_cache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
|
||||
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = (uint8_t*) machine_code;
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(&p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
|
||||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
|
||||
add_code(&p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
|
||||
|
||||
flush_instruction_cache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
|
||||
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = machine_code;
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(&p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
|
||||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
|
||||
add_code(&p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
|
||||
|
||||
flush_instruction_cache(machine_code, p - p0);
|
||||
}
|
||||
279
crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
279
crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
@@ -0,0 +1,279 @@
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
|
||||
/* Epilogue: reached when the loop counter in r12d hits zero. Undoes the frame built at CryptonightR_soft_aes_template_part1. */
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||
/* Reload xmm6-xmm13 from the stack slots [rsp+16]..[rsp+128] they were saved to in part1. */
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
/* Release the 232 bytes reserved by "sub rsp, 232" in part1. */
add rsp, 232
|
||||
/* Pop the general-purpose registers in the reverse of part1's push order. */
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
/* End marker: gives the C code generator the address just past the template. */
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
||||
1593
crypto/asm/CryptonightR_template.S
Normal file
1593
crypto/asm/CryptonightR_template.S
Normal file
File diff suppressed because it is too large
Load Diff
1060
crypto/asm/CryptonightR_template.h
Normal file
1060
crypto/asm/CryptonightR_template.h
Normal file
File diff suppressed because it is too large
Load Diff
531
crypto/asm/CryptonightR_template.inc
Normal file
531
crypto/asm/CryptonightR_template.inc
Normal file
@@ -0,0 +1,531 @@
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_end)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_part1):
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movq xmm0, r12
|
||||
movq xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movq xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_mainloop):
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movq xmm0, r15
|
||||
movq xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
|
||||
mov r13d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r13d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movaps xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm5, xmm0
|
||||
|
||||
movq r12, xmm5
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
paddq xmm3, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part2):
|
||||
lea rcx, [r10+r11]
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor rsp, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov rax, r13
|
||||
mul r12
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
movaps xmm3, xmm1
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqa xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm1, xmm2
|
||||
pxor xmm5, xmm0
|
||||
pxor xmm5, xmm1
|
||||
paddq xmm3, xmm4
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqu XMMWORD PTR [r12+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
mov QWORD PTR [rcx], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [rcx+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz FN_PREFIX(CryptonightR_template_mainloop)
|
||||
|
||||
/* Epilogue: reached when the loop counter in r8d hits zero. Undoes the frame built at CryptonightR_template_part1. */
FN_PREFIX(CryptonightR_template_part3):
|
||||
/* rsp served as a data register inside the loop; the real stack pointer was parked in xmm9 by part1. */
movq rsp, xmm9
|
||||
|
||||
/* rbx/rbp/rsi were stored at [rsp+16]/[rsp+24]/[rsp+32] on entry; seven pushes (56 bytes) plus "sub rsp, 64" moved them to +136/+144/+152. */
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
/* Reload xmm6-xmm9 from the slots they were saved to in part1. */
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
/* Release the 64 bytes reserved by "sub rsp, 64" in part1. */
add rsp, 64
|
||||
/* Pop the general-purpose registers in the reverse of part1's push order. */
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
/* End marker: gives the C code generator the address just past the template. */
FN_PREFIX(CryptonightR_template_end):
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_part1):
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movq xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movq xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movq xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movq xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movq xmm14, QWORD PTR [rsp+128]
|
||||
movq xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_mainloop):
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movq xmm0, r12
|
||||
mov ecx, ebx
|
||||
movq xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movq xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
movq rdx, xmm6
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm1
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movq rdi, xmm5
|
||||
movq rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rsi
|
||||
movq xmm2, rdi
|
||||
movq xmm11, rbp
|
||||
movq xmm12, r15
|
||||
movq xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part2):
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r14, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r12, rax
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movq rsi, xmm1
|
||||
movq rdi, xmm2
|
||||
movq rbp, xmm11
|
||||
movq r15, xmm12
|
||||
movq rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
pxor xmm6, xmm2
|
||||
paddq xmm2, xmm3
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
pxor xmm6, xmm0
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movq rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rbx
|
||||
movq xmm2, rsi
|
||||
movq xmm11, rdi
|
||||
movq xmm12, rbp
|
||||
movq xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movq xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part3):
|
||||
|
||||
movq r15, xmm13
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r13, rax
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movq rbx, xmm1
|
||||
movq rsi, xmm2
|
||||
movq rdi, xmm11
|
||||
movq rbp, xmm12
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm1
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm2
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movq rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part4):
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_double_end):
|
||||
410
crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc
Normal file
410
crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc
Normal file
@@ -0,0 +1,410 @@
|
||||
/*
 * Double-hash main loop body. This fragment has no entry label or ret of
 * its own: it is textually #included after an entry label and leaves
 * through its last label, cnv2_double_mainloop_asm_sandybridge_endp.
 *
 * In:  rcx, rdx = pointers to two context blocks. From each, qwords at
 *      offsets 0..104 are read, plus a base pointer at offset 224
 *      (kept in r13 for the rcx block and in rsi for the rdx block).
 *
 * Saves and restores rbx, rbp, rsi, rdi, r12-r15 and xmm6-xmm15.
 * MXCSR is stored at [rsp+272], replaced with 24448 (0x5F80) for the
 * duration of the loop, and reloaded from [rsp+272] after it.
 * [rsp+256..279] lies above the stack pointer this fragment is entered
 * with (184 bytes of locals + 8 pushes = 248), so the includer must make
 * that area writable.
 *
 * The loop runs 524288 times (r14d, decremented at the bottom). Every
 * offset into the two base buffers is masked with 2097136 (0x1FFFF0).
 */
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 184
|
||||
|
||||
stmxcsr DWORD PTR [rsp+272]
|
||||
mov DWORD PTR [rsp+276], 24448
|
||||
ldmxcsr DWORD PTR [rsp+276]
|
||||
|
||||
mov r13, QWORD PTR [rcx+224]
|
||||
mov r9, rdx
|
||||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 524288
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
mov rbp, QWORD PTR [r9+40]
|
||||
xor rbp, QWORD PTR [r9+8]
|
||||
movq xmm0, rdx
|
||||
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rsp+112], xmm9
|
||||
movaps XMMWORD PTR [rsp+96], xmm10
|
||||
movaps XMMWORD PTR [rsp+80], xmm11
|
||||
movaps XMMWORD PTR [rsp+64], xmm12
|
||||
movaps XMMWORD PTR [rsp+48], xmm13
|
||||
movaps XMMWORD PTR [rsp+32], xmm14
|
||||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movq xmm4, QWORD PTR [r8+96]
|
||||
and edx, 2097136
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
movq xmm5, QWORD PTR [r8+104]
|
||||
movq xmm7, rax
|
||||
|
||||
mov eax, 1
|
||||
shl rax, 52
|
||||
movq xmm14, rax
|
||||
punpcklqdq xmm14, xmm14
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movq xmm12, rax
|
||||
punpcklqdq xmm12, xmm12
|
||||
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+56]
|
||||
xor rcx, QWORD PTR [r9+24]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [r9+48]
|
||||
xor rax, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp], r13
|
||||
mov rcx, QWORD PTR [r9+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+256], r10
|
||||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movq xmm8, rax
|
||||
and ecx, 2097136
|
||||
punpcklqdq xmm8, xmm0
|
||||
movq xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, QWORD PTR [r9+104]
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
movdqu xmm11, XMMWORD PTR [r8]
|
||||
punpcklqdq xmm5, xmm0
|
||||
lea r9, QWORD PTR [rdx+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r9]
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_double_sandybridge:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
xor eax, 16
|
||||
xor ebx, 32
|
||||
xor edx, 48
|
||||
|
||||
movq xmm0, r11
|
||||
movq xmm2, r10
|
||||
punpcklqdq xmm2, xmm0
|
||||
aesenc xmm9, xmm2
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||
|
||||
movq r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
||||
lea rbx, QWORD PTR [rdx+r13]
|
||||
mov r10, QWORD PTR [rdx+r13]
|
||||
|
||||
movdqu xmm10, xmm11
|
||||
movq xmm0, rbp
|
||||
movq xmm11, rdi
|
||||
punpcklqdq xmm11, xmm0
|
||||
aesenc xmm10, xmm11
|
||||
|
||||
mov eax, ecx
|
||||
mov r12d, ecx
|
||||
xor eax, 16
|
||||
xor r12d, 32
|
||||
xor ecx, 48
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||
paddq xmm0, xmm6
|
||||
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
paddq xmm1, xmm11
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movq rcx, xmm10
|
||||
and ecx, 2097136
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov r12, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov r9, QWORD PTR [rbx+8]
|
||||
|
||||
xor edx, 16
|
||||
mov r8d, edx
|
||||
mov r15d, edx
|
||||
|
||||
movq rdx, xmm5
|
||||
shl rdx, 32
|
||||
movq rax, xmm4
|
||||
xor rdx, rax
|
||||
xor r10, rdx
|
||||
mov rax, r10
|
||||
mul r11
|
||||
mov r11d, r8d
|
||||
xor r11d, 48
|
||||
movq xmm0, rdx
|
||||
xor rdx, [r11+r13]
|
||||
movq xmm1, rax
|
||||
xor rax, [r11+r13+8]
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||
xor r8d, 32
|
||||
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||
|
||||
mov r11, QWORD PTR [rsp+256]
|
||||
add r11, rdx
|
||||
mov rdx, QWORD PTR [rsp+264]
|
||||
add rdx, rax
|
||||
mov QWORD PTR [rbx], r11
|
||||
xor r11, r10
|
||||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||
lea r13, QWORD PTR [rsi+rcx]
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movaps xmm2, xmm13
|
||||
movq r10, xmm0
|
||||
psllq xmm5, 1
|
||||
shl r10, 32
|
||||
movdqa xmm0, xmm9
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm1, xmm10
|
||||
movq r11, xmm0
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
psrldq xmm4, 8
|
||||
movaps xmm0, xmm13
|
||||
movq rax, xmm4
|
||||
xor r10, rax
|
||||
movaps xmm1, xmm13
|
||||
xor r10, r12
|
||||
lea rax, QWORD PTR [r11+1]
|
||||
shr rax, 1
|
||||
movdqa xmm3, xmm9
|
||||
punpcklqdq xmm3, xmm10
|
||||
paddq xmm5, xmm3
|
||||
movq rdx, xmm5
|
||||
psrldq xmm5, 8
|
||||
cvtsi2sd xmm2, rax
|
||||
or edx, -2147483647
|
||||
lea rax, QWORD PTR [r8+1]
|
||||
shr rax, 1
|
||||
movq r9, xmm5
|
||||
cvtsi2sd xmm0, rax
|
||||
or r9d, -2147483647
|
||||
cvtsi2sd xmm1, rdx
|
||||
unpcklpd xmm2, xmm0
|
||||
movaps xmm0, xmm13
|
||||
cvtsi2sd xmm0, r9
|
||||
unpcklpd xmm1, xmm0
|
||||
divpd xmm2, xmm1
|
||||
paddq xmm2, xmm14
|
||||
cvttsd2si rax, xmm2
|
||||
psrldq xmm2, 8
|
||||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js div_fix_1_sandybridge
|
||||
div_fix_1_ret_sandybridge:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
imul rax, r9
|
||||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js div_fix_2_sandybridge
|
||||
div_fix_2_ret_sandybridge:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
punpckldq xmm2, xmm1
|
||||
punpckldq xmm4, xmm0
|
||||
punpckldq xmm4, xmm2
|
||||
paddq xmm3, xmm4
|
||||
movdqa xmm0, xmm3
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm12
|
||||
sqrtpd xmm1, xmm0
|
||||
movq r9, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je sqrt_fix_1_sandybridge
|
||||
sqrt_fix_1_ret_sandybridge:
|
||||
|
||||
movq r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
test r8, 524287
|
||||
je sqrt_fix_2_sandybridge
|
||||
sqrt_fix_2_ret_sandybridge:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
xor r12d, 16
|
||||
xor r8d, 32
|
||||
xor ecx, 48
|
||||
mov rax, r10
|
||||
mul r9
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||
pxor xmm0, xmm3
|
||||
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||
xor rdx, [r8+rsi]
|
||||
xor rax, [r8+rsi+8]
|
||||
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm0, xmm6
|
||||
paddq xmm1, xmm11
|
||||
paddq xmm3, xmm8
|
||||
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||
|
||||
add rdi, rdx
|
||||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
add rbp, rax
|
||||
mov QWORD PTR [r13+8], rbp
|
||||
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||
xor rbp, rdx
|
||||
mov r13, QWORD PTR [rsp]
|
||||
movdqa xmm3, xmm7
|
||||
mov rdx, QWORD PTR [rsp+8]
|
||||
movdqa xmm8, xmm6
|
||||
mov r10, QWORD PTR [rsp+256]
|
||||
movdqa xmm7, xmm9
|
||||
mov r11, QWORD PTR [rsp+264]
|
||||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne main_loop_double_sandybridge
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+184]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp cnv2_double_mainloop_asm_sandybridge_endp
|
||||
|
||||
/*
 * Out-of-line correction for the first quotient estimate.
 * Reached via js when r11 - rbx*rdx went negative, i.e. the quotient
 * estimate in rbx (from cvttsd2si) was too large for the divisor in rdx.
 */
div_fix_1_sandybridge:
|
||||
/* Lower the quotient by one ... */
dec rbx
|
||||
/* ... and add the divisor back so r11 is the matching remainder. */
add r11, rdx
|
||||
jmp div_fix_1_ret_sandybridge
|
||||
|
||||
/*
 * Out-of-line correction for the second quotient estimate.
 * Reached via js when r8 - rdx*r9 went negative, i.e. the quotient
 * estimate in rdx was too large for the divisor in r9.
 */
div_fix_2_sandybridge:
|
||||
/* Lower the quotient by one ... */
dec rdx
|
||||
/* ... and add the divisor back so r8 is the matching remainder. */
add r8, r9
|
||||
jmp div_fix_2_ret_sandybridge
|
||||
|
||||
/*
 * Out-of-line correction of the low-lane square-root result.
 * Reached via je when the low 19 bits (mask 524287) of the low qword of
 * the sqrtpd result, held in r9, are all zero.
 * In:  r9   = raw low-lane sqrtpd bit pattern
 *      xmm3 = the values the square root was taken from (low lane used)
 *      xmm5 = both sqrtpd lanes already shifted right by 19
 * Out: xmm5 low lane replaced by the corrected value, high lane kept.
 * Clobbers rax, rdx, r8, r9, r11, xmm0.
 */
sqrt_fix_1_sandybridge:
|
||||
/* r8 = low-lane sqrt input, used for the final comparison. */
movq r8, xmm3
|
||||
/* xmm0 low qword = current high lane of xmm5, so it can be put back. */
movdqa xmm0, xmm5
|
||||
||||
psrldq xmm0, 8
|
||||
||||
dec r9
|
||||
/* r11 = -1022 * 2^32 (mov to r11d zero-extends, shl moves it up).
   NOTE(review): presumably cancels the exponent bits left in the
   shifted sqrt bit pattern - confirm against the reference C code. */
mov r11d, -1022
|
||||
shl r11, 32
|
||||
/* r9 = (raw - 1) >> 19 is the candidate result, rax = (raw - 1) >> 20. */
mov rax, r9
|
||||
shr r9, 19
|
||||
||||
shr rax, 20
|
||||
/* rdx = (r9 - rax + r11 + 1) * (rax + r11) */
mov rdx, r9
|
||||
sub rdx, rax
|
||||
||||
lea rdx, [rdx+r11+1]
|
||||
||||
add rax, r11
|
||||
||||
imul rdx, rax
|
||||
/* The borrow from this subtract (product < input, unsigned) is added
   to the candidate by the adc: the result is bumped by one if needed. */
sub rdx, r8
|
||||
adc r9, 0
|
||||
/* Rebuild xmm5 = { corrected low lane, saved high lane }. */
movq xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp sqrt_fix_1_ret_sandybridge
|
||||
|
||||
/*
 * Out-of-line correction of the high-lane square-root result.
 * Reached via je when the low 19 bits (mask 524287) of the high qword of
 * the sqrtpd result, held in r8, are all zero.
 * In:  r8   = raw high-lane sqrtpd bit pattern
 *      xmm3 = the values the square root was taken from (high lane used)
 *      xmm5 = low lane holds the already final low result
 * Out: xmm5 high lane replaced by the corrected value, low lane kept.
 * Clobbers rax, rbx, rdx, r8, r11, xmm0, xmm3.
 */
sqrt_fix_2_sandybridge:
|
||||
/* r11 = high-lane sqrt input, used for the final comparison. */
psrldq xmm3, 8
|
||||
||||
movq r11, xmm3
|
||||
||||
dec r8
|
||||
/* rbx = -1022 * 2^32 (mov to ebx zero-extends, shl moves it up).
   NOTE(review): presumably cancels the exponent bits left in the
   shifted sqrt bit pattern - confirm against the reference C code. */
mov ebx, -1022
|
||||
shl rbx, 32
|
||||
/* r8 = (raw - 1) >> 19 is the candidate result, rax = (raw - 1) >> 20. */
mov rax, r8
|
||||
shr r8, 19
|
||||
||||
shr rax, 20
|
||||
/* rdx = (r8 - rax + rbx + 1) * (rax + rbx) */
mov rdx, r8
|
||||
sub rdx, rax
|
||||
||||
lea rdx, [rdx+rbx+1]
|
||||
||||
add rax, rbx
|
||||
||||
imul rdx, rax
|
||||
/* The borrow from this subtract (product < input, unsigned) is added
   to the candidate by the adc: the result is bumped by one if needed. */
sub rdx, r11
|
||||
adc r8, 0
|
||||
/* xmm5 = { existing low lane, corrected high lane }. */
movq xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp sqrt_fix_2_ret_sandybridge
|
||||
|
||||
cnv2_double_mainloop_asm_sandybridge_endp:
|
||||
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
 * Single-hash main loop body. This fragment has no entry label or ret of
 * its own: it is textually #included after an entry label and leaves
 * through its last label, cnv2_main_loop_bulldozer_endp.
 *
 * In:  rcx = pointer to a context block. Qwords at offsets 0..104 are
 *      read from it, plus a base pointer at offset 224 (kept in rbx).
 *
 * rbx, rbp and rsi are parked at [rsp+16], [rsp+24] and [rsp+32]
 * relative to the stack pointer at entry, so the includer must make that
 * area writable. rdi and r12-r15 are pushed; xmm6-xmm8 are saved in the
 * 64-byte local area. All are restored after the loop.
 * MXCSR is stored at [rsp], replaced with 24448 (0x5F80) for the
 * duration of the loop, and reloaded from [rsp] after it.
 *
 * The loop runs 524288 times (ebp, decremented at the bottom). Every
 * offset into the base buffer is masked with 2097136 (0x1FFFF0).
 */
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 64
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov ebp, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm3, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
mov rdi, QWORD PTR [r9+104]
|
||||
and r10d, 2097136
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm4, rax
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
xorps xmm8, xmm8
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm7, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_main_loop_bulldozer:
|
||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||
movq xmm6, r8
|
||||
pinsrq xmm6, r11, 1
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
lea r9, QWORD PTR [rdi+rdi]
|
||||
shl rdi, 32
|
||||
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
aesenc xmm5, xmm6
|
||||
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
paddq xmm0, xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movaps xmm1, xmm8
|
||||
mov rsi, r15
|
||||
xor rsi, rdi
|
||||
|
||||
mov edi, 1023
|
||||
shl rdi, 52
|
||||
|
||||
movq r14, xmm5
|
||||
pextrq rax, xmm5, 1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm3
|
||||
mov r10, r14
|
||||
and r10d, 2097136
|
||||
movdqa XMMWORD PTR [rdx], xmm0
|
||||
xor rsi, QWORD PTR [r10+rbx]
|
||||
lea r12, QWORD PTR [r10+rbx]
|
||||
mov r13, QWORD PTR [r10+rbx+8]
|
||||
|
||||
add r9d, r14d
|
||||
or r9d, -2147483647
|
||||
xor edx, edx
|
||||
div r9
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
lea r15, [rax+rdx]
|
||||
lea rax, [r14+r15]
|
||||
shr rax, 12
|
||||
add rax, rdi
|
||||
movq xmm0, rax
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdi, xmm1
|
||||
test rdi, 524287
|
||||
je sqrt_fixup_bulldozer
|
||||
shr rdi, 19
|
||||
|
||||
sqrt_fixup_bulldozer_ret:
|
||||
mov rax, rsi
|
||||
mul r14
|
||||
movq xmm1, rax
|
||||
movq xmm0, rdx
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
mov r9d, r10d
|
||||
mov ecx, r10d
|
||||
xor r9d, 16
|
||||
xor ecx, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||
xor rdx, [rcx+rbx]
|
||||
xor rax, [rcx+rbx+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||
pxor xmm2, xmm0
|
||||
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
add r8, rdx
|
||||
add r11, rax
|
||||
mov QWORD PTR [r12], r8
|
||||
xor r8, rsi
|
||||
mov QWORD PTR [r12+8], r11
|
||||
mov r10, r8
|
||||
xor r11, r13
|
||||
and r10d, 2097136
|
||||
movdqa xmm3, xmm5
|
||||
dec ebp
|
||||
jne cnv2_main_loop_bulldozer
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+64]
|
||||
mov rbx, QWORD PTR [r11+56]
|
||||
mov rbp, QWORD PTR [r11+64]
|
||||
mov rsi, QWORD PTR [r11+72]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
jmp cnv2_main_loop_bulldozer_endp
|
||||
|
||||
/*
 * Out-of-line correction of the square-root result.
 * Reached via je when the low 19 bits (mask 524287) of the sqrtsd bit
 * pattern in rdi are all zero; the normal path just does shr rdi, 19.
 * In:  rdi  = raw sqrtsd bit pattern
 *      xmm5 = AES round output (low qword used), r15 = packed division
 *             result; their sum is the value the root was taken from
 * Out: rdi  = corrected result, then jumps back to the main loop.
 * Clobbers rax, rcx, rdx, r9.
 */
sqrt_fixup_bulldozer:
|
||||
/* r9 = sqrt input, recomputed as low qword of xmm5 + r15. */
movq r9, xmm5
|
||||
add r9, r15
|
||||
||||
dec rdi
|
||||
/* rdx = -1022 * 2^32 (mov to edx zero-extends, shl moves it up).
   NOTE(review): presumably cancels the exponent bits left in the
   shifted sqrt bit pattern - confirm against the reference C code. */
mov edx, -1022
|
||||
shl rdx, 32
|
||||
/* rdi = (raw - 1) >> 19 is the candidate result, rax = (raw - 1) >> 20. */
mov rax, rdi
|
||||
shr rdi, 19
|
||||
||||
shr rax, 20
|
||||
/* rcx = (rdi - rax + rdx + 1) * (rax + rdx) */
mov rcx, rdi
|
||||
sub rcx, rax
|
||||
||||
lea rcx, [rcx+rdx+1]
|
||||
||||
add rax, rdx
|
||||
||||
imul rcx, rax
|
||||
/* The borrow from this subtract (product < input, unsigned) is added
   to the candidate by the adc: the result is bumped by one if needed. */
sub rcx, r9
|
||||
adc rdi, 0
|
||||
jmp sqrt_fixup_bulldozer_ret
|
||||
|
||||
cnv2_main_loop_bulldozer_endp:
|
||||
186
crypto/asm/cn2/cnv2_main_loop_ivybridge.inc
Normal file
186
crypto/asm/cn2/cnv2_main_loop_ivybridge.inc
Normal file
@@ -0,0 +1,186 @@
|
||||
/*
 * Single-hash main loop body. This fragment has no entry label or ret of
 * its own: it is textually #included after an entry label and leaves
 * through its last label, cnv2_main_loop_ivybridge_endp.
 *
 * In:  rcx = pointer to a context block. Qwords at offsets 0..104 are
 *      read from it, plus a base pointer at offset 224 (kept in rbx).
 *
 * rbx is parked at [rsp+24] relative to the stack pointer at entry, so
 * the includer must make that slot writable. rbp, rsi, rdi and r12-r15
 * are pushed; xmm6-xmm8 are saved in the 80-byte local area. All are
 * restored after the loop.
 * MXCSR is stored at [rsp], replaced with 24448 (0x5F80) for the
 * duration of the loop, and reloaded from [rsp] after it.
 *
 * The loop runs 524288 times (esi, decremented at the bottom). Every
 * offset into the base buffer is masked with 2097136 (0x1FFFF0).
 * r13d holds the constant -2147483647 (0x80000001) that is ORed into
 * the divisor on every iteration.
 */
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm4, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm3, QWORD PTR [r9+104]
|
||||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
movq xmm5, rax
|
||||
|
||||
xor eax, eax
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_ivybridge:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
mov rdi, r15
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
movq xmm0, r11
|
||||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm1, xmm7
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
add r9d, ebp
|
||||
movdqa xmm0, xmm6
|
||||
psrldq xmm0, 8
|
||||
or r9d, r13d
|
||||
movq rax, xmm0
|
||||
div r9
|
||||
xorps xmm3, xmm3
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rbp]
|
||||
mov r15, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm3, xmm0
|
||||
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||
movq rdx, xmm3
|
||||
test edx, 524287
|
||||
je sqrt_fixup_ivybridge
|
||||
psrlq xmm3, 19
|
||||
sqrt_fixup_ivybridge_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
xor r9d, 48
|
||||
xor r10d, 16
|
||||
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm5
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm2, xmm4
|
||||
paddq xmm1, xmm7
|
||||
movdqa xmm5, xmm4
|
||||
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne main_loop_ivybridge
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp cnv2_main_loop_ivybridge_endp
|
||||
|
||||
/*
 * Out-of-line correction of the square-root result.
 * Reached via je when the low 19 bits (mask 524287) of the sqrt bit
 * pattern in edx are all zero; the normal path just shifts xmm3 by 19.
 * In:  rdx  = raw sqrt bit pattern (low qword of xmm3)
 *      r9   = the value the root was taken from (before its >> 12)
 * Out: xmm3 = corrected result in the low qword.
 * r13 is borrowed as scratch and reloaded with the loop's divisor
 * constant before returning. Clobbers rax, rcx, rdx.
 */
sqrt_fixup_ivybridge:
|
||||
dec rdx
|
||||
/* r13 = -1022 * 2^32 (mov to r13d zero-extends, shl moves it up).
   NOTE(review): presumably cancels the exponent bits left in the
   shifted sqrt bit pattern - confirm against the reference C code. */
mov r13d, -1022
|
||||
shl r13, 32
|
||||
/* rdx = (raw - 1) >> 19 is the candidate result, rax = (raw - 1) >> 20. */
mov rax, rdx
|
||||
shr rdx, 19
|
||||
||||
shr rax, 20
|
||||
/* rcx = (rdx - rax + r13 + 1) * (rax + r13); subtracting ~r13 is the
   same as adding r13 + 1. */
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
||||
add rax, r13
|
||||
||||
not r13
|
||||
||||
sub rcx, r13
|
||||
/* Put back the constant the main loop expects in r13d. */
mov r13d, -2147483647
|
||||
imul rcx, rax
|
||||
/* The borrow from this subtract (product < input, unsigned) is added
   to the candidate by the adc: the result is bumped by one if needed. */
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movq xmm3, rdx
|
||||
jmp sqrt_fixup_ivybridge_ret
|
||||
|
||||
cnv2_main_loop_ivybridge_endp:
|
||||
179
crypto/asm/cn2/cnv2_main_loop_ryzen.inc
Normal file
179
crypto/asm/cn2/cnv2_main_loop_ryzen.inc
Normal file
@@ -0,0 +1,179 @@
|
||||
/*
 * Single-hash main loop body. This fragment has no entry label or ret of
 * its own: it is textually #included after an entry label and leaves
 * through its last label, cnv2_main_loop_ryzen_endp.
 *
 * In:  rcx = pointer to a context block. Qwords at offsets 0..104 are
 *      read from it, plus a base pointer at offset 224 (kept in rbx).
 *
 * rbx, rbp and rsi are parked at [rsp+16], [rsp+24] and [rsp+32]
 * relative to the stack pointer at entry, so the includer must make that
 * area writable. rdi and r12-r15 are pushed; xmm6-xmm8 are saved in the
 * 64-byte local area. All are restored after the loop.
 * MXCSR is stored at [rsp], replaced with 24448 (0x5F80) for the
 * duration of the loop, and reloaded from [rsp] after it.
 *
 * The loop runs 524288 times (ebp, decremented at the bottom). Every
 * offset into the base buffer is masked with 2097136 (0x1FFFF0).
 */
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 64
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov ebp, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm3, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
mov rdi, QWORD PTR [r9+104]
|
||||
and r10d, 2097136
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm4, rax
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
xorps xmm8, xmm8
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm7, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_ryzen:
|
||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||
movq xmm0, r11
|
||||
movq xmm6, r8
|
||||
punpcklqdq xmm6, xmm0
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
lea r9, QWORD PTR [rdi+rdi]
|
||||
shl rdi, 32
|
||||
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
aesenc xmm5, xmm6
|
||||
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
paddq xmm0, xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movaps xmm1, xmm8
|
||||
mov rsi, r15
|
||||
xor rsi, rdi
|
||||
movq r14, xmm5
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm3
|
||||
mov r10, r14
|
||||
and r10d, 2097136
|
||||
movdqa XMMWORD PTR [rdx], xmm0
|
||||
xor rsi, QWORD PTR [r10+rbx]
|
||||
lea r12, QWORD PTR [r10+rbx]
|
||||
mov r13, QWORD PTR [r10+rbx+8]
|
||||
|
||||
add r9d, r14d
|
||||
or r9d, -2147483647
|
||||
xor edx, edx
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movq rax, xmm0
|
||||
|
||||
div r9
|
||||
movq xmm0, rax
|
||||
movq xmm1, rdx
|
||||
punpckldq xmm0, xmm1
|
||||
movq r15, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqa xmm2, xmm0
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm7
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdi, xmm1
|
||||
test rdi, 524287
|
||||
je sqrt_fixup_ryzen
|
||||
shr rdi, 19
|
||||
|
||||
sqrt_fixup_ryzen_ret:
|
||||
mov rax, rsi
|
||||
mul r14
|
||||
movq xmm1, rax
|
||||
movq xmm0, rdx
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
mov r9d, r10d
|
||||
mov ecx, r10d
|
||||
xor r9d, 16
|
||||
xor ecx, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||
xor rdx, [rcx+rbx]
|
||||
xor rax, [rcx+rbx+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||
pxor xmm2, xmm0
|
||||
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
add r8, rdx
|
||||
add r11, rax
|
||||
mov QWORD PTR [r12], r8
|
||||
xor r8, rsi
|
||||
mov QWORD PTR [r12+8], r11
|
||||
mov r10, r8
|
||||
xor r11, r13
|
||||
and r10d, 2097136
|
||||
movdqa xmm3, xmm5
|
||||
dec ebp
|
||||
jne main_loop_ryzen
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+64]
|
||||
mov rbx, QWORD PTR [r11+56]
|
||||
mov rbp, QWORD PTR [r11+64]
|
||||
mov rsi, QWORD PTR [r11+72]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
jmp cnv2_main_loop_ryzen_endp
|
||||
|
||||
/*
 * Out-of-line correction of the square-root result.
 * Reached via je when the low 19 bits (mask 524287) of the sqrtsd bit
 * pattern in rdi are all zero; the normal path just does shr rdi, 19.
 * In:  rdi  = raw sqrtsd bit pattern
 *      xmm2 = the value the root was taken from (copy made before the
 *             >> 12 and the constant add)
 * Out: rdi  = corrected result, then jumps back to the main loop.
 * Clobbers rax, rcx, rdx, r9.
 */
sqrt_fixup_ryzen:
|
||||
/* r9 = sqrt input, used for the final comparison. */
movq r9, xmm2
|
||||
dec rdi
|
||||
/* rdx = -1022 * 2^32 (mov to edx zero-extends, shl moves it up).
   NOTE(review): presumably cancels the exponent bits left in the
   shifted sqrt bit pattern - confirm against the reference C code. */
mov edx, -1022
|
||||
shl rdx, 32
|
||||
/* rdi = (raw - 1) >> 19 is the candidate result, rax = (raw - 1) >> 20. */
mov rax, rdi
|
||||
shr rdi, 19
|
||||
||||
shr rax, 20
|
||||
/* rcx = (rdi - rax + rdx + 1) * (rax + rdx) */
mov rcx, rdi
|
||||
sub rcx, rax
|
||||
||||
lea rcx, [rcx+rdx+1]
|
||||
||||
add rax, rdx
|
||||
||||
imul rcx, rax
|
||||
/* The borrow from this subtract (product < input, unsigned) is added
   to the candidate by the adc: the result is bumped by one if needed. */
sub rcx, r9
|
||||
adc rdi, 0
|
||||
jmp sqrt_fixup_ryzen_ret
|
||||
|
||||
cnv2_main_loop_ryzen_endp:
|
||||
54
crypto/asm/cn_main_loop.S
Normal file
54
crypto/asm/cn_main_loop.S
Normal file
@@ -0,0 +1,54 @@
|
||||
#ifdef __APPLE__
|
||||
# define ALIGN(x) .align 6
|
||||
#else
|
||||
# define ALIGN(x) .align 64
|
||||
#endif
|
||||
.intel_syntax noprefix
|
||||
#ifdef __APPLE__
|
||||
# define FN_PREFIX(fn) _ ## fn
|
||||
.text
|
||||
#else
|
||||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
#endif
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||
|
||||
ALIGN(64)
|
||||
/*
 * Entry point that adapts the incoming argument register and stack
 * layout, then runs the body in cn2/cnv2_main_loop_ivybridge.inc.
 * In: rdi = context pointer (forwarded to the body in rcx).
 */
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
/* The included body stores rbx at [rsp+24] before its own pushes, i.e.
   above the stack pointer it is entered with; reserve room for that. */
sub rsp, 48
|
||||
/* The body reads its context pointer from rcx. NOTE(review): rdi is
   presumably the System V AMD64 first-argument register here - confirm
   against the C prototype. */
mov rcx, rdi
|
||||
#include "cn2/cnv2_main_loop_ivybridge.inc"
|
||||
/* Release the area reserved at entry. */
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 == 0xDEADC0DE.
   NOTE(review): presumably an end-of-code marker - confirm who scans
   for it before removing. */
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
/*
 * Entry point that adapts the incoming argument register and stack
 * layout, then runs the body in cn2/cnv2_main_loop_ryzen.inc.
 * In: rdi = context pointer (forwarded to the body in rcx).
 */
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||
/* The included body stores rbx/rbp/rsi at [rsp+16..39] before its own
   pushes, i.e. above the stack pointer it is entered with; reserve room
   for that. */
sub rsp, 48
|
||||
/* The body reads its context pointer from rcx. NOTE(review): rdi is
   presumably the System V AMD64 first-argument register here - confirm
   against the C prototype. */
mov rcx, rdi
|
||||
#include "cn2/cnv2_main_loop_ryzen.inc"
|
||||
/* Release the area reserved at entry. */
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 == 0xDEADC0DE.
   NOTE(review): presumably an end-of-code marker - confirm who scans
   for it before removing. */
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
/*
 * Entry point that adapts the incoming argument register and stack
 * layout, then runs the body in cn2/cnv2_main_loop_bulldozer.inc.
 * In: rdi = context pointer (forwarded to the body in rcx).
 */
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
|
||||
/* The included body stores rbx/rbp/rsi at [rsp+16..39] before its own
   pushes, i.e. above the stack pointer it is entered with; reserve room
   for that. */
sub rsp, 48
|
||||
/* The body reads its context pointer from rcx. NOTE(review): rdi is
   presumably the System V AMD64 first-argument register here - confirm
   against the C prototype. */
mov rcx, rdi
|
||||
#include "cn2/cnv2_main_loop_bulldozer.inc"
|
||||
/* Release the area reserved at entry. */
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 == 0xDEADC0DE.
   NOTE(review): presumably an end-of-code marker - confirm who scans
   for it before removing. */
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
/*
 * Entry point that adapts the incoming argument registers and stack
 * layout, then runs the two-context body in
 * cn2/cnv2_double_main_loop_sandybridge.inc.
 * In: rdi, rsi = the two context pointers (forwarded in rcx, rdx).
 */
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||
/* The included body keeps scratch values at [rsp+256..279] after
   lowering rsp by 248 (8 pushes + 184), i.e. above the stack pointer it
   is entered with; reserve room for that. */
sub rsp, 48
|
||||
/* The body reads its context pointers from rcx and rdx.
   NOTE(review): rdi/rsi are presumably the System V AMD64 first and
   second argument registers here - confirm against the C prototype. */
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||
/* Release the area reserved at entry. */
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 == 0xDEADC0DE.
   NOTE(review): presumably an end-of-code marker - confirm who scans
   for it before removing. */
mov eax, 3735929054
|
||||
31
crypto/asm/win64/cn_main_loop.S
Normal file
31
crypto/asm/win64/cn_main_loop.S
Normal file
@@ -0,0 +1,31 @@
|
||||
#define ALIGN(x) .align 64
|
||||
.intel_syntax noprefix
|
||||
.section .text
|
||||
.global cnv2_mainloop_ivybridge_asm
|
||||
.global cnv2_mainloop_ryzen_asm
|
||||
.global cnv2_mainloop_bulldozer_asm
|
||||
.global cnv2_double_mainloop_sandybridge_asm
|
||||
|
||||
ALIGN(64)
|
||||
/*
 * Entry point for the body in ../cn2/cnv2_main_loop_ivybridge.inc.
 * The body reads its context pointer from rcx and writes above the
 * entry stack pointer; this wrapper passes both through unchanged, so
 * the caller must already satisfy them.
 */
cnv2_mainloop_ivybridge_asm:
|
||||
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 == 0xDEADC0DE.
   NOTE(review): presumably an end-of-code marker - confirm who scans
   for it before removing. */
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ryzen_asm:
|
||||
#include "../cn2/cnv2_main_loop_ryzen.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_bulldozer_asm:
|
||||
#include "../cn2/cnv2_main_loop_bulldozer.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_double_mainloop_sandybridge_asm:
|
||||
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
@@ -148,7 +148,7 @@ void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
|
||||
|
||||
if (datalen > 0) {
|
||||
memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
|
||||
S->buflen = (left << 3) + (int) datalen;
|
||||
S->buflen = (left << 3) + datalen;
|
||||
} else {
|
||||
S->buflen = 0;
|
||||
}
|
||||
123
crypto/c_keccak.c
Normal file
123
crypto/c_keccak.c
Normal file
@@ -0,0 +1,123 @@
|
||||
// keccak.c
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
// A baseline Keccak (3rd round) implementation.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#define HASH_DATA_AREA 136
|
||||
#define KECCAK_ROUNDS 24
|
||||
|
||||
#ifndef ROTL64
/* Rotate the 64-bit value x left by y bits. y must be in 1..63: a count of
 * 0 or 64 would shift by the full width, which is undefined. */
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#endif

/* Round constants XORed into lane 0 by the Iota step, indexed by round. */
const uint64_t keccakf_rndc[24] =
{
    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
    0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
    0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
    0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
    0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
    0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
};

/* Rotation count used at each step of the Rho/Pi walk. */
const int keccakf_rotc[24] =
{
    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
};

/* Lane index written at each step of the Rho/Pi walk. */
const int keccakf_piln[24] =
{
    10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
    15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
};

/*
 * keccakf - update the 25-lane state in place with the given number of rounds.
 *
 * @st:     the 25 x 64-bit state, modified in place.
 * @rounds: number of rounds to run, starting from round constant 0.
 *          Values above 24 are clamped to 24, because only 24 round
 *          constants exist; values <= 0 leave the state untouched.
 */
void keccakf(uint64_t st[25], int rounds)
{
    const int max_rounds = (int) (sizeof(keccakf_rndc) / sizeof(keccakf_rndc[0]));
    int i, j, round;
    uint64_t t, bc[5];

    /* Never index past the end of keccakf_rndc. */
    if (rounds > max_rounds) {
        rounds = max_rounds;
    }

    for (round = 0; round < rounds; ++round) {

        /* Theta: column parities, then mix each column with its neighbours. */
        bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
        bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
        bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
        bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
        bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];

        for (i = 0; i < 5; ++i) {
            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
            st[i     ] ^= t;
            st[i +  5] ^= t;
            st[i + 10] ^= t;
            st[i + 15] ^= t;
            st[i + 20] ^= t;
        }

        /* Rho Pi: walk the lanes, rotating each value into its new slot.
         * bc[0] is reused as scratch for the displaced lane. */
        t = st[1];
        for (i = 0; i < 24; ++i) {
            bc[0] = st[keccakf_piln[i]];
            st[keccakf_piln[i]] = ROTL64(t, keccakf_rotc[i]);
            t = bc[0];
        }

        /* Chi: non-linear step, applied to each row of five lanes. */
        for (j = 0; j < 25; j += 5) {
            bc[0] = st[j    ];
            bc[1] = st[j + 1];
            bc[2] = st[j + 2];
            bc[3] = st[j + 3];
            bc[4] = st[j + 4];
            st[j    ] ^= (~bc[1]) & bc[2];
            st[j + 1] ^= (~bc[2]) & bc[3];
            st[j + 2] ^= (~bc[3]) & bc[4];
            st[j + 3] ^= (~bc[4]) & bc[0];
            st[j + 4] ^= (~bc[0]) & bc[1];
        }

        /* Iota */
        st[0] ^= keccakf_rndc[round];
    }
}
|
||||
|
||||
// compute a keccak hash (md) of given byte length from "in"
|
||||
typedef uint64_t state_t[25];
|
||||
|
||||
void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
|
||||
{
|
||||
state_t st;
|
||||
uint8_t temp[144];
|
||||
int i, rsiz, rsizw;
|
||||
|
||||
rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
|
||||
rsizw = rsiz / 8;
|
||||
|
||||
memset(st, 0, sizeof(st));
|
||||
|
||||
for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
|
||||
for (i = 0; i < rsizw; i++)
|
||||
st[i] ^= ((uint64_t *) in)[i];
|
||||
keccakf(st, KECCAK_ROUNDS);
|
||||
}
|
||||
|
||||
// last block and padding
|
||||
memcpy(temp, in, inlen);
|
||||
temp[inlen++] = 1;
|
||||
memset(temp + inlen, 0, rsiz - inlen);
|
||||
temp[rsiz - 1] |= 0x80;
|
||||
|
||||
for (i = 0; i < rsizw; i++)
|
||||
st[i] ^= ((uint64_t *) temp)[i];
|
||||
|
||||
keccakf(st, KECCAK_ROUNDS);
|
||||
|
||||
memcpy(md, st, mdlen);
|
||||
}
|
||||
2036
crypto/c_skein.c
Normal file
2036
crypto/c_skein.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -44,6 +44,4 @@ typedef u08b_t SkeinBitSequence; /* bit stream type */
|
||||
SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data,
|
||||
SkeinDataLength databitlen, SkeinBitSequence *hashval);
|
||||
|
||||
void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval);
|
||||
|
||||
#endif /* ifndef _SKEIN_H_ */
|
||||
24
crypto/hash.c
Normal file
24
crypto/hash.c
Normal file
@@ -0,0 +1,24 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "hash-ops.h"
|
||||
#include "c_keccak.h"
|
||||
|
||||
/* Run 24 rounds of keccakf in place over the hash state, viewed as 64-bit lanes. */
void hash_permutation(union hash_state *state) {
    uint64_t *lanes = (uint64_t *) state;

    keccakf(lanes, 24);
}
|
||||
|
||||
/*
 * Hash count bytes from buf with keccak1600, writing the result over *state.
 * keccak1600 comes from c_keccak.h, which is not visible here.
 */
void hash_process(union hash_state *state, const uint8_t *buf, size_t count) {
    uint8_t *out = (uint8_t *) state;

    keccak1600(buf, count, out);
}
|
||||
|
||||
void cn_fast_hash(const void *data, size_t length, char *hash) {
|
||||
union hash_state state;
|
||||
hash_process(&state, data, length);
|
||||
memcpy(hash, &state, HASH_SIZE);
|
||||
}
|
||||
131
crypto/soft_aes.h
Normal file
131
crypto/soft_aes.h
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Additional permission under GNU GPL version 3 section 7
|
||||
*
|
||||
* If you modify this Program, or any covered work, by linking or combining
|
||||
* it with OpenSSL (or a modified version of that library), containing parts
|
||||
* covered by the terms of OpenSSL License and SSLeay License, the licensors
|
||||
* of this Program grant you additional permission to convey the resulting work.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Parts of this file are originally copyright (c) 2014-2017, The Monero Project
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
# include "crypto/SSE2NEON.h"
|
||||
#elif defined(__GNUC__)
|
||||
# include <x86intrin.h>
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
|
||||
/*
 * saes_data(w) expands to a brace-enclosed initializer of 256 entries, each
 * produced by applying the macro w to one byte constant, in index order.
 * It is used below with different w macros to build saes_table and saes_sbox.
 * NOTE(review): the byte values appear to be the standard AES S-box -- confirm.
 */
#define saes_data(w) {\
|
||||
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
|
||||
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
|
||||
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
|
||||
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
|
||||
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
|
||||
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
|
||||
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
|
||||
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
|
||||
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
|
||||
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
|
||||
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
|
||||
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
|
||||
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
|
||||
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
|
||||
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
|
||||
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
|
||||
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
|
||||
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
|
||||
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
|
||||
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
|
||||
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
|
||||
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
|
||||
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
|
||||
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
|
||||
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
|
||||
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
|
||||
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
|
||||
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
|
||||
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
|
||||
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
|
||||
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
|
||||
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
|
||||
|
||||
/* Reduction constant XORed in by saes_f2 when bit 7 of its argument is set. */
#define SAES_WPOLY 0x011b
|
||||
|
||||
/* Pack four bytes into one 32-bit word: b0 is the lowest byte, b3 the highest. */
#define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
|
||||
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
|
||||
|
||||
/* saes_f2: shift x left by one and XOR with SAES_WPOLY if bit 7 of x was set.
   saes_f3: saes_f2(x) XOR x.  saes_h0: identity.
   The arguments are not parenthesized; in this file they are only ever the
   plain constants supplied by saes_data. */
#define saes_f2(x) ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY))
|
||||
#define saes_f3(x) (saes_f2(x) ^ x)
|
||||
#define saes_h0(x) (x)
|
||||
|
||||
/* saes_u0..saes_u3: the word (f2(p), p, p, f3(p)) with its bytes rotated by
   0, 1, 2 and 3 positions respectively. */
#define saes_u0(p) saes_b2w(saes_f2(p), p, p, saes_f3(p))
|
||||
#define saes_u1(p) saes_b2w(saes_f3(p), saes_f2(p), p, p)
|
||||
#define saes_u2(p) saes_b2w( p, saes_f3(p), saes_f2(p), p)
|
||||
#define saes_u3(p) saes_b2w( p, p, saes_f3(p), saes_f2(p))
|
||||
|
||||
/* Four 256-entry word tables, one per saes_u* variant, indexed by input byte. */
__attribute__((aligned(16))) const static uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
|
||||
/* The raw 256 byte values of saes_data, indexed by input byte. */
__attribute__((aligned(16))) const static uint8_t saes_sbox[256] = saes_data(saes_h0);
|
||||
|
||||
|
||||
/*
 * Table-driven software version of one AES encryption round.
 *
 * The four 32-bit lanes of "in" are looked up byte by byte in saes_table:
 * output lane k combines byte 0 of lane k, byte 1 of lane k+1, byte 2 of
 * lane k+2 and byte 3 of lane k+3 (lane indices wrap modulo 4). The result
 * is XORed with "key" and returned.
 */
static inline __m128i soft_aesenc(__m128i in, __m128i key)
{
    uint32_t lane[4];
    uint32_t mixed[4];
    int k;

    lane[0] = _mm_cvtsi128_si32(in);
    lane[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
    lane[2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
    lane[3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));

    for (k = 0; k < 4; ++k) {
        mixed[k] = saes_table[0][lane[k] & 0xff]
                 ^ saes_table[1][(lane[(k + 1) & 3] >> 8) & 0xff]
                 ^ saes_table[2][(lane[(k + 2) & 3] >> 16) & 0xff]
                 ^ saes_table[3][lane[(k + 3) & 3] >> 24];
    }

    /* _mm_set_epi32 takes the highest lane first. */
    return _mm_xor_si128(_mm_set_epi32(mixed[3], mixed[2], mixed[1], mixed[0]), key);
}
|
||||
|
||||
static inline uint32_t sub_word(uint32_t key)
|
||||
{
|
||||
return (saes_sbox[key >> 24 ] << 24) |
|
||||
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
|
||||
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
|
||||
saes_sbox[key & 0xff];
|
||||
}
|
||||
|
||||
#if defined(__clang__) || defined(XMRIG_ARM)
|
||||
/*
 * Rotate a 32-bit value right by amount bits.
 *
 * The count is reduced modulo 32 first, so amount >= 32 no longer triggers
 * an out-of-range shift; a count of 0 (or any multiple of 32) returns value
 * unchanged.
 */
static inline uint32_t _rotr(uint32_t value, uint32_t amount)
{
    amount &= 31;
    return (value >> amount) | (value << ((32 - amount) & 31));
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Software helper for AES key expansion.
 *
 * Applies sub_word to 32-bit lanes 1 and 3 of key and returns, from the
 * lowest lane to the highest:
 *   sub(lane1), rotr(sub(lane1), 8) ^ rcon, sub(lane3), rotr(sub(lane3), 8) ^ rcon
 */
static inline __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
{
    const uint32_t lane1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55));
    const uint32_t lane3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF));
    const uint32_t sub1  = sub_word(lane1);
    const uint32_t sub3  = sub_word(lane3);

    /* _mm_set_epi32 takes the highest lane first. */
    return _mm_set_epi32(_rotr(sub3, 8) ^ rcon, sub3, _rotr(sub1, 8) ^ rcon, sub1);
}
|
||||
@@ -21,11 +21,9 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "App.h"
|
||||
#ifndef __DONATE_H__
|
||||
#define __DONATE_H__
|
||||
|
||||
#define DONATE_LEVEL 0
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
App app(argc, argv);
|
||||
|
||||
return app.exec();
|
||||
}
|
||||
#endif /* __DONATE_H__ */
|
||||
274
elist.h
Normal file
274
elist.h
Normal file
@@ -0,0 +1,274 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_LIST_H
|
||||
#define _LINUX_LIST_H
|
||||
|
||||
/*
|
||||
* Simple doubly linked list implementation.
|
||||
*
|
||||
* Some of the internal functions ("__xxx") are useful when
|
||||
* manipulating whole lists rather than single entries, as
|
||||
* sometimes we already know the next/prev entries and we can
|
||||
* generate better code by using them directly rather than
|
||||
* using the generic single-entry routines.
|
||||
*/
|
||||
|
||||
/*
 * One link of a circular doubly linked list. Embed it in the structure that
 * should be linked; an empty list is a head whose next and prev point at itself.
 */
struct list_head {
    struct list_head *next, *prev;
};

/* Initializer for a list head variable called "name" (points at itself). */
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* Define and initialize an empty list head called "name". */
#define LIST_HEAD(name) \
    struct list_head name = LIST_HEAD_INIT(name)

/* Make the head at ptr an empty list at run time. */
#define INIT_LIST_HEAD(ptr) do { \
    (ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)

/*
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 *
 * The parameter is called "entry" rather than "new" so that the header can
 * also be included from C++ code, where "new" is a keyword.
 */
static inline void __list_add(struct list_head *entry,
                              struct list_head *prev,
                              struct list_head *next)
{
    next->prev = entry;
    entry->next = next;
    entry->prev = prev;
    prev->next = entry;
}

/**
 * list_add - add a new entry
 * @entry: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 */
static inline void list_add(struct list_head *entry, struct list_head *head)
{
    __list_add(entry, head, head->next);
}

/**
 * list_add_tail - add a new entry
 * @entry: new entry to be added
 * @head: list head to add it before
 *
 * Insert a new entry before the specified head.
 * This is useful for implementing queues.
 */
static inline void list_add_tail(struct list_head *entry, struct list_head *head)
{
    __list_add(entry, head->prev, head);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
    next->prev = prev;
    prev->next = next;
}

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty on entry does not return true after this; both of the
 * entry's links are set to null.
 */
static inline void list_del(struct list_head *entry)
{
    __list_del(entry->prev, entry->next);
    /* Typed null pointers: a (void *) would not convert implicitly in C++. */
    entry->next = (struct list_head *) 0;
    entry->prev = (struct list_head *) 0;
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 */
static inline void list_del_init(struct list_head *entry)
{
    __list_del(entry->prev, entry->next);
    INIT_LIST_HEAD(entry);
}

/**
 * list_move - delete from one list and add as another's head
 * @list: the entry to move
 * @head: the head that will precede our entry
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
    __list_del(list->prev, list->next);
    list_add(list, head);
}

/**
 * list_move_tail - delete from one list and add as another's tail
 * @list: the entry to move
 * @head: the head that will follow our entry
 */
static inline void list_move_tail(struct list_head *list,
                                  struct list_head *head)
{
    __list_del(list->prev, list->next);
    list_add_tail(list, head);
}

/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 *
 * Returns non-zero when head->next points back at head.
 */
static inline int list_empty(const struct list_head *head)
{
    return head->next == head;
}

/*
 * Link every entry of the non-empty list "list" in right after "head".
 * "list" itself is left untouched and still points at the moved entries.
 */
static inline void __list_splice(struct list_head *list,
                                 struct list_head *head)
{
    struct list_head *first = list->next;
    struct list_head *last = list->prev;
    struct list_head *at = head->next;

    first->prev = head;
    head->next = first;

    last->next = at;
    at->prev = last;
}

/**
 * list_splice - join two lists
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 */
static inline void list_splice(struct list_head *list, struct list_head *head)
{
    if (!list_empty(list))
        __list_splice(list, head);
}

/**
 * list_splice_init - join two lists and reinitialise the emptied list.
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * The list at @list is reinitialised
 */
static inline void list_splice_init(struct list_head *list,
                                    struct list_head *head)
{
    if (!list_empty(list)) {
        __list_splice(list, head);
        INIT_LIST_HEAD(list);
    }
}
|
||||
|
||||
/**
 * list_entry - recover the enclosing structure from an embedded member
 * @ptr:    pointer to the member inside the structure.
 * @type:   type of the enclosing structure.
 * @member: name of that member within the structure.
 *
 * Subtracts the member's byte offset within @type from @ptr.
 */
#define list_entry(ptr, type, member) \
    ((type *)((char *)(ptr) - (unsigned long)&(((type *)0)->member)))
|
||||
|
||||
/**
 * list_for_each - walk a list from first entry to last
 * @pos:  a &struct list_head pointer used as the loop cursor.
 * @head: the head of the list to walk.
 *
 * The body must not unlink @pos; the next entry is read from it afterwards.
 */
#define list_for_each(pos, head) \
    for ((pos) = (head)->next; (pos) != (head); (pos) = (pos)->next)
|
||||
/**
 * list_for_each_prev - walk a list from last entry to first
 * @pos:  a &struct list_head pointer used as the loop cursor.
 * @head: the head of the list to walk.
 */
#define list_for_each_prev(pos, head) \
    for ((pos) = (head)->prev; (pos) != (head); (pos) = (pos)->prev)
|
||||
|
||||
/**
 * list_for_each_safe - walk a list while allowing removal of the current entry
 * @pos:  a &struct list_head pointer used as the loop cursor.
 * @n:    a second &struct list_head pointer; holds the following entry, which
 *        is fetched before the body runs.
 * @head: the head of the list to walk.
 */
#define list_for_each_safe(pos, n, head) \
    for ((pos) = (head)->next, (n) = (pos)->next; \
         (pos) != (head); \
         (pos) = (n), (n) = (pos)->next)
|
||||
|
||||
/**
 * list_for_each_entry - walk the structures that a list links together
 * @pos:    a pointer to the enclosing structure type, used as the loop cursor.
 * @head:   the head of the list to walk.
 * @member: name of the struct list_head member within the structure.
 *
 * Relies on list_entry and on the compiler's typeof extension.
 */
#define list_for_each_entry(pos, head, member) \
    for ((pos) = list_entry((head)->next, typeof(*(pos)), member); \
         &(pos)->member != (head); \
         (pos) = list_entry((pos)->member.next, typeof(*(pos)), member))
|
||||
|
||||
/**
 * list_for_each_entry_safe - walk linked structures while allowing removal
 *                            of the current one
 * @pos:    a pointer to the enclosing structure type, used as the loop cursor.
 * @n:      a second pointer of the same type; holds the following structure,
 *          which is fetched before the body runs.
 * @head:   the head of the list to walk.
 * @member: name of the struct list_head member within the structure.
 *
 * Relies on list_entry and on the compiler's typeof extension.
 */
#define list_for_each_entry_safe(pos, n, head, member) \
    for ((pos) = list_entry((head)->next, typeof(*(pos)), member), \
         (n) = list_entry((pos)->member.next, typeof(*(pos)), member); \
         &(pos)->member != (head); \
         (pos) = (n), (n) = list_entry((n)->member.next, typeof(*(n)), member))
|
||||
|
||||
/**
 * list_for_each_entry_continue - iterate over list of given type
 *                                continuing after existing point
 * @pos:    the type * to use as a loop counter; iteration starts at the
 *          entry that follows it.
 * @head:   the head for your list.
 * @member: the name of the list_struct within the struct.
 *
 * Earlier versions called prefetch() on every step, but this header does
 * not define it, so the macro could not be used on its own. The call was
 * only a cache hint; dropping it does not change which entries are visited.
 */
#define list_for_each_entry_continue(pos, head, member) \
    for (pos = list_entry(pos->member.next, typeof(*pos), member); \
         &pos->member != (head); \
         pos = list_entry(pos->member.next, typeof(*pos), member))
|
||||
|
||||
#endif
|
||||
@@ -21,25 +21,27 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
|
||||
#include "Cpu.h"
|
||||
struct cpu_info cpu_info = { 0 };
|
||||
void cpu_init_common();
|
||||
|
||||
|
||||
void Cpu::init()
|
||||
{
|
||||
void cpu_init() {
|
||||
# ifdef XMRIG_NO_LIBCPUID
|
||||
m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
|
||||
cpu_info.total_logical_cpus = sysconf(_SC_NPROCESSORS_CONF);
|
||||
# endif
|
||||
|
||||
initCommon();
|
||||
|
||||
cpu_init_common();
|
||||
}
|
||||
|
||||
|
||||
void Cpu::setAffinity(int id, uint64_t mask)
|
||||
int affine_to_cpu_mask(int id, unsigned long mask)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user